author    Michael Brüning <michael.bruning@qt.io> 2022-12-10 02:34:36 +0100
committer Michael Brüning <michael.bruning@qt.io> 2022-12-22 08:15:56 +0000
commit    16ba23dd196a5a4556e334e2e491e9f1e63badbc (patch)
tree      99323e8decd03ec30682730ddbb574d48556ed6e
parent    916b739acdcafd12b98fb4922c38889774200660 (diff)
download  qtwebengine-chromium-16ba23dd196a5a4556e334e2e491e9f1e63badbc.tar.gz
[Backport] Security bug 1378601

Update libaom to fix security issues.

Change-Id: I1109e9d2236f5b4f809e9fed4e9b6eae5a34b257
Reviewed-on: https://codereview.qt-project.org/c/qt/qtwebengine-chromium/+/447836
Reviewed-by: Michal Klocek <michal.klocek@qt.io>
-rw-r--r-- chromium/third_party/libaom/BUILD.gn | 15
-rw-r--r-- chromium/third_party/libaom/README.chromium | 11
-rwxr-xr-x chromium/third_party/libaom/cmake_update.sh | 41
-rw-r--r-- chromium/third_party/libaom/libaom_srcs.gni | 79
-rwxr-xr-x chromium/third_party/libaom/lint_config.sh | 38
-rw-r--r-- chromium/third_party/libaom/options.gni | 2
-rw-r--r-- chromium/third_party/libaom/source/config/config/aom_version.h | 8
-rw-r--r-- chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_config.asm | 10
-rw-r--r-- chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_config.c | 2
-rw-r--r-- chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_config.h | 10
-rw-r--r-- chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_dsp_rtcd.h | 1072
-rw-r--r-- chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_scale_rtcd.h | 6
-rw-r--r-- chromium/third_party/libaom/source/config/ios/arm-neon/config/av1_rtcd.h | 108
-rw-r--r-- chromium/third_party/libaom/source/config/ios/arm64/config/aom_config.asm | 10
-rw-r--r-- chromium/third_party/libaom/source/config/ios/arm64/config/aom_config.c | 2
-rw-r--r-- chromium/third_party/libaom/source/config/ios/arm64/config/aom_config.h | 10
-rw-r--r-- chromium/third_party/libaom/source/config/ios/arm64/config/aom_dsp_rtcd.h | 1072
-rw-r--r-- chromium/third_party/libaom/source/config/ios/arm64/config/aom_scale_rtcd.h | 6
-rw-r--r-- chromium/third_party/libaom/source/config/ios/arm64/config/av1_rtcd.h | 108
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_config.asm | 10
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_config.c | 2
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_config.h | 10
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_dsp_rtcd.h | 1700
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_scale_rtcd.h | 6
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/av1_rtcd.h | 169
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_config.asm | 10
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_config.c | 2
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_config.h | 10
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_dsp_rtcd.h | 1072
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_scale_rtcd.h | 6
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm-neon/config/av1_rtcd.h | 108
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm/config/aom_config.asm | 10
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm/config/aom_config.c | 2
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm/config/aom_config.h | 10
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm/config/aom_dsp_rtcd.h | 494
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm/config/aom_scale_rtcd.h | 6
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm/config/av1_rtcd.h | 53
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm64/config/aom_config.asm | 10
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm64/config/aom_config.c | 2
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm64/config/aom_config.h | 10
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm64/config/aom_dsp_rtcd.h | 1072
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm64/config/aom_scale_rtcd.h | 6
-rw-r--r-- chromium/third_party/libaom/source/config/linux/arm64/config/av1_rtcd.h | 108
-rw-r--r-- chromium/third_party/libaom/source/config/linux/generic/config/aom_config.asm | 10
-rw-r--r-- chromium/third_party/libaom/source/config/linux/generic/config/aom_config.c | 2
-rw-r--r-- chromium/third_party/libaom/source/config/linux/generic/config/aom_config.h | 10
-rw-r--r-- chromium/third_party/libaom/source/config/linux/generic/config/aom_dsp_rtcd.h | 494
-rw-r--r-- chromium/third_party/libaom/source/config/linux/generic/config/aom_scale_rtcd.h | 6
-rw-r--r-- chromium/third_party/libaom/source/config/linux/generic/config/av1_rtcd.h | 53
-rw-r--r-- chromium/third_party/libaom/source/config/linux/ia32/config/aom_config.asm | 8
-rw-r--r-- chromium/third_party/libaom/source/config/linux/ia32/config/aom_config.c | 2
-rw-r--r-- chromium/third_party/libaom/source/config/linux/ia32/config/aom_config.h | 8
-rw-r--r-- chromium/third_party/libaom/source/config/linux/ia32/config/aom_dsp_rtcd.h | 1137
-rw-r--r-- chromium/third_party/libaom/source/config/linux/ia32/config/aom_scale_rtcd.h | 6
-rw-r--r-- chromium/third_party/libaom/source/config/linux/ia32/config/av1_rtcd.h | 202
-rw-r--r-- chromium/third_party/libaom/source/config/linux/x64/config/aom_config.asm | 10
-rw-r--r-- chromium/third_party/libaom/source/config/linux/x64/config/aom_config.c | 2
-rw-r--r-- chromium/third_party/libaom/source/config/linux/x64/config/aom_config.h | 10
-rw-r--r-- chromium/third_party/libaom/source/config/linux/x64/config/aom_dsp_rtcd.h | 1137
-rw-r--r-- chromium/third_party/libaom/source/config/linux/x64/config/aom_scale_rtcd.h | 6
-rw-r--r-- chromium/third_party/libaom/source/config/linux/x64/config/av1_rtcd.h | 202
-rw-r--r-- chromium/third_party/libaom/source/config/win/arm64/config/aom_config.asm | 10
-rw-r--r-- chromium/third_party/libaom/source/config/win/arm64/config/aom_config.c | 2
-rw-r--r-- chromium/third_party/libaom/source/config/win/arm64/config/aom_config.h | 10
-rw-r--r-- chromium/third_party/libaom/source/config/win/arm64/config/aom_dsp_rtcd.h | 1072
-rw-r--r-- chromium/third_party/libaom/source/config/win/arm64/config/aom_scale_rtcd.h | 6
-rw-r--r-- chromium/third_party/libaom/source/config/win/arm64/config/av1_rtcd.h | 108
-rw-r--r-- chromium/third_party/libaom/source/config/win/ia32/config/aom_config.asm | 8
-rw-r--r-- chromium/third_party/libaom/source/config/win/ia32/config/aom_config.c | 2
-rw-r--r-- chromium/third_party/libaom/source/config/win/ia32/config/aom_config.h | 8
-rw-r--r-- chromium/third_party/libaom/source/config/win/ia32/config/aom_dsp_rtcd.h | 1137
-rw-r--r-- chromium/third_party/libaom/source/config/win/ia32/config/aom_scale_rtcd.h | 6
-rw-r--r-- chromium/third_party/libaom/source/config/win/ia32/config/av1_rtcd.h | 202
-rw-r--r-- chromium/third_party/libaom/source/config/win/x64/config/aom_config.asm | 10
-rw-r--r-- chromium/third_party/libaom/source/config/win/x64/config/aom_config.c | 2
-rw-r--r-- chromium/third_party/libaom/source/config/win/x64/config/aom_config.h | 10
-rw-r--r-- chromium/third_party/libaom/source/config/win/x64/config/aom_dsp_rtcd.h | 1137
-rw-r--r-- chromium/third_party/libaom/source/config/win/x64/config/aom_scale_rtcd.h | 6
-rw-r--r-- chromium/third_party/libaom/source/config/win/x64/config/av1_rtcd.h | 202
-rw-r--r-- chromium/third_party/libaom/source/libaom/.clang-format | 141
-rw-r--r-- chromium/third_party/libaom/source/libaom/.cmake-format.py | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/.mailmap | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/AUTHORS | 7
-rw-r--r-- chromium/third_party/libaom/source/libaom/CHANGELOG | 215
-rw-r--r-- chromium/third_party/libaom/source/libaom/CMakeLists.txt | 18
-rw-r--r-- chromium/third_party/libaom/source/libaom/README.md | 22
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom/aom_codec.h | 8
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom/aom_encoder.h | 30
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom/aom_external_partition.h | 12
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom/aom_image.h | 8
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom/aomcx.h | 55
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom/src/aom_encoder.c | 21
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/aom_dsp.cmake | 53
-rwxr-xr-x chromium/third_party/libaom/source/libaom/aom_dsp/aom_dsp_rtcd_defs.pl | 817
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/aom_simd_inline.h | 3
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/arm/avg_neon.c | 142
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/arm/blend_a64_mask_neon.c | 12
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/arm/hadamard_neon.c | 7
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/arm/highbd_intrapred_neon.c | 835
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/arm/highbd_loopfilter_neon.c | 80
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/arm/highbd_quantize_neon.c | 428
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/arm/intrapred_neon.c | 1273
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/arm/loopfilter_neon.c | 78
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/arm/mem_neon.h | 103
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/arm/sad4d_neon.c | 1008
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/arm/sad_neon.c | 754
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/arm/sse_neon.c | 454
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/arm/subpel_variance_neon.c | 611
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/arm/sum_neon.h | 97
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/arm/sum_squares_neon.c | 273
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/arm/variance_neon.c | 1109
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/avg.c | 85
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/fastssim.c | 6
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/grain_table.c | 14
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/intrapred.c | 78
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/intrapred_common.h | 38
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mathutils.h | 6
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve8_horiz_msa.c | 693
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve8_vert_msa.c | 699
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve_copy_dspr2.c | 214
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve_copy_msa.c | 241
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve_msa.h | 79
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/common_dspr2.c | 31
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/common_dspr2.h | 51
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve2_dspr2.c | 1031
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve2_horiz_dspr2.c | 681
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve2_vert_dspr2.c | 237
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve8_horiz_dspr2.c | 879
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve8_vert_dspr2.c | 361
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve_common_dspr2.h | 48
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/intrapred16_dspr2.c | 327
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/intrapred4_dspr2.c | 82
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/intrapred8_dspr2.c | 150
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/intrapred_msa.c | 550
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_16_msa.c | 1488
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_4_msa.c | 147
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_8_msa.c | 333
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_filters_dspr2.c | 328
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_filters_dspr2.h | 736
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_macros_dspr2.h | 437
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_masks_dspr2.h | 357
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_mb_dspr2.c | 590
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_mb_horiz_dspr2.c | 734
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_mb_vert_dspr2.c | 758
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_msa.h | 251
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/macros_msa.h | 2058
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/sad_msa.c | 800
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/sub_pixel_variance_msa.c | 1792
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/subtract_msa.c | 266
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/mips/variance_msa.c | 633
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/noise_util.c | 20
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/prob.h | 39
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/quantize.c | 6
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/simd/v128_intrinsics_c.h | 11
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/simd/v128_intrinsics_x86.h | 48
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/simd/v256_intrinsics_c.h | 13
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/simd/v256_intrinsics_x86.h | 51
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/simd/v64_intrinsics_c.h | 82
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/simd/v64_intrinsics_x86.h | 52
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/variance.c | 14
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/aom_convolve_copy_sse2.c | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c | 8
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/aom_subpixel_8t_intrin_sse2.c | 6
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/aom_subpixel_8t_intrin_ssse3.c | 6
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/avg_intrin_avx2.c | 222
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/avg_intrin_sse2.c | 159
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/avg_intrin_sse4.c | 59
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/blend_a64_mask_avx2.c | 48
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/blend_a64_mask_sse4.c | 38
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/convolve_avx2.h | 12
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/fwd_txfm_sse2.h | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_convolve_avx2.c | 9
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_convolve_ssse3.c | 12
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_intrapred_sse2.c | 24
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_quantize_intrin_avx2.c | 254
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_quantize_intrin_sse2.c | 7
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_sad_avx2.c | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_variance_avx2.c | 6
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_variance_sse2.c | 13
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/intrapred_avx2.c | 92
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/intrapred_sse2.c | 210
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/intrapred_sse4.c | 40
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/intrapred_ssse3.c | 280
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/jnt_sad_ssse3.c | 18
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/jnt_variance_ssse3.c | 17
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/loopfilter_avx2.c | 6
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/loopfilter_sse2.c | 8
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/masked_sad4d_ssse3.c | 32
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/masked_sad_intrin_ssse3.c | 34
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/masked_variance_intrin_ssse3.c | 34
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/mem_sse2.h | 20
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/obmc_intrinsic_sse4.h | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/obmc_variance_avx2.c | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/quantize_avx2.c | 274
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/sad4d_sse2.asm | 432
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/sad_avx2.c | 16
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/sad_impl_avx2.c | 8
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/sum_squares_avx2.c | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/sum_squares_sse2.c | 8
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/variance_avx2.c | 279
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/variance_impl_avx2.c | 43
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/variance_impl_ssse3.c | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_dsp/x86/variance_sse2.c | 32
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_ports/mem.h | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_ports/x86.h | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_scale/aom_scale.cmake | 8
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_scale/aom_scale_rtcd.pl | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_scale/generic/yv12config.c | 56
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_scale/generic/yv12extend.c | 85
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_scale/mips/dspr2/yv12extend_dspr2.c | 143
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_scale/yv12config.h | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_util/aom_thread.c | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/aom_util/endian_inl.h | 15
-rw-r--r-- chromium/third_party/libaom/source/libaom/apps/aomdec.c | 8
-rw-r--r-- chromium/third_party/libaom/source/libaom/apps/aomenc.c | 18
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/arg_defs.c | 118
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/arg_defs.h | 3
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/av1.cmake | 50
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/av1_cx_iface.c | 471
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/av1_cx_iface.h | 46
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/av1_dx_iface.c | 9
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/av1_iface_common.h | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/alloccommon.c | 18
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/alloccommon.h | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/arm/av1_inv_txfm_neon.c | 3
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/arm/blend_a64_hmask_neon.c | 20
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/arm/blend_a64_vmask_neon.c | 18
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/arm/cdef_block_neon.c | 11
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/arm/convolve_neon.c | 2615
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/arm/convolve_neon.h | 331
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/arm/jnt_convolve_neon.c | 824
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/av1_loopfilter.c | 214
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/av1_loopfilter.h | 38
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/av1_rtcd_defs.pl | 53
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/blockd.h | 9
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/cdef.c | 9
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/cdef.h | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/cdef_block.h | 9
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/common.h | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/convolve.h | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/filter.h | 19
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/mv.h | 11
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/pred_common.h | 15
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/reconinter.c | 47
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/reconinter.h | 45
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/reconintra.c | 109
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/resize.c | 50
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/resize.h | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/restoration.c | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/restoration.h | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/scale.h | 1
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/thread_common.c | 134
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/thread_common.h | 14
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/tile_common.c | 39
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/txb_common.h | 170
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/x86/av1_convolve_scale_sse4.c | 30
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/x86/av1_inv_txfm_ssse3.c | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/x86/convolve_2d_avx2.c | 66
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/x86/convolve_2d_sse2.c | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/x86/convolve_avx2.c | 109
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/x86/convolve_sse2.c | 40
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_convolve_2d_avx2.c | 6
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_convolve_2d_ssse3.c | 8
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_inv_txfm_sse4.c | 6
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_jnt_convolve_avx2.c | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_warp_affine_avx2.c | 8
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/x86/intra_edge_sse4.c | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/x86/jnt_convolve_avx2.c | 424
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/x86/jnt_convolve_sse2.c | 39
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/x86/jnt_convolve_ssse3.c | 3
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/x86/reconinter_sse4.c | 14
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/x86/selfguided_avx2.c | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/x86/selfguided_sse4.c | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/x86/warp_plane_avx2.c | 16
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/x86/warp_plane_sse2.c | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/common/x86/warp_plane_sse4.c | 6
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/decoder/accounting.c | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/decoder/decodeframe.c | 83
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/decoder/decodemv.c | 8
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/decoder/decoder.c | 9
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/decoder/decoder.h | 9
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/decoder/grain_synthesis.c | 200
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/decoder/inspection.c | 8
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/decoder/obu.c | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/allintra_vis.c | 21
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/aq_cyclicrefresh.c | 99
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/aq_cyclicrefresh.h | 46
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/arm/crc32/hash_crc32.c | 54
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/av1_highbd_quantize_neon.c | 144
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/av1_temporal_denoiser_neon.c | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/quantize_neon.c | 709
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/temporal_filter_neon.c | 433
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/wedge_utils_neon.c | 77
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/av1_quantize.c | 114
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/av1_temporal_denoiser.c | 64
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/av1_temporal_denoiser.h | 17
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/bitstream.c | 76
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/block.h | 102
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/cnn.c | 85
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/cnn.h | 9
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/compound_type.c | 12
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/context_tree.c | 22
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/context_tree.h | 3
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/encode_strategy.c | 267
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/encode_strategy.h | 15
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/encodeframe.c | 330
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/encodeframe.h | 5
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/encodeframe_utils.c | 161
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/encodeframe_utils.h | 8
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/encodemb.c | 3
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/encodemb.h | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/encodemv.c | 150
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/encodemv.h | 9
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/encoder.c | 687
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/encoder.h | 242
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/encoder_alloc.h | 10
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/encoder_utils.c | 35
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/encoder_utils.h | 72
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/encodetxb.c | 13
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/ethread.c | 126
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/ethread.h | 7
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/firstpass.c | 78
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/firstpass.h | 30
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/global_motion.c | 84
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/global_motion_facade.c | 57
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/gop_structure.c | 54
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/gop_structure.h | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/hash_motion.c | 25
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/hash_motion.h | 6
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/interp_search.c | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/intra_mode_search.c | 236
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/intra_mode_search.h | 21
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/level.c | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/lookahead.c | 19
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/lookahead.h | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/mcomp.c | 59
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/mcomp.h | 111
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/mcomp_structs.h | 83
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/mips/msa/error_msa.c | 109
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/mips/msa/fdct4x4_msa.c | 46
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/mips/msa/temporal_filter_msa.c | 286
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/model_rd.h | 16
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/motion_search_facade.c | 35
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/motion_search_facade.h | 22
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/mv_prec.c | 14
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/nonrd_opt.h | 27
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/nonrd_pickmode.c | 1461
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/palette.c | 88
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/palette.h | 10
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/partition_search.c | 610
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/partition_strategy.c | 89
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/pass2_strategy.c | 110
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/pass2_strategy.h | 15
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/pickcdef.c | 226
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/pickcdef.h | 8
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/picklpf.c | 11
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/picklpf.h | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/pickrst.c | 26
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/pickrst.h | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/ransac.c | 63
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/ratectrl.c | 367
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/ratectrl.h | 54
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/rd.c | 73
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/rd.h | 22
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/rdopt.c | 98
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/rdopt.h | 56
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/rdopt_utils.h | 78
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/segmentation.c | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/speed_features.c | 403
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/speed_features.h | 236
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/svc_layercontext.c | 167
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/svc_layercontext.h | 50
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/temporal_filter.c | 50
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/temporal_filter.h | 16
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/thirdpass.c | 135
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/tokenize.h | 9
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/tpl_model.c | 113
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/tpl_model.h | 16
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/tune_butteraugli.c | 6
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/tune_vmaf.c | 67
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/tx_prune_model_weights.h | 108
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/tx_search.c | 153
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/tx_search.h | 6
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/var_based_part.c | 501
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/var_based_part.h | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/x86/av1_fwd_txfm_sse2.c | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/x86/av1_k_means_avx2.c | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/x86/av1_k_means_sse2.c | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/x86/av1_quantize_avx2.c | 407
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/x86/hash_sse42.c | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/x86/highbd_block_error_intrin_sse2.c | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/x86/pickrst_avx2.c | 16
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/x86/pickrst_sse4.c | 16
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/x86/rdopt_avx2.c | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/x86/rdopt_sse4.c | 8
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/x86/reconinter_enc_sse2.c | 13
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/x86/reconinter_enc_ssse3.c | 9
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/x86/temporal_filter_avx2.c | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/x86/temporal_filter_sse2.c | 5
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/x86/wedge_utils_avx2.c | 6
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/encoder/x86/wedge_utils_sse2.c | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/qmode_rc/ducky_encode.cc | 588
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/qmode_rc/ducky_encode.h | 99
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/qmode_rc/ratectrl_qmode.cc | 1332
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/qmode_rc/ratectrl_qmode.h | 118
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/qmode_rc/ratectrl_qmode_interface.cc (renamed from chromium/third_party/libaom/source/libaom/av1/ratectrl_qmode_interface.cc) | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/qmode_rc/ratectrl_qmode_interface.h | 281
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/qmode_rc/reference_manager.cc | 339
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/qmode_rc/reference_manager.h (renamed from chromium/third_party/libaom/source/libaom/av1/reference_manager.h) | 43
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/ratectrl_qmode.cc | 653
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/ratectrl_qmode.h | 79
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/ratectrl_qmode_interface.h | 134
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/ratectrl_rtc.cc | 7
-rw-r--r-- chromium/third_party/libaom/source/libaom/av1/reference_manager.cc | 96
-rw-r--r-- chromium/third_party/libaom/source/libaom/build/cmake/aom_config_defaults.cmake | 90
-rw-r--r-- chromium/third_party/libaom/source/libaom/build/cmake/aom_configure.cmake | 64
-rw-r--r-- chromium/third_party/libaom/source/libaom/build/cmake/compiler_flags.cmake | 12
-rw-r--r-- chromium/third_party/libaom/source/libaom/build/cmake/cpu.cmake | 32
-rwxr-xr-x chromium/third_party/libaom/source/libaom/build/cmake/rtcd.pl | 44
-rw-r--r-- chromium/third_party/libaom/source/libaom/build/cmake/toolchains/arm64-linux-gcc.cmake | 12
-rw-r--r-- chromium/third_party/libaom/source/libaom/build/cmake/toolchains/arm64-mingw-gcc.cmake | 16
-rw-r--r-- chromium/third_party/libaom/source/libaom/build/cmake/toolchains/armv7-linux-gcc.cmake | 12
-rw-r--r-- chromium/third_party/libaom/source/libaom/build/cmake/toolchains/armv7-mingw-gcc.cmake | 16
-rw-r--r-- chromium/third_party/libaom/source/libaom/build/cmake/toolchains/mips32-linux-gcc.cmake | 78
-rw-r--r-- chromium/third_party/libaom/source/libaom/build/cmake/toolchains/mips64-linux-gcc.cmake | 55
-rw-r--r-- chromium/third_party/libaom/source/libaom/build/cmake/toolchains/ppc-linux-gcc.cmake | 12
-rw-r--r-- chromium/third_party/libaom/source/libaom/build/cmake/toolchains/x86-mingw-gcc.cmake | 16
-rw-r--r-- chromium/third_party/libaom/source/libaom/build/cmake/toolchains/x86_64-mingw-gcc.cmake | 16
-rw-r--r-- chromium/third_party/libaom/source/libaom/common/args.c | 1
-rw-r--r-- chromium/third_party/libaom/source/libaom/common/md5_utils.c | 14
-rw-r--r-- chromium/third_party/libaom/source/libaom/common/obudec.c | 12
-rw-r--r-- chromium/third_party/libaom/source/libaom/common/tools_common.c | 10
-rw-r--r-- chromium/third_party/libaom/source/libaom/common/tools_common.h | 3
-rw-r--r-- chromium/third_party/libaom/source/libaom/common/webmdec.h | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/common/y4menc.c | 35
-rw-r--r-- chromium/third_party/libaom/source/libaom/common/y4minput.c | 23
-rw-r--r-- chromium/third_party/libaom/source/libaom/examples/inspect.c | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/examples/lightfield_bitstream_parsing.c | 3
-rw-r--r-- chromium/third_party/libaom/source/libaom/examples/lightfield_decoder.c | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/examples/lightfield_encoder.c | 10
-rw-r--r-- chromium/third_party/libaom/source/libaom/examples/noise_model.c | 1
-rw-r--r-- chromium/third_party/libaom/source/libaom/examples/resize_util.c | 9
-rw-r--r-- chromium/third_party/libaom/source/libaom/examples/set_maps.c | 7
-rw-r--r-- chromium/third_party/libaom/source/libaom/examples/svc_encoder_rtc.c | 178
-rw-r--r-- chromium/third_party/libaom/source/libaom/examples/twopass_encoder.c | 1
-rw-r--r-- chromium/third_party/libaom/source/libaom/libs.doxy_template | 57
-rw-r--r-- chromium/third_party/libaom/source/libaom/stats/rate_hist.c | 27
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/EbMemory_AVX2.h | 110
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/EbMemory_SSE4_1.h | 38
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/LICENSE.md | 32
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/PATENTS.md | 107
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/README.libaom | 14
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/convolve_2d_avx2.h | 1199
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/convolve_avx2.h | 3334
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/synonyms.h | 31
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/fastfeat/README.libaom | 1
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/fastfeat/fast_9.c | 3
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/fastfeat/nonmax.c | 14
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/README.libaom | 19
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/.clang-format | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/CMakeLists.txt | 34
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/CONTRIBUTORS (renamed from chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/CONTRIBUTORS) | 27
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/LICENSE (renamed from chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/LICENSE) | 0
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/README.md | 141
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/CMakeLists.txt | 38
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/CONTRIBUTORS | 40
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/README.md | 48
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/cmake/gmock.pc.in | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/cmake/gmock_main.pc.in | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-actions.h | 1448
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-cardinalities.h | 14
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-function-mocker.h | 156
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-generated-actions.h | 687
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-generated-actions.h.pump | 376
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-matchers.h | 1425
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-more-actions.h | 702
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-more-matchers.h | 21
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-nice-strict.h | 222
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-spec-builders.h | 758
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock.h | 15
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/README.md | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/gmock-generated-actions.h | 13
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/gmock-generated-actions.h.pump | 12
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/gmock-matchers.h | 13
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/gmock-port.h | 11
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/gmock-internal-utils.h | 126
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/gmock-port.h | 90
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/gmock-pp.h | 12
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock-cardinalities.cc | 12
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock-internal-utils.cc | 112
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock-matchers.cc | 15
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock-spec-builders.cc | 215
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock.cc | 62
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock_main.cc | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/CMakeLists.txt | 69
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/LICENSE | 28
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/README.md | 173
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/cmake/gtest.pc.in | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/cmake/gtest_main.pc.in | 4
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/cmake/internal_utils.cmake | 124
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/cmake/libgtest.la.in | 21
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-assertion-result.h | 237
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-death-test.h | 59
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-matchers.h | 547
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-message.h | 41
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-param-test.h | 35
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-printers.h | 841
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-spi.h | 43
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-test-part.h | 49
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-typed-test.h | 24
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest.h | 801
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest_pred_impl.h | 52
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest_prod.h | 9
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/custom/README.md | 12
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest-port.h | 37
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest-printers.h | 6
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest.h | 6
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-death-test-internal.h | 77
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-filepath.h | 46
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-internal.h | 505
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-param-util.h | 300
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port-arch.h | 13
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port.h | 845
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-string.h | 40
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-type-util.h | 38
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-all.cc | 3
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-assertion-result.cc | 77
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-death-test.cc | 328
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-filepath.cc | 88
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-internal-inl.h | 371
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-matchers.cc | 23
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-port.cc | 321
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-printers.cc | 329
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-test-part.cc | 24
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-typed-test.cc | 29
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest.cc | 2463
-rw-r--r-- chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest_main.cc | 1
-rw-r--r-- chromium/third_party/libaom/source/libaom/tools/aggregate_entropy_stats.py | 2
-rw-r--r-- chromium/third_party/libaom/source/libaom/tools/diff.py | 2
-rwxr-xr-x chromium/third_party/libaom/source/libaom/tools/gen_constrained_tokenset.py | 4
-rwxr-xr-x chromium/third_party/libaom/source/libaom/tools/intersect-diffs.py | 4
-rwxr-xr-x chromium/third_party/libaom/source/libaom/tools/lint-hunks.py | 38
-rwxr-xr-x chromium/third_party/libaom/source/libaom/tools/wrap-commit-msg.py | 2
543 files changed, 49321 insertions, 39800 deletions
diff --git a/chromium/third_party/libaom/BUILD.gn b/chromium/third_party/libaom/BUILD.gn
index 49dbee305c6..3f3f64fc7c8 100644
--- a/chromium/third_party/libaom/BUILD.gn
+++ b/chromium/third_party/libaom/BUILD.gn
@@ -1,4 +1,4 @@
-# Copyright 2014 The Chromium Authors. All rights reserved.
+# Copyright 2014 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -36,6 +36,10 @@ if (current_cpu == "x86") {
} else {
cpu_arch_full = "arm"
}
+} else if (current_cpu == "riscv64") {
+ cpu_arch_full = "generic"
+} else if (current_cpu == "loong64") {
+ cpu_arch_full = "generic"
} else {
cpu_arch_full = current_cpu
}
@@ -197,9 +201,7 @@ if (current_cpu == "arm64" || cpu_arch_full == "arm-neon" ||
}
configs += [ ":libaom_config" ]
- # https://bugs.chromium.org/p/aomedia/issues/detail?id=2294
- sources = [ "//third_party/libaom/source/libaom/aom_ports/arm_cpudetect.c" ]
- sources += aom_av1_common_intrin_neon
+ sources = aom_av1_common_intrin_neon
sources += aom_dsp_common_intrin_neon
sources += aom_av1_encoder_intrin_neon
sources += aom_dsp_encoder_intrin_neon
@@ -256,6 +258,11 @@ static_library("libaom") {
cpu_arch_full == "arm-neon-cpu-detect") {
deps += [ ":libaom_intrinsics_neon" ]
}
+ if (current_cpu == "arm64" || current_cpu == "arm") {
+ # This is needed by all arm boards due to aom_arm_cpu_caps()
+ sources +=
+ [ "//third_party/libaom/source/libaom/aom_ports/arm_cpudetect.c" ]
+ }
if (is_android) {
deps += [ "//third_party/android_ndk:cpu_features" ]
}
diff --git a/chromium/third_party/libaom/README.chromium b/chromium/third_party/libaom/README.chromium
index 82e224030c1..60934136757 100644
--- a/chromium/third_party/libaom/README.chromium
+++ b/chromium/third_party/libaom/README.chromium
@@ -1,11 +1,10 @@
Name: Alliance for Open Media Video Codec
Short Name: libaom
URL: https://aomedia.googlesource.com/aom/
-Version: 3.1.2
-CPEPrefix: cpe:/a:aomedia:aomedia:3.1.2
-Date: Tuesday April 12 2022
-Branch: main
-Commit: e24a83a72b507b93a94f299f0eead1213dbac214
+Version: 3.4.0
+Date: Friday November 11 2022
+Revision: 4ebecefe77953f226e620821fe441e24547a121f
+CPEPrefix: cpe:/a:aomedia:aomedia:3.4.0
License: BSD
License File: source/libaom/LICENSE
Security Critical: yes
@@ -31,7 +30,7 @@ Please follow these steps to update libaom source code:
./cmake_update.sh
This will also update this file with the new revision.
- Update 'Branch' in this file if necessary.
+ Update 'Version' and 'CPEPrefix' in this file if necessary.
3. Amend the commit created by the first step:
diff --git a/chromium/third_party/libaom/cmake_update.sh b/chromium/third_party/libaom/cmake_update.sh
index 64ae3b6c6e3..f0c1ea5d6fa 100755
--- a/chromium/third_party/libaom/cmake_update.sh
+++ b/chromium/third_party/libaom/cmake_update.sh
@@ -1,6 +1,6 @@
#!/bin/bash
#
-# Copyright (c) 2018 The Chromium Authors. All rights reserved.
+# Copyright 2018 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -35,13 +35,13 @@ BASE=$(pwd)
SRC="${BASE}/source/libaom"
CFG="${BASE}/source/config"
-function clean {
+function cleanup() {
rm -rf "${TMP}"
}
# Create empty temp and config directories.
# $1 - Header file directory.
-function reset_dirs {
+function reset_dirs() {
cd ..
rm -rf "${TMP}"
mkdir "${TMP}"
@@ -52,7 +52,7 @@ function reset_dirs {
mkdir -p "${CFG}/${1}/config"
}
-if [ $# -ne 0 ]; then
+if [[ $# -ne 0 ]]; then
echo "Unknown option(s): ${@}"
exit 1
fi
@@ -64,13 +64,13 @@ fi
# Generate Config files.
# $1 - Header file directory.
# $2 - cmake options.
-function gen_config_files {
+function gen_config_files() {
cmake "${SRC}" ${2} &> cmake.txt
case "${1}" in
*x64*|*ia32*)
- egrep "#define [A-Z0-9_]+ [01]" config/aom_config.h | \
- awk '{print "%define " $2 " " $3}' > config/aom_config.asm
+ egrep "#define [A-Z0-9_]+ [01]" config/aom_config.h \
+ | awk '{print "%define " $2 " " $3}' > config/aom_config.asm
;;
esac
@@ -79,7 +79,7 @@ function gen_config_files {
cp config/*_rtcd.h "${CFG}/${1}/config/"
}
-function update_readme {
+function update_readme() {
local IFS=$'\n'
# Split git log output '<date>\n<commit hash>' on the newline to produce 2
# array entries.
@@ -87,14 +87,14 @@ function update_readme {
--date=format:"%A %B %d %Y"))
sed -E -i.bak \
-e "s/^(Date:)[[:space:]]+.*$/\1 ${vals[0]}/" \
- -e "s/^(Commit:)[[:space:]]+[a-f0-9]{40}/\1 ${vals[1]}/" \
+ -e "s/^(Revision:)[[:space:]]+[a-f0-9]{40}/\1 ${vals[1]}/" \
${BASE}/README.chromium
rm ${BASE}/README.chromium.bak
cat <<EOF
README.chromium updated with:
Date: ${vals[0]}
-Commit: ${vals[1]}
+Revision: ${vals[1]}
EOF
}
@@ -102,7 +102,7 @@ EOF
# generate VS project files on linux.
#
# $1 - File to modify.
-function convert_to_windows {
+function convert_to_windows() {
sed -i.bak \
-e 's/\(#define[[:space:]]INLINE[[:space:]]*\)inline/\1 __inline/' \
-e 's/\(#define[[:space:]]HAVE_PTHREAD_H[[:space:]]*\)1/\1 0/' \
@@ -119,7 +119,7 @@ TMP=$(mktemp -d "${BASE}/build.XXXX")
cd "${TMP}"
trap '{
- [ -f ${TMP}/cmake.txt ] && cat ${TMP}/cmake.txt
+ [[ -f ${TMP}/cmake.txt ]] && cat ${TMP}/cmake.txt
echo "Build directory ${TMP} not removed automatically."
}' ERR
@@ -159,14 +159,16 @@ gen_config_files linux/x64 "${all_platforms}"
reset_dirs win/ia32
cp "${CFG}/linux/ia32/config"/* "${CFG}/win/ia32/config/"
convert_to_windows "${CFG}/win/ia32/config/aom_config.h"
-egrep "#define [A-Z0-9_]+[[:space:]]+[01]" "${CFG}/win/ia32/config/aom_config.h" \
+egrep \
+ "#define [A-Z0-9_]+[[:space:]]+[01]" "${CFG}/win/ia32/config/aom_config.h" \
| awk '{print "%define " $2 " " $3}' > "${CFG}/win/ia32/config/aom_config.asm"
# Copy linux configurations and modify for Windows.
reset_dirs win/x64
cp "${CFG}/linux/x64/config"/* "${CFG}/win/x64/config/"
convert_to_windows "${CFG}/win/x64/config/aom_config.h"
-egrep "#define [A-Z0-9_]+[[:space:]]+[01]" "${CFG}/win/x64/config/aom_config.h" \
+egrep \
+ "#define [A-Z0-9_]+[[:space:]]+[01]" "${CFG}/win/x64/config/aom_config.h" \
| awk '{print "%define " $2 " " $3}' > "${CFG}/win/x64/config/aom_config.asm"
reset_dirs linux/arm
@@ -174,14 +176,17 @@ gen_config_files linux/arm \
"${toolchain}/armv7-linux-gcc.cmake -DENABLE_NEON=0 ${all_platforms}"
reset_dirs linux/arm-neon
-gen_config_files linux/arm-neon "${toolchain}/armv7-linux-gcc.cmake ${all_platforms}"
+gen_config_files linux/arm-neon \
+ "${toolchain}/armv7-linux-gcc.cmake ${all_platforms}"
reset_dirs linux/arm-neon-cpu-detect
gen_config_files linux/arm-neon-cpu-detect \
- "${toolchain}/armv7-linux-gcc.cmake -DCONFIG_RUNTIME_CPU_DETECT=1 ${all_platforms}"
+ "${toolchain}/armv7-linux-gcc.cmake -DCONFIG_RUNTIME_CPU_DETECT=1 \
+ ${all_platforms}"
reset_dirs linux/arm64
-gen_config_files linux/arm64 "${toolchain}/arm64-linux-gcc.cmake ${all_platforms}"
+gen_config_files linux/arm64 \
+ "${toolchain}/arm64-linux-gcc.cmake ${all_platforms}"
reset_dirs ios/arm-neon
gen_config_files ios/arm-neon "${toolchain}/armv7-ios.cmake ${all_platforms}"
@@ -200,4 +205,4 @@ update_readme
git cl format > /dev/null \
|| echo "ERROR: 'git cl format' failed. Please run 'git cl format' manually."
-clean
+cleanup
diff --git a/chromium/third_party/libaom/libaom_srcs.gni b/chromium/third_party/libaom/libaom_srcs.gni
index e415adc5c20..caa5b233213 100644
--- a/chromium/third_party/libaom/libaom_srcs.gni
+++ b/chromium/third_party/libaom/libaom_srcs.gni
@@ -10,6 +10,8 @@ aom_av1_common_intrin_avx2 = [
"//third_party/libaom/source/libaom/av1/common/x86/highbd_inv_txfm_avx2.c",
"//third_party/libaom/source/libaom/av1/common/x86/jnt_convolve_avx2.c",
"//third_party/libaom/source/libaom/av1/common/x86/reconinter_avx2.c",
+ "//third_party/libaom/source/libaom/av1/common/x86/selfguided_avx2.c",
+ "//third_party/libaom/source/libaom/av1/common/x86/warp_plane_avx2.c",
"//third_party/libaom/source/libaom/av1/common/x86/wiener_convolve_avx2.c",
]
@@ -28,6 +30,8 @@ aom_av1_common_intrin_neon = [
"//third_party/libaom/source/libaom/av1/common/arm/reconinter_neon.c",
"//third_party/libaom/source/libaom/av1/common/arm/reconintra_neon.c",
"//third_party/libaom/source/libaom/av1/common/arm/resize_neon.c",
+ "//third_party/libaom/source/libaom/av1/common/arm/selfguided_neon.c",
+ "//third_party/libaom/source/libaom/av1/common/arm/warp_plane_neon.c",
"//third_party/libaom/source/libaom/av1/common/arm/wiener_convolve_neon.c",
]
@@ -38,6 +42,7 @@ aom_av1_common_intrin_sse2 = [
"//third_party/libaom/source/libaom/av1/common/x86/convolve_2d_sse2.c",
"//third_party/libaom/source/libaom/av1/common/x86/convolve_sse2.c",
"//third_party/libaom/source/libaom/av1/common/x86/jnt_convolve_sse2.c",
+ "//third_party/libaom/source/libaom/av1/common/x86/warp_plane_sse2.c",
"//third_party/libaom/source/libaom/av1/common/x86/wiener_convolve_sse2.c",
]
@@ -51,6 +56,8 @@ aom_av1_common_intrin_sse4_1 = [
"//third_party/libaom/source/libaom/av1/common/x86/highbd_inv_txfm_sse4.c",
"//third_party/libaom/source/libaom/av1/common/x86/intra_edge_sse4.c",
"//third_party/libaom/source/libaom/av1/common/x86/reconinter_sse4.c",
+ "//third_party/libaom/source/libaom/av1/common/x86/selfguided_sse4.c",
+ "//third_party/libaom/source/libaom/av1/common/x86/warp_plane_sse4.c",
]
aom_av1_common_intrin_ssse3 = [
@@ -120,6 +127,8 @@ aom_av1_common_sources = [
"//third_party/libaom/source/libaom/av1/common/reconintra.h",
"//third_party/libaom/source/libaom/av1/common/resize.c",
"//third_party/libaom/source/libaom/av1/common/resize.h",
+ "//third_party/libaom/source/libaom/av1/common/restoration.c",
+ "//third_party/libaom/source/libaom/av1/common/restoration.h",
"//third_party/libaom/source/libaom/av1/common/scale.c",
"//third_party/libaom/source/libaom/av1/common/scale.h",
"//third_party/libaom/source/libaom/av1/common/scan.c",
@@ -135,6 +144,8 @@ aom_av1_common_sources = [
"//third_party/libaom/source/libaom/av1/common/token_cdfs.h",
"//third_party/libaom/source/libaom/av1/common/txb_common.c",
"//third_party/libaom/source/libaom/av1/common/txb_common.h",
+ "//third_party/libaom/source/libaom/av1/common/warped_motion.c",
+ "//third_party/libaom/source/libaom/av1/common/warped_motion.h",
"//third_party/libaom/source/libaom/common/args_helper.c",
"//third_party/libaom/source/libaom/common/args_helper.h",
]
@@ -165,6 +176,9 @@ aom_av1_encoder_asm_sse2 = [
aom_av1_encoder_asm_ssse3_x86_64 = [ "//third_party/libaom/source/libaom/av1/encoder/x86/av1_quantize_ssse3_x86_64.asm" ]
+aom_av1_encoder_intrin_arm_crc32 =
+ [ "//third_party/libaom/source/libaom/av1/encoder/arm/crc32/hash_crc32.c" ]
+
aom_av1_encoder_intrin_avx2 = [
"//third_party/libaom/source/libaom/av1/encoder/x86/av1_quantize_avx2.c",
"//third_party/libaom/source/libaom/av1/encoder/x86/corner_match_avx2.c",
@@ -179,14 +193,9 @@ aom_av1_encoder_intrin_avx2 = [
"//third_party/libaom/source/libaom/av1/encoder/x86/temporal_filter_avx2.c",
]
-aom_av1_encoder_intrin_msa = [
- "//third_party/libaom/source/libaom/av1/encoder/mips/msa/error_msa.c",
- "//third_party/libaom/source/libaom/av1/encoder/mips/msa/fdct4x4_msa.c",
- "//third_party/libaom/source/libaom/av1/encoder/mips/msa/temporal_filter_msa.c",
-]
-
aom_av1_encoder_intrin_neon = [
"//third_party/libaom/source/libaom/av1/encoder/arm/neon/quantize_neon.c",
+ "//third_party/libaom/source/libaom/av1/encoder/arm/neon/av1_highbd_quantize_neon.c",
"//third_party/libaom/source/libaom/av1/encoder/arm/neon/ml_neon.c",
"//third_party/libaom/source/libaom/av1/encoder/arm/neon/picksrt_neon.c",
"//third_party/libaom/source/libaom/av1/encoder/arm/neon/rdopt_neon.c",
@@ -195,6 +204,8 @@ aom_av1_encoder_intrin_neon = [
"//third_party/libaom/source/libaom/av1/encoder/arm/neon/hybrid_fwd_txfm_neon.c",
"//third_party/libaom/source/libaom/av1/encoder/arm/neon/av1_fwd_txfm2d_neon.c",
"//third_party/libaom/source/libaom/av1/encoder/arm/neon/highbd_fwd_txfm_neon.c",
+ "//third_party/libaom/source/libaom/av1/encoder/arm/neon/wedge_utils_neon.c",
+ "//third_party/libaom/source/libaom/av1/encoder/arm/neon/temporal_filter_neon.c",
"//third_party/libaom/source/libaom/av1/encoder/arm/neon/av1_temporal_denoiser_neon.c",
]
@@ -232,6 +243,7 @@ aom_av1_encoder_intrin_ssse3 = [
aom_av1_encoder_sources = [
"//third_party/libaom/source/libaom/av1/av1_cx_iface.c",
+ "//third_party/libaom/source/libaom/av1/av1_cx_iface.h",
"//third_party/libaom/source/libaom/av1/encoder/allintra_vis.c",
"//third_party/libaom/source/libaom/av1/encoder/allintra_vis.h",
"//third_party/libaom/source/libaom/av1/encoder/aq_complexity.c",
@@ -307,6 +319,7 @@ aom_av1_encoder_sources = [
"//third_party/libaom/source/libaom/av1/encoder/lookahead.h",
"//third_party/libaom/source/libaom/av1/encoder/mcomp.c",
"//third_party/libaom/source/libaom/av1/encoder/mcomp.h",
+ "//third_party/libaom/source/libaom/av1/encoder/mcomp_structs.h",
"//third_party/libaom/source/libaom/av1/encoder/ml.c",
"//third_party/libaom/source/libaom/av1/encoder/ml.h",
"//third_party/libaom/source/libaom/av1/encoder/model_rd.h",
@@ -397,35 +410,11 @@ aom_dsp_common_intrin_avx2 = [
"//third_party/libaom/source/libaom/aom_dsp/x86/bitdepth_conversion_avx2.h",
]
-aom_dsp_common_intrin_dspr2 = [
- "//third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve_copy_dspr2.c",
- "//third_party/libaom/source/libaom/aom_dsp/mips/common_dspr2.c",
- "//third_party/libaom/source/libaom/aom_dsp/mips/common_dspr2.h",
- "//third_party/libaom/source/libaom/aom_dsp/mips/convolve2_dspr2.c",
- "//third_party/libaom/source/libaom/aom_dsp/mips/convolve2_horiz_dspr2.c",
- "//third_party/libaom/source/libaom/aom_dsp/mips/convolve2_vert_dspr2.c",
- "//third_party/libaom/source/libaom/aom_dsp/mips/convolve8_horiz_dspr2.c",
- "//third_party/libaom/source/libaom/aom_dsp/mips/convolve8_vert_dspr2.c",
- "//third_party/libaom/source/libaom/aom_dsp/mips/convolve_common_dspr2.h",
- "//third_party/libaom/source/libaom/aom_dsp/mips/intrapred16_dspr2.c",
- "//third_party/libaom/source/libaom/aom_dsp/mips/intrapred4_dspr2.c",
- "//third_party/libaom/source/libaom/aom_dsp/mips/intrapred8_dspr2.c",
- "//third_party/libaom/source/libaom/aom_dsp/mips/inv_txfm_dspr2.h",
-]
-
-aom_dsp_common_intrin_msa = [
- "//third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve8_horiz_msa.c",
- "//third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve8_vert_msa.c",
- "//third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve_copy_msa.c",
- "//third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve_msa.h",
- "//third_party/libaom/source/libaom/aom_dsp/mips/intrapred_msa.c",
- "//third_party/libaom/source/libaom/aom_dsp/mips/macros_msa.h",
-]
-
aom_dsp_common_intrin_neon = [
"//third_party/libaom/source/libaom/aom_dsp/arm/aom_convolve_copy_neon.c",
"//third_party/libaom/source/libaom/aom_dsp/arm/fwd_txfm_neon.c",
"//third_party/libaom/source/libaom/aom_dsp/arm/loopfilter_neon.c",
+ "//third_party/libaom/source/libaom/aom_dsp/arm/highbd_intrapred_neon.c",
"//third_party/libaom/source/libaom/aom_dsp/arm/intrapred_neon.c",
"//third_party/libaom/source/libaom/aom_dsp/arm/subtract_neon.c",
"//third_party/libaom/source/libaom/aom_dsp/arm/blend_a64_mask_neon.c",
@@ -534,8 +523,8 @@ aom_dsp_encoder_intrin_avx2 = [
"//third_party/libaom/source/libaom/aom_dsp/x86/masked_sad_intrin_avx2.c",
"//third_party/libaom/source/libaom/aom_dsp/x86/subtract_avx2.c",
"//third_party/libaom/source/libaom/aom_dsp/x86/highbd_quantize_intrin_avx2.c",
- "//third_party/libaom/source/libaom/aom_dsp/x86/adaptive_quantize_avx2.c",
"//third_party/libaom/source/libaom/aom_dsp/x86/highbd_adaptive_quantize_avx2.c",
+ "//third_party/libaom/source/libaom/aom_dsp/x86/quantize_avx2.c",
"//third_party/libaom/source/libaom/aom_dsp/x86/sad4d_avx2.c",
"//third_party/libaom/source/libaom/aom_dsp/x86/sad_avx2.c",
"//third_party/libaom/source/libaom/aom_dsp/x86/highbd_sad_avx2.c",
@@ -547,13 +536,6 @@ aom_dsp_encoder_intrin_avx2 = [
"//third_party/libaom/source/libaom/aom_dsp/x86/sum_squares_avx2.c",
]
-aom_dsp_encoder_intrin_msa = [
- "//third_party/libaom/source/libaom/aom_dsp/mips/sad_msa.c",
- "//third_party/libaom/source/libaom/aom_dsp/mips/subtract_msa.c",
- "//third_party/libaom/source/libaom/aom_dsp/mips/variance_msa.c",
- "//third_party/libaom/source/libaom/aom_dsp/mips/sub_pixel_variance_msa.c",
-]
-
aom_dsp_encoder_intrin_neon = [
"//third_party/libaom/source/libaom/aom_dsp/arm/sad4d_neon.c",
"//third_party/libaom/source/libaom/aom_dsp/arm/sad_neon.c",
@@ -571,15 +553,16 @@ aom_dsp_encoder_intrin_sse2 = [
"//third_party/libaom/source/libaom/aom_dsp/x86/fwd_txfm_sse2.c",
"//third_party/libaom/source/libaom/aom_dsp/x86/fwd_txfm_sse2.h",
"//third_party/libaom/source/libaom/aom_dsp/x86/quantize_sse2.c",
- "//third_party/libaom/source/libaom/aom_dsp/x86/adaptive_quantize_sse2.c",
"//third_party/libaom/source/libaom/aom_dsp/x86/quantize_x86.h",
"//third_party/libaom/source/libaom/aom_dsp/x86/blk_sse_sum_sse2.c",
"//third_party/libaom/source/libaom/aom_dsp/x86/sum_squares_sse2.c",
"//third_party/libaom/source/libaom/aom_dsp/x86/variance_sse2.c",
]
-aom_dsp_encoder_intrin_sse4_1 =
- [ "//third_party/libaom/source/libaom/aom_dsp/x86/sse_sse4.c" ]
+aom_dsp_encoder_intrin_sse4_1 = [
+ "//third_party/libaom/source/libaom/aom_dsp/x86/avg_intrin_sse4.c",
+ "//third_party/libaom/source/libaom/aom_dsp/x86/sse_sse4.c",
+]
aom_dsp_encoder_intrin_ssse3 = [
"//third_party/libaom/source/libaom/aom_dsp/x86/masked_sad_intrin_ssse3.h",
@@ -658,10 +641,6 @@ aom_rtcd_sources = [
"//third_party/libaom/source/libaom/av1/common/av1_rtcd.c",
]
-aom_scale_intrin_dspr2 = [
- "//third_party/libaom/source/libaom/aom_scale/mips/dspr2/yv12extend_dspr2.c",
-]
-
aom_scale_sources = [
"//third_party/libaom/source/libaom/aom_scale/aom_scale.h",
"//third_party/libaom/source/libaom/aom_scale/generic/aom_scale.c",
@@ -710,6 +689,11 @@ aom_webm_encoder_sources = [
]
av1_rc_qmode_sources = [
+ "//third_party/libaom/source/libaom/common/tools_common.c",
+ "//third_party/libaom/source/libaom/common/tools_common.h",
+ "//third_party/libaom/source/libaom/common/y4minput.c",
+ "//third_party/libaom/source/libaom/common/y4minput.h",
+ "//third_party/libaom/source/libaom/test/ducky_encode_test.cc",
"//third_party/libaom/source/libaom/test/mock_ratectrl_qmode.h",
"//third_party/libaom/source/libaom/test/ratectrl_qmode_test.cc",
]
@@ -729,3 +713,6 @@ aom_sources_gen = [
aom_version_sources_gen =
[ "//third_party/libaom/source/libaom/config/aom_version.h" ]
+
+av1_rc_qmode_sources_gen =
+ [ "//third_party/libaom/source/libaom/gen_src/usage_exit.c" ]
diff --git a/chromium/third_party/libaom/lint_config.sh b/chromium/third_party/libaom/lint_config.sh
index 24d1e9b7da5..1c02fb824c7 100755
--- a/chromium/third_party/libaom/lint_config.sh
+++ b/chromium/third_party/libaom/lint_config.sh
@@ -1,6 +1,6 @@
-#!/bin/bash -e
+#!/bin/bash
#
-# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Copyright 2012 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
@@ -19,39 +19,38 @@
# # Compare the two configuration files and output the final results.
# ./lint_config.sh -h aom_config.h -a aom_config.asm -o libaom.config -p
+set -e
+
export LC_ALL=C
print_final="no"
-while getopts "h:a:o:p" flag
-do
- if [ "$flag" = "h" ]; then
+while getopts "h:a:o:p" flag; do
+ if [[ "$flag" == "h" ]]; then
header_file=$OPTARG
- elif [ "$flag" = "a" ]; then
+ elif [[ "$flag" == "a" ]]; then
asm_file=$OPTARG
- elif [ "$flag" = "o" ]; then
+ elif [[ "$flag" == "o" ]]; then
out_file=$OPTARG
- elif [ "$flag" = "p" ]; then
+ elif [[ "$flag" == "p" ]]; then
print_final="yes"
fi
done
-if [ -z "$header_file" ]; then
+if [[ -z "$header_file" ]]; then
echo "Header file not specified."
- false
- exit
+ exit 1
fi
-if [ -z "$asm_file" ]; then
+if [[ -z "$asm_file" ]]; then
echo "ASM file not specified."
- false
- exit
+ exit 1
fi
# Concat header file and assembly file and select those ended with 0 or 1.
combined_config="$(cat $header_file $asm_file | grep -E ' +[01] *$')"
# Extra filtering for known exceptions.
-# TODO(johannkoenig): Shoule be able to remove a lot of these
+# TODO(johannkoenig): Should be able to remove a lot of these
combined_config="$(echo "$combined_config" | grep -v WIDE_REFERENCE)"
combined_config="$(echo "$combined_config" | grep -v ARCHITECTURE)"
combined_config="$(echo "$combined_config" | grep -v DO1STROUNDING)"
@@ -92,12 +91,11 @@ for var in $odd_vars; do
echo ""
done
-if [ -n "$odd_vars" ]; then
- false
- exit
+if [[ -n "$odd_vars" ]]; then
+ exit 1
fi
-if [ "$print_final" = "no" ]; then
+if [[ "$print_final" == "no" ]]; then
exit
fi
@@ -106,7 +104,7 @@ combined_config="$(echo "$combined_config" | grep -v ARCH_X86=no)"
combined_config="$(echo "$combined_config" | grep -v ARCH_X86_64=no)"
# Print out the unique configurations.
-if [ -n "$out_file" ]; then
+if [[ -n "$out_file" ]]; then
echo "$combined_config" | sort | uniq > $out_file
else
echo "$combined_config" | sort | uniq
diff --git a/chromium/third_party/libaom/options.gni b/chromium/third_party/libaom/options.gni
index 9ccf12e535e..d1ce8ce33ac 100644
--- a/chromium/third_party/libaom/options.gni
+++ b/chromium/third_party/libaom/options.gni
@@ -4,7 +4,7 @@ import("//build/config/gclient_args.gni")
declare_args() {
# Enable encoding and decoding AV1 video files.
- enable_libaom = !is_chromeos_lacros && !is_android && !is_chromecast
+ enable_libaom = !is_android
# To be deprecated soon.
enable_libaom_decoder = false
diff --git a/chromium/third_party/libaom/source/config/config/aom_version.h b/chromium/third_party/libaom/source/config/config/aom_version.h
index 790736ffafa..90c9c13020e 100644
--- a/chromium/third_party/libaom/source/config/config/aom_version.h
+++ b/chromium/third_party/libaom/source/config/config/aom_version.h
@@ -10,10 +10,10 @@
*/
#define VERSION_MAJOR 3
-#define VERSION_MINOR 3
+#define VERSION_MINOR 5
#define VERSION_PATCH 0
-#define VERSION_EXTRA "475-ge24a83a72"
+#define VERSION_EXTRA "289-g4ebecefe7"
#define VERSION_PACKED \
((VERSION_MAJOR << 16) | (VERSION_MINOR << 8) | (VERSION_PATCH))
-#define VERSION_STRING_NOSP "3.3.0-475-ge24a83a72"
-#define VERSION_STRING " 3.3.0-475-ge24a83a72"
+#define VERSION_STRING_NOSP "3.5.0-289-g4ebecefe7"
+#define VERSION_STRING " 3.5.0-289-g4ebecefe7"
diff --git a/chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_config.asm b/chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_config.asm
index 15df49ee5ab..c5ebd6ab9ba 100644
--- a/chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_config.asm
+++ b/chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_config.asm
@@ -9,7 +9,6 @@
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;
ARCH_ARM equ 1
-ARCH_MIPS equ 0
ARCH_PPC equ 0
ARCH_X86 equ 0
ARCH_X86_64 equ 0
@@ -33,8 +32,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
CONFIG_ENTROPY_STATS equ 0
CONFIG_EXCLUDE_SIMD_MISMATCH equ 0
CONFIG_FPMT_TEST equ 0
-CONFIG_FRAME_PARALLEL_ENCODE equ 0
-CONFIG_FRAME_PARALLEL_ENCODE_2 equ 0
CONFIG_GCC equ 1
CONFIG_GCOV equ 0
CONFIG_GPROF equ 0
@@ -50,7 +47,7 @@ CONFIG_NORMAL_TILE_MODE equ 1
CONFIG_OPTICAL_FLOW_API equ 0
CONFIG_OS_SUPPORT equ 1
CONFIG_PARTITION_SEARCH_ORDER equ 0
-CONFIG_PIC equ 0
+CONFIG_PIC equ 1
CONFIG_RATECTRL_LOG equ 0
CONFIG_RD_COMMAND equ 0
CONFIG_RD_DEBUG equ 0
@@ -69,14 +66,11 @@ CONFIG_WEBM_IO equ 1
DECODE_HEIGHT_LIMIT equ 16384
DECODE_WIDTH_LIMIT equ 16384
FORCE_HIGHBITDEPTH_DECODING equ 0
+HAVE_ARM_CRC32 equ 0
HAVE_AVX equ 0
HAVE_AVX2 equ 0
-HAVE_DSPR2 equ 0
HAVE_FEXCEPT equ 1
-HAVE_MIPS32 equ 0
-HAVE_MIPS64 equ 0
HAVE_MMX equ 0
-HAVE_MSA equ 0
HAVE_NEON equ 1
HAVE_PTHREAD_H equ 1
HAVE_SSE equ 0
diff --git a/chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_config.c b/chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_config.c
index b1084c5aef7..e81aaa8bc76 100644
--- a/chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_config.c
+++ b/chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_config.c
@@ -9,5 +9,5 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom/aom_codec.h"
-static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DCMAKE_TOOLCHAIN_FILE=\"../source/libaom/build/cmake/toolchains/armv7-ios.cmake\" -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
+static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DCMAKE_TOOLCHAIN_FILE=\"../source/libaom/build/cmake/toolchains/armv7-ios.cmake\" -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
const char *aom_codec_build_config(void) {return cfg;}
diff --git a/chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_config.h b/chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_config.h
index f8e23f257de..dbbde9993bd 100644
--- a/chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_config.h
+++ b/chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_config.h
@@ -11,7 +11,6 @@
#ifndef AOM_CONFIG_H_
#define AOM_CONFIG_H_
#define ARCH_ARM 1
-#define ARCH_MIPS 0
#define ARCH_PPC 0
#define ARCH_X86 0
#define ARCH_X86_64 0
@@ -35,8 +34,6 @@
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_EXCLUDE_SIMD_MISMATCH 0
#define CONFIG_FPMT_TEST 0
-#define CONFIG_FRAME_PARALLEL_ENCODE 0
-#define CONFIG_FRAME_PARALLEL_ENCODE_2 0
#define CONFIG_GCC 1
#define CONFIG_GCOV 0
#define CONFIG_GPROF 0
@@ -52,7 +49,7 @@
#define CONFIG_OPTICAL_FLOW_API 0
#define CONFIG_OS_SUPPORT 1
#define CONFIG_PARTITION_SEARCH_ORDER 0
-#define CONFIG_PIC 0
+#define CONFIG_PIC 1
#define CONFIG_RATECTRL_LOG 0
#define CONFIG_RD_COMMAND 0
#define CONFIG_RD_DEBUG 0
@@ -71,14 +68,11 @@
#define DECODE_HEIGHT_LIMIT 16384
#define DECODE_WIDTH_LIMIT 16384
#define FORCE_HIGHBITDEPTH_DECODING 0
+#define HAVE_ARM_CRC32 0
#define HAVE_AVX 0
#define HAVE_AVX2 0
-#define HAVE_DSPR2 0
#define HAVE_FEXCEPT 1
-#define HAVE_MIPS32 0
-#define HAVE_MIPS64 0
#define HAVE_MMX 0
-#define HAVE_MSA 0
#define HAVE_NEON 1
#define HAVE_PTHREAD_H 1
#define HAVE_SSE 0
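
The per-platform aom_config.h and aom_config.asm above have to stay in agreement, which is what lint_config.sh verifies. A rough one-off equivalent for the ios/arm-neon pair, as a sketch (paths taken from this patch; the grep/awk filters assume the exact "#define NAME 0/1" and "NAME equ 0/1" layouts shown above):

    h=chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_config.h
    a=chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_config.asm
    diff <(grep -E '^#define [A-Z0-9_]+ [01]$' "$h" | awk '{print $2, $3}' | sort) \
         <(grep -E '^[A-Z0-9_]+ equ [01]$' "$a" | awk '{print $1, $3}' | sort)
    # no output means the two config files agree on every 0/1 feature flag
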
diff --git a/chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_dsp_rtcd.h b/chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_dsp_rtcd.h
index 155c11857c0..c2d2ac85aaa 100644
--- a/chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_dsp_rtcd.h
+++ b/chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_dsp_rtcd.h
@@ -34,7 +34,12 @@ void aom_avg_8x8_quad_c(const uint8_t* s,
int x16_idx,
int y16_idx,
int* avg);
-#define aom_avg_8x8_quad aom_avg_8x8_quad_c
+void aom_avg_8x8_quad_neon(const uint8_t* s,
+ int p,
+ int x16_idx,
+ int y16_idx,
+ int* avg);
+#define aom_avg_8x8_quad aom_avg_8x8_quad_neon
void aom_blend_a64_hmask_c(uint8_t* dst,
uint32_t dst_stride,
@@ -176,18 +181,24 @@ void aom_dc_128_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_c
+void aom_dc_128_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x4 aom_dc_128_predictor_16x4_c
+
+void aom_dc_128_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x64 aom_dc_128_predictor_16x64_c
+
void aom_dc_128_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_c
-void aom_dc_128_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c
-
void aom_dc_128_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -210,6 +221,18 @@ void aom_dc_128_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_32x64 aom_dc_128_predictor_32x64_c
+void aom_dc_128_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_32x8 aom_dc_128_predictor_32x8_c
+
+void aom_dc_128_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_4x16 aom_dc_128_predictor_4x16_c
+
void aom_dc_128_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -226,6 +249,12 @@ void aom_dc_128_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_c
+void aom_dc_128_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_64x16 aom_dc_128_predictor_64x16_c
+
void aom_dc_128_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -244,6 +273,12 @@ void aom_dc_128_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_c
+void aom_dc_128_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_8x32 aom_dc_128_predictor_8x32_c
+
void aom_dc_128_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -276,18 +311,24 @@ void aom_dc_left_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_c
+void aom_dc_left_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x4 aom_dc_left_predictor_16x4_c
+
+void aom_dc_left_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x64 aom_dc_left_predictor_16x64_c
+
void aom_dc_left_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_c
-void aom_dc_left_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c
-
void aom_dc_left_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -310,6 +351,18 @@ void aom_dc_left_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_32x64 aom_dc_left_predictor_32x64_c
+void aom_dc_left_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_32x8 aom_dc_left_predictor_32x8_c
+
+void aom_dc_left_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_4x16 aom_dc_left_predictor_4x16_c
+
void aom_dc_left_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -326,6 +379,12 @@ void aom_dc_left_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_c
+void aom_dc_left_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_64x16 aom_dc_left_predictor_64x16_c
+
void aom_dc_left_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -344,6 +403,12 @@ void aom_dc_left_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_c
+void aom_dc_left_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_8x32 aom_dc_left_predictor_8x32_c
+
void aom_dc_left_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -376,18 +441,24 @@ void aom_dc_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_c
+void aom_dc_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x4 aom_dc_predictor_16x4_c
+
+void aom_dc_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x64 aom_dc_predictor_16x64_c
+
void aom_dc_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_c
-void aom_dc_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c
-
void aom_dc_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -410,6 +481,18 @@ void aom_dc_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_32x64 aom_dc_predictor_32x64_c
+void aom_dc_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_32x8 aom_dc_predictor_32x8_c
+
+void aom_dc_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_4x16 aom_dc_predictor_4x16_c
+
void aom_dc_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -426,6 +509,12 @@ void aom_dc_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_c
+void aom_dc_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_64x16 aom_dc_predictor_64x16_c
+
void aom_dc_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -444,6 +533,12 @@ void aom_dc_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_c
+void aom_dc_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_8x32 aom_dc_predictor_8x32_c
+
void aom_dc_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -476,18 +571,24 @@ void aom_dc_top_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_c
+void aom_dc_top_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x4 aom_dc_top_predictor_16x4_c
+
+void aom_dc_top_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x64 aom_dc_top_predictor_16x64_c
+
void aom_dc_top_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_c
-void aom_dc_top_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c
-
void aom_dc_top_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -510,6 +611,18 @@ void aom_dc_top_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_32x64 aom_dc_top_predictor_32x64_c
+void aom_dc_top_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_32x8 aom_dc_top_predictor_32x8_c
+
+void aom_dc_top_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_4x16 aom_dc_top_predictor_4x16_c
+
void aom_dc_top_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -526,6 +639,12 @@ void aom_dc_top_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_c
+void aom_dc_top_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_64x16 aom_dc_top_predictor_64x16_c
+
void aom_dc_top_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -544,6 +663,12 @@ void aom_dc_top_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_c
+void aom_dc_top_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_8x32 aom_dc_top_predictor_8x32_c
+
void aom_dc_top_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1014,18 +1139,24 @@ void aom_h_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_16x32 aom_h_predictor_16x32_c
+void aom_h_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x4 aom_h_predictor_16x4_c
+
+void aom_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x64 aom_h_predictor_16x64_c
+
void aom_h_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_h_predictor_16x8 aom_h_predictor_16x8_c
-void aom_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_h_predictor_2x2 aom_h_predictor_2x2_c
-
void aom_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1048,6 +1179,18 @@ void aom_h_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_32x64 aom_h_predictor_32x64_c
+void aom_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_32x8 aom_h_predictor_32x8_c
+
+void aom_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_4x16 aom_h_predictor_4x16_c
+
void aom_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1064,6 +1207,12 @@ void aom_h_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_4x8 aom_h_predictor_4x8_c
+void aom_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_64x16 aom_h_predictor_64x16_c
+
void aom_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1082,6 +1231,12 @@ void aom_h_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_8x16 aom_h_predictor_8x16_c
+void aom_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_8x32 aom_h_predictor_8x32_c
+
void aom_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1124,11 +1279,6 @@ void aom_hadamard_8x8_neon(const int16_t* src_diff,
tran_low_t* coeff);
#define aom_hadamard_8x8 aom_hadamard_8x8_neon
-void aom_hadamard_8x8_dual_c(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-#define aom_hadamard_8x8_dual aom_hadamard_8x8_dual_c
-
void aom_hadamard_lp_16x16_c(const int16_t* src_diff,
ptrdiff_t src_stride,
int16_t* coeff);
@@ -1145,6 +1295,14 @@ void aom_hadamard_lp_8x8_neon(const int16_t* src_diff,
int16_t* coeff);
#define aom_hadamard_lp_8x8 aom_hadamard_lp_8x8_neon
+void aom_hadamard_lp_8x8_dual_c(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+void aom_hadamard_lp_8x8_dual_neon(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+#define aom_hadamard_lp_8x8_dual aom_hadamard_lp_8x8_dual_neon
+
void aom_ifft16x16_float_c(const float* input, float* temp, float* output);
#define aom_ifft16x16_float aom_ifft16x16_float_c
@@ -1160,18 +1318,32 @@ void aom_ifft4x4_float_c(const float* input, float* temp, float* output);
void aom_ifft8x8_float_c(const float* input, float* temp, float* output);
#define aom_ifft8x8_float aom_ifft8x8_float_c
-int16_t aom_int_pro_col_c(const uint8_t* ref, const int width);
-int16_t aom_int_pro_col_neon(const uint8_t* ref, const int width);
+void aom_int_pro_col_c(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_col_neon(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
#define aom_int_pro_col aom_int_pro_col_neon
-void aom_int_pro_row_c(int16_t hbuf[16],
+void aom_int_pro_row_c(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
-void aom_int_pro_row_neon(int16_t hbuf[16],
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_row_neon(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
+ const int width,
+ const int height,
+ int norm_factor);
#define aom_int_pro_row aom_int_pro_row_neon
void aom_lowbd_blend_a64_d16_mask_c(uint8_t* dst,
@@ -1237,7 +1409,12 @@ void aom_lpf_horizontal_14_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_14_quad aom_lpf_horizontal_14_quad_c
+void aom_lpf_horizontal_14_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_horizontal_14_quad aom_lpf_horizontal_14_quad_neon
void aom_lpf_horizontal_4_c(uint8_t* s,
int pitch,
@@ -1274,7 +1451,12 @@ void aom_lpf_horizontal_4_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_4_quad aom_lpf_horizontal_4_quad_c
+void aom_lpf_horizontal_4_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_horizontal_4_quad aom_lpf_horizontal_4_quad_neon
void aom_lpf_horizontal_6_c(uint8_t* s,
int pitch,
@@ -1311,7 +1493,12 @@ void aom_lpf_horizontal_6_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_6_quad aom_lpf_horizontal_6_quad_c
+void aom_lpf_horizontal_6_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_horizontal_6_quad aom_lpf_horizontal_6_quad_neon
void aom_lpf_horizontal_8_c(uint8_t* s,
int pitch,
@@ -1348,7 +1535,12 @@ void aom_lpf_horizontal_8_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_8_quad aom_lpf_horizontal_8_quad_c
+void aom_lpf_horizontal_8_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_horizontal_8_quad aom_lpf_horizontal_8_quad_neon
void aom_lpf_vertical_14_c(uint8_t* s,
int pitch,
@@ -1385,7 +1577,12 @@ void aom_lpf_vertical_14_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_14_quad aom_lpf_vertical_14_quad_c
+void aom_lpf_vertical_14_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_vertical_14_quad aom_lpf_vertical_14_quad_neon
void aom_lpf_vertical_4_c(uint8_t* s,
int pitch,
@@ -1422,7 +1619,12 @@ void aom_lpf_vertical_4_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_4_quad aom_lpf_vertical_4_quad_c
+void aom_lpf_vertical_4_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_vertical_4_quad aom_lpf_vertical_4_quad_neon
void aom_lpf_vertical_6_c(uint8_t* s,
int pitch,
@@ -1459,7 +1661,12 @@ void aom_lpf_vertical_6_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_6_quad aom_lpf_vertical_6_quad_c
+void aom_lpf_vertical_6_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_vertical_6_quad aom_lpf_vertical_6_quad_neon
void aom_lpf_vertical_8_c(uint8_t* s,
int pitch,
@@ -1496,7 +1703,12 @@ void aom_lpf_vertical_8_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_8_quad aom_lpf_vertical_8_quad_c
+void aom_lpf_vertical_8_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_vertical_8_quad aom_lpf_vertical_8_quad_neon
unsigned int aom_masked_sad128x128_c(const uint8_t* src,
int src_stride,
@@ -2070,21 +2282,43 @@ unsigned int aom_mse16x8_c(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
-#define aom_mse16x8 aom_mse16x8_c
+unsigned int aom_mse16x8_neon(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+#define aom_mse16x8 aom_mse16x8_neon
unsigned int aom_mse8x16_c(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
-#define aom_mse8x16 aom_mse8x16_c
+unsigned int aom_mse8x16_neon(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+#define aom_mse8x16 aom_mse8x16_neon
unsigned int aom_mse8x8_c(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
-#define aom_mse8x8 aom_mse8x8_c
+unsigned int aom_mse8x8_neon(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+#define aom_mse8x8 aom_mse8x8_neon
+
+uint64_t aom_mse_16xh_16bit_c(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+#define aom_mse_16xh_16bit aom_mse_16xh_16bit_c
uint64_t aom_mse_wxh_16bit_c(uint8_t* dst,
int dstride,
@@ -2098,85 +2332,191 @@ void aom_paeth_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_16x16 aom_paeth_predictor_16x16_c
+void aom_paeth_predictor_16x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x16 aom_paeth_predictor_16x16_neon
void aom_paeth_predictor_16x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_16x32 aom_paeth_predictor_16x32_c
+void aom_paeth_predictor_16x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x32 aom_paeth_predictor_16x32_neon
-void aom_paeth_predictor_16x8_c(uint8_t* dst,
+void aom_paeth_predictor_16x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_16x8 aom_paeth_predictor_16x8_c
+void aom_paeth_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x4 aom_paeth_predictor_16x4_neon
-void aom_paeth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c
+void aom_paeth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x64 aom_paeth_predictor_16x64_neon
+
+void aom_paeth_predictor_16x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x8 aom_paeth_predictor_16x8_neon
void aom_paeth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_32x16 aom_paeth_predictor_32x16_c
+void aom_paeth_predictor_32x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x16 aom_paeth_predictor_32x16_neon
void aom_paeth_predictor_32x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_32x32 aom_paeth_predictor_32x32_c
+void aom_paeth_predictor_32x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x32 aom_paeth_predictor_32x32_neon
void aom_paeth_predictor_32x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_32x64 aom_paeth_predictor_32x64_c
+void aom_paeth_predictor_32x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x64 aom_paeth_predictor_32x64_neon
+
+void aom_paeth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x8 aom_paeth_predictor_32x8_neon
+
+void aom_paeth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_4x16 aom_paeth_predictor_4x16_neon
void aom_paeth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_4x4 aom_paeth_predictor_4x4_c
+void aom_paeth_predictor_4x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_4x4 aom_paeth_predictor_4x4_neon
void aom_paeth_predictor_4x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_4x8 aom_paeth_predictor_4x8_c
+void aom_paeth_predictor_4x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_4x8 aom_paeth_predictor_4x8_neon
+
+void aom_paeth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_64x16 aom_paeth_predictor_64x16_neon
void aom_paeth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_64x32 aom_paeth_predictor_64x32_c
+void aom_paeth_predictor_64x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_64x32 aom_paeth_predictor_64x32_neon
void aom_paeth_predictor_64x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_64x64 aom_paeth_predictor_64x64_c
+void aom_paeth_predictor_64x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_64x64 aom_paeth_predictor_64x64_neon
void aom_paeth_predictor_8x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_8x16 aom_paeth_predictor_8x16_c
+void aom_paeth_predictor_8x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x16 aom_paeth_predictor_8x16_neon
+
+void aom_paeth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x32 aom_paeth_predictor_8x32_neon
void aom_paeth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_8x4 aom_paeth_predictor_8x4_c
+void aom_paeth_predictor_8x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x4 aom_paeth_predictor_8x4_neon
void aom_paeth_predictor_8x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_8x8 aom_paeth_predictor_8x8_c
+void aom_paeth_predictor_8x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x8 aom_paeth_predictor_8x8_neon
void aom_pixel_scale_c(const int16_t* src_diff,
ptrdiff_t src_stride,
@@ -2238,20 +2578,6 @@ void aom_quantize_b_32x32_neon(const tran_low_t* coeff_ptr,
const int16_t* iscan);
#define aom_quantize_b_32x32 aom_quantize_b_32x32_neon
-void aom_quantize_b_32x32_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_32x32_adaptive aom_quantize_b_32x32_adaptive_c
-
void aom_quantize_b_64x64_c(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -2278,34 +2604,6 @@ void aom_quantize_b_64x64_neon(const tran_low_t* coeff_ptr,
const int16_t* iscan);
#define aom_quantize_b_64x64 aom_quantize_b_64x64_neon
-void aom_quantize_b_64x64_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_64x64_adaptive aom_quantize_b_64x64_adaptive_c
-
-void aom_quantize_b_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_adaptive aom_quantize_b_adaptive_c
-
unsigned int aom_sad128x128_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
@@ -2328,7 +2626,12 @@ void aom_sad128x128x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad128x128x4d aom_sad128x128x4d_c
+void aom_sad128x128x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad128x128x4d aom_sad128x128x4d_neon
void aom_sad128x128x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2342,7 +2645,11 @@ unsigned int aom_sad128x64_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad128x64 aom_sad128x64_c
+unsigned int aom_sad128x64_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad128x64 aom_sad128x64_neon
unsigned int aom_sad128x64_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2356,7 +2663,12 @@ void aom_sad128x64x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad128x64x4d aom_sad128x64x4d_c
+void aom_sad128x64x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad128x64x4d aom_sad128x64x4d_neon
void aom_sad128x64x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2415,7 +2727,11 @@ unsigned int aom_sad16x32_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad16x32 aom_sad16x32_c
+unsigned int aom_sad16x32_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad16x32 aom_sad16x32_neon
unsigned int aom_sad16x32_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2429,7 +2745,12 @@ void aom_sad16x32x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad16x32x4d aom_sad16x32x4d_c
+void aom_sad16x32x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad16x32x4d aom_sad16x32x4d_neon
void aom_sad16x32x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2461,7 +2782,12 @@ void aom_sad16x8x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad16x8x4d aom_sad16x8x4d_c
+void aom_sad16x8x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad16x8x4d aom_sad16x8x4d_neon
void aom_sad16x8x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2483,7 +2809,11 @@ unsigned int aom_sad32x16_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad32x16 aom_sad32x16_c
+unsigned int aom_sad32x16_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad32x16 aom_sad32x16_neon
unsigned int aom_sad32x16_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2497,7 +2827,12 @@ void aom_sad32x16x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad32x16x4d aom_sad32x16x4d_c
+void aom_sad32x16x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad32x16x4d aom_sad32x16x4d_neon
void aom_sad32x16x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2548,7 +2883,11 @@ unsigned int aom_sad32x64_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad32x64 aom_sad32x64_c
+unsigned int aom_sad32x64_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad32x64 aom_sad32x64_neon
unsigned int aom_sad32x64_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2562,7 +2901,12 @@ void aom_sad32x64x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad32x64x4d aom_sad32x64x4d_c
+void aom_sad32x64x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad32x64x4d aom_sad32x64x4d_neon
void aom_sad32x64x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2602,7 +2946,12 @@ void aom_sad4x4x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad4x4x4d aom_sad4x4x4d_c
+void aom_sad4x4x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad4x4x4d aom_sad4x4x4d_neon
void aom_sad4x4x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2616,7 +2965,11 @@ unsigned int aom_sad4x8_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad4x8 aom_sad4x8_c
+unsigned int aom_sad4x8_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad4x8 aom_sad4x8_neon
unsigned int aom_sad4x8_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2630,7 +2983,12 @@ void aom_sad4x8x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad4x8x4d aom_sad4x8x4d_c
+void aom_sad4x8x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad4x8x4d aom_sad4x8x4d_neon
void aom_sad4x8x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2652,7 +3010,11 @@ unsigned int aom_sad64x128_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad64x128 aom_sad64x128_c
+unsigned int aom_sad64x128_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad64x128 aom_sad64x128_neon
unsigned int aom_sad64x128_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2666,7 +3028,12 @@ void aom_sad64x128x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad64x128x4d aom_sad64x128x4d_c
+void aom_sad64x128x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad64x128x4d aom_sad64x128x4d_neon
void aom_sad64x128x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2680,7 +3047,11 @@ unsigned int aom_sad64x32_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad64x32 aom_sad64x32_c
+unsigned int aom_sad64x32_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad64x32 aom_sad64x32_neon
unsigned int aom_sad64x32_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2694,7 +3065,12 @@ void aom_sad64x32x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad64x32x4d aom_sad64x32x4d_c
+void aom_sad64x32x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad64x32x4d aom_sad64x32x4d_neon
void aom_sad64x32x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2771,7 +3147,12 @@ void aom_sad8x16x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad8x16x4d aom_sad8x16x4d_c
+void aom_sad8x16x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad8x16x4d aom_sad8x16x4d_neon
void aom_sad8x16x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2785,7 +3166,11 @@ unsigned int aom_sad8x4_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad8x4 aom_sad8x4_c
+unsigned int aom_sad8x4_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad8x4 aom_sad8x4_neon
unsigned int aom_sad8x4_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2799,7 +3184,12 @@ void aom_sad8x4x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad8x4x4d aom_sad8x4x4d_c
+void aom_sad8x4x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad8x4x4d aom_sad8x4x4d_neon
void aom_sad8x4x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2831,7 +3221,12 @@ void aom_sad8x8x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad8x8x4d aom_sad8x8x4d_c
+void aom_sad8x8x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad8x8x4d aom_sad8x8x4d_neon
void aom_sad8x8x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -3219,85 +3614,191 @@ void aom_smooth_h_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_16x16 aom_smooth_h_predictor_16x16_c
+void aom_smooth_h_predictor_16x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x16 aom_smooth_h_predictor_16x16_neon
void aom_smooth_h_predictor_16x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_16x32 aom_smooth_h_predictor_16x32_c
+void aom_smooth_h_predictor_16x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x32 aom_smooth_h_predictor_16x32_neon
-void aom_smooth_h_predictor_16x8_c(uint8_t* dst,
+void aom_smooth_h_predictor_16x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_16x8 aom_smooth_h_predictor_16x8_c
+void aom_smooth_h_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x4 aom_smooth_h_predictor_16x4_neon
-void aom_smooth_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_h_predictor_2x2 aom_smooth_h_predictor_2x2_c
+void aom_smooth_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x64 aom_smooth_h_predictor_16x64_neon
+
+void aom_smooth_h_predictor_16x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_16x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x8 aom_smooth_h_predictor_16x8_neon
void aom_smooth_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_32x16 aom_smooth_h_predictor_32x16_c
+void aom_smooth_h_predictor_32x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x16 aom_smooth_h_predictor_32x16_neon
void aom_smooth_h_predictor_32x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_32x32 aom_smooth_h_predictor_32x32_c
+void aom_smooth_h_predictor_32x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x32 aom_smooth_h_predictor_32x32_neon
void aom_smooth_h_predictor_32x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_32x64 aom_smooth_h_predictor_32x64_c
+void aom_smooth_h_predictor_32x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x64 aom_smooth_h_predictor_32x64_neon
+
+void aom_smooth_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x8 aom_smooth_h_predictor_32x8_neon
+
+void aom_smooth_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_4x16 aom_smooth_h_predictor_4x16_neon
void aom_smooth_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_4x4 aom_smooth_h_predictor_4x4_c
+void aom_smooth_h_predictor_4x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_4x4 aom_smooth_h_predictor_4x4_neon
void aom_smooth_h_predictor_4x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_4x8 aom_smooth_h_predictor_4x8_c
+void aom_smooth_h_predictor_4x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_4x8 aom_smooth_h_predictor_4x8_neon
+
+void aom_smooth_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_64x16 aom_smooth_h_predictor_64x16_neon
void aom_smooth_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_64x32 aom_smooth_h_predictor_64x32_c
+void aom_smooth_h_predictor_64x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_64x32 aom_smooth_h_predictor_64x32_neon
void aom_smooth_h_predictor_64x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_64x64 aom_smooth_h_predictor_64x64_c
+void aom_smooth_h_predictor_64x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_64x64 aom_smooth_h_predictor_64x64_neon
void aom_smooth_h_predictor_8x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_8x16 aom_smooth_h_predictor_8x16_c
+void aom_smooth_h_predictor_8x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x16 aom_smooth_h_predictor_8x16_neon
+
+void aom_smooth_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x32 aom_smooth_h_predictor_8x32_neon
void aom_smooth_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_8x4 aom_smooth_h_predictor_8x4_c
+void aom_smooth_h_predictor_8x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x4 aom_smooth_h_predictor_8x4_neon
void aom_smooth_h_predictor_8x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_8x8 aom_smooth_h_predictor_8x8_c
+void aom_smooth_h_predictor_8x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x8 aom_smooth_h_predictor_8x8_neon
void aom_smooth_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
@@ -3319,6 +3820,26 @@ void aom_smooth_predictor_16x32_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_16x32 aom_smooth_predictor_16x32_neon
+void aom_smooth_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_16x4 aom_smooth_predictor_16x4_neon
+
+void aom_smooth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_16x64 aom_smooth_predictor_16x64_neon
+
void aom_smooth_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3329,12 +3850,6 @@ void aom_smooth_predictor_16x8_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_16x8 aom_smooth_predictor_16x8_neon
-void aom_smooth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c
-
void aom_smooth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3365,6 +3880,26 @@ void aom_smooth_predictor_32x64_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_32x64 aom_smooth_predictor_32x64_neon
+void aom_smooth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_32x8 aom_smooth_predictor_32x8_neon
+
+void aom_smooth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_4x16 aom_smooth_predictor_4x16_neon
+
void aom_smooth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3385,6 +3920,16 @@ void aom_smooth_predictor_4x8_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_4x8 aom_smooth_predictor_4x8_neon
+void aom_smooth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_64x16 aom_smooth_predictor_64x16_neon
+
void aom_smooth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3415,6 +3960,16 @@ void aom_smooth_predictor_8x16_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_8x16 aom_smooth_predictor_8x16_neon
+void aom_smooth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_8x32 aom_smooth_predictor_8x32_neon
+
void aom_smooth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3439,85 +3994,191 @@ void aom_smooth_v_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_16x16 aom_smooth_v_predictor_16x16_c
+void aom_smooth_v_predictor_16x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x16 aom_smooth_v_predictor_16x16_neon
void aom_smooth_v_predictor_16x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_16x32 aom_smooth_v_predictor_16x32_c
+void aom_smooth_v_predictor_16x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x32 aom_smooth_v_predictor_16x32_neon
-void aom_smooth_v_predictor_16x8_c(uint8_t* dst,
+void aom_smooth_v_predictor_16x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_16x8 aom_smooth_v_predictor_16x8_c
+void aom_smooth_v_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x4 aom_smooth_v_predictor_16x4_neon
-void aom_smooth_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_v_predictor_2x2 aom_smooth_v_predictor_2x2_c
+void aom_smooth_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x64 aom_smooth_v_predictor_16x64_neon
+
+void aom_smooth_v_predictor_16x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_16x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x8 aom_smooth_v_predictor_16x8_neon
void aom_smooth_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_32x16 aom_smooth_v_predictor_32x16_c
+void aom_smooth_v_predictor_32x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x16 aom_smooth_v_predictor_32x16_neon
void aom_smooth_v_predictor_32x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_32x32 aom_smooth_v_predictor_32x32_c
+void aom_smooth_v_predictor_32x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x32 aom_smooth_v_predictor_32x32_neon
void aom_smooth_v_predictor_32x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_32x64 aom_smooth_v_predictor_32x64_c
+void aom_smooth_v_predictor_32x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x64 aom_smooth_v_predictor_32x64_neon
+
+void aom_smooth_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x8 aom_smooth_v_predictor_32x8_neon
+
+void aom_smooth_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_4x16 aom_smooth_v_predictor_4x16_neon
void aom_smooth_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_4x4 aom_smooth_v_predictor_4x4_c
+void aom_smooth_v_predictor_4x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_4x4 aom_smooth_v_predictor_4x4_neon
void aom_smooth_v_predictor_4x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_4x8 aom_smooth_v_predictor_4x8_c
+void aom_smooth_v_predictor_4x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_4x8 aom_smooth_v_predictor_4x8_neon
+
+void aom_smooth_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_64x16 aom_smooth_v_predictor_64x16_neon
void aom_smooth_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_64x32 aom_smooth_v_predictor_64x32_c
+void aom_smooth_v_predictor_64x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_64x32 aom_smooth_v_predictor_64x32_neon
void aom_smooth_v_predictor_64x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_64x64 aom_smooth_v_predictor_64x64_c
+void aom_smooth_v_predictor_64x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_64x64 aom_smooth_v_predictor_64x64_neon
void aom_smooth_v_predictor_8x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_8x16 aom_smooth_v_predictor_8x16_c
+void aom_smooth_v_predictor_8x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x16 aom_smooth_v_predictor_8x16_neon
+
+void aom_smooth_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x32 aom_smooth_v_predictor_8x32_neon
void aom_smooth_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_8x4 aom_smooth_v_predictor_8x4_c
+void aom_smooth_v_predictor_8x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x4 aom_smooth_v_predictor_8x4_neon
void aom_smooth_v_predictor_8x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_8x8 aom_smooth_v_predictor_8x8_c
+void aom_smooth_v_predictor_8x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x8 aom_smooth_v_predictor_8x8_neon
int64_t aom_sse_c(const uint8_t* a,
int a_stride,
@@ -3996,7 +4657,12 @@ uint64_t aom_sum_sse_2d_i16_c(const int16_t* src,
int width,
int height,
int* sum);
-#define aom_sum_sse_2d_i16 aom_sum_sse_2d_i16_c
+uint64_t aom_sum_sse_2d_i16_neon(const int16_t* src,
+ int src_stride,
+ int width,
+ int height,
+ int* sum);
+#define aom_sum_sse_2d_i16 aom_sum_sse_2d_i16_neon
void aom_v_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
@@ -4014,18 +4680,24 @@ void aom_v_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_16x32 aom_v_predictor_16x32_c
+void aom_v_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x4 aom_v_predictor_16x4_c
+
+void aom_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x64 aom_v_predictor_16x64_c
+
void aom_v_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_v_predictor_16x8 aom_v_predictor_16x8_c
-void aom_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_v_predictor_2x2 aom_v_predictor_2x2_c
-
void aom_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4048,6 +4720,18 @@ void aom_v_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_32x64 aom_v_predictor_32x64_c
+void aom_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_32x8 aom_v_predictor_32x8_c
+
+void aom_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_4x16 aom_v_predictor_4x16_c
+
void aom_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4064,6 +4748,12 @@ void aom_v_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_4x8 aom_v_predictor_4x8_c
+void aom_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_64x16 aom_v_predictor_64x16_c
+
void aom_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4082,6 +4772,12 @@ void aom_v_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_8x16 aom_v_predictor_8x16_c
+void aom_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_8x32 aom_v_predictor_8x32_c
+
void aom_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4317,8 +5013,8 @@ unsigned int aom_variance8x8_neon(const uint8_t* src_ptr,
unsigned int* sse);
#define aom_variance8x8 aom_variance8x8_neon
-int aom_vector_var_c(const int16_t* ref, const int16_t* src, const int bwl);
-int aom_vector_var_neon(const int16_t* ref, const int16_t* src, const int bwl);
+int aom_vector_var_c(const int16_t* ref, const int16_t* src, int bwl);
+int aom_vector_var_neon(const int16_t* ref, const int16_t* src, int bwl);
#define aom_vector_var aom_vector_var_neon
void aom_dsp_rtcd(void);
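
Note on the pattern repeated throughout the rtcd hunks above: for targets where NEON is unconditionally available (the ios/arm-neon and ios/arm64 configs), the generated header forward-declares both the C reference and the NEON implementation and then binds the public name to the NEON variant with a plain #define, so call sites compile straight to the SIMD routine with no function-pointer indirection. A minimal sketch with a hypothetical symbol (aom_foo is not a real libaom function); in the generated files the generator has already picked the variant, so only the bare #define appears, and the #if here just makes the sketch self-contained:

#include <stdint.h>

unsigned int aom_foo_c(const uint8_t* src_ptr, int src_stride);    /* C reference */
unsigned int aom_foo_neon(const uint8_t* src_ptr, int src_stride); /* NEON variant */

#if HAVE_NEON
#define aom_foo aom_foo_neon /* static binding: no runtime dispatch */
#else
#define aom_foo aom_foo_c
#endif

For the linux/arm-neon-cpu-detect configuration listed in this commit, the generated headers instead (if memory serves) declare RTCD_EXTERN function pointers that aom_dsp_rtcd() fills in after probing the CPU at startup, which is why that file's diff is so much larger.
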
diff --git a/chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_scale_rtcd.h b/chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_scale_rtcd.h
index e27b558dd38..7db7b9d0f84 100644
--- a/chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_scale_rtcd.h
+++ b/chromium/third_party/libaom/source/config/ios/arm-neon/config/aom_scale_rtcd.h
@@ -18,6 +18,12 @@ void aom_extend_frame_borders_c(struct yv12_buffer_config* ybf,
const int num_planes);
#define aom_extend_frame_borders aom_extend_frame_borders_c
+void aom_extend_frame_borders_plane_row_c(const struct yv12_buffer_config* ybf,
+ int plane,
+ int v_start,
+ int v_end);
+#define aom_extend_frame_borders_plane_row aom_extend_frame_borders_plane_row_c
+
void aom_extend_frame_borders_y_c(struct yv12_buffer_config* ybf);
#define aom_extend_frame_borders_y aom_extend_frame_borders_y_c
diff --git a/chromium/third_party/libaom/source/config/ios/arm-neon/config/av1_rtcd.h b/chromium/third_party/libaom/source/config/ios/arm-neon/config/av1_rtcd.h
index 23e831cc591..fd9ac835ad7 100644
--- a/chromium/third_party/libaom/source/config/ios/arm-neon/config/av1_rtcd.h
+++ b/chromium/third_party/libaom/source/config/ios/arm-neon/config/av1_rtcd.h
@@ -183,6 +183,30 @@ void aom_upsampled_pred_c(MACROBLOCKD* xd,
int subpel_search);
#define aom_upsampled_pred aom_upsampled_pred_c
+void av1_apply_selfguided_restoration_c(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+void av1_apply_selfguided_restoration_neon(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+#define av1_apply_selfguided_restoration av1_apply_selfguided_restoration_neon
+
int64_t av1_block_error_c(const tran_low_t* coeff,
const tran_low_t* dqcoeff,
intptr_t block_size,
@@ -234,6 +258,14 @@ void av1_build_compound_diffwtd_mask_d16_neon(uint8_t* mask,
#define av1_build_compound_diffwtd_mask_d16 \
av1_build_compound_diffwtd_mask_d16_neon
+int64_t av1_calc_frame_error_c(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+#define av1_calc_frame_error av1_calc_frame_error_c
+
void av1_calc_indices_dim1_c(const int* data,
const int* centroids,
uint8_t* indices,
@@ -1314,6 +1346,28 @@ void av1_round_shift_array_c(int32_t* arr, int size, int bit);
void av1_round_shift_array_neon(int32_t* arr, int size, int bit);
#define av1_round_shift_array av1_round_shift_array_neon
+int av1_selfguided_restoration_c(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+int av1_selfguided_restoration_neon(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+#define av1_selfguided_restoration av1_selfguided_restoration_neon
+
void av1_txb_init_levels_c(const tran_low_t* const coeff,
const int width,
const int height,
@@ -1330,6 +1384,44 @@ void av1_upsample_intra_edge_c(uint8_t* p, int sz);
void av1_upsample_intra_edge_high_c(uint16_t* p, int sz, int bd);
#define av1_upsample_intra_edge_high av1_upsample_intra_edge_high_c
+void av1_warp_affine_c(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+void av1_warp_affine_neon(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+#define av1_warp_affine av1_warp_affine_neon
+
void av1_wedge_compute_delta_squares_c(int16_t* d,
const int16_t* a,
const int16_t* b,
@@ -1346,7 +1438,11 @@ uint64_t av1_wedge_sse_from_residuals_c(const int16_t* r1,
const int16_t* d,
const uint8_t* m,
int N);
-#define av1_wedge_sse_from_residuals av1_wedge_sse_from_residuals_c
+uint64_t av1_wedge_sse_from_residuals_neon(const int16_t* r1,
+ const int16_t* d,
+ const uint8_t* m,
+ int N);
+#define av1_wedge_sse_from_residuals av1_wedge_sse_from_residuals_neon
void av1_wiener_convolve_add_src_c(const uint8_t* src,
ptrdiff_t src_stride,
@@ -1610,7 +1706,15 @@ void cdef_find_dir_dual_c(const uint16_t* img1,
int coeff_shift,
int* out1,
int* out2);
-#define cdef_find_dir_dual cdef_find_dir_dual_c
+void cdef_find_dir_dual_neon(const uint16_t* img1,
+ const uint16_t* img2,
+ int stride,
+ int32_t* var1,
+ int32_t* var2,
+ int coeff_shift,
+ int* out1,
+ int* out2);
+#define cdef_find_dir_dual cdef_find_dir_dual_neon
cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_c(TX_SIZE tx_size);
cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_neon(TX_SIZE tx_size);
diff --git a/chromium/third_party/libaom/source/config/ios/arm64/config/aom_config.asm b/chromium/third_party/libaom/source/config/ios/arm64/config/aom_config.asm
index 15df49ee5ab..c5ebd6ab9ba 100644
--- a/chromium/third_party/libaom/source/config/ios/arm64/config/aom_config.asm
+++ b/chromium/third_party/libaom/source/config/ios/arm64/config/aom_config.asm
@@ -9,7 +9,6 @@
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;
ARCH_ARM equ 1
-ARCH_MIPS equ 0
ARCH_PPC equ 0
ARCH_X86 equ 0
ARCH_X86_64 equ 0
@@ -33,8 +32,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
CONFIG_ENTROPY_STATS equ 0
CONFIG_EXCLUDE_SIMD_MISMATCH equ 0
CONFIG_FPMT_TEST equ 0
-CONFIG_FRAME_PARALLEL_ENCODE equ 0
-CONFIG_FRAME_PARALLEL_ENCODE_2 equ 0
CONFIG_GCC equ 1
CONFIG_GCOV equ 0
CONFIG_GPROF equ 0
@@ -50,7 +47,7 @@ CONFIG_NORMAL_TILE_MODE equ 1
CONFIG_OPTICAL_FLOW_API equ 0
CONFIG_OS_SUPPORT equ 1
CONFIG_PARTITION_SEARCH_ORDER equ 0
-CONFIG_PIC equ 0
+CONFIG_PIC equ 1
CONFIG_RATECTRL_LOG equ 0
CONFIG_RD_COMMAND equ 0
CONFIG_RD_DEBUG equ 0
@@ -69,14 +66,11 @@ CONFIG_WEBM_IO equ 1
DECODE_HEIGHT_LIMIT equ 16384
DECODE_WIDTH_LIMIT equ 16384
FORCE_HIGHBITDEPTH_DECODING equ 0
+HAVE_ARM_CRC32 equ 0
HAVE_AVX equ 0
HAVE_AVX2 equ 0
-HAVE_DSPR2 equ 0
HAVE_FEXCEPT equ 1
-HAVE_MIPS32 equ 0
-HAVE_MIPS64 equ 0
HAVE_MMX equ 0
-HAVE_MSA equ 0
HAVE_NEON equ 1
HAVE_PTHREAD_H equ 1
HAVE_SSE equ 0
diff --git a/chromium/third_party/libaom/source/config/ios/arm64/config/aom_config.c b/chromium/third_party/libaom/source/config/ios/arm64/config/aom_config.c
index cd0a632b72a..7f23b547ab1 100644
--- a/chromium/third_party/libaom/source/config/ios/arm64/config/aom_config.c
+++ b/chromium/third_party/libaom/source/config/ios/arm64/config/aom_config.c
@@ -9,5 +9,5 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom/aom_codec.h"
-static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DCMAKE_TOOLCHAIN_FILE=\"../source/libaom/build/cmake/toolchains/arm64-ios.cmake\" -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
+static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DCMAKE_TOOLCHAIN_FILE=\"../source/libaom/build/cmake/toolchains/arm64-ios.cmake\" -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
const char *aom_codec_build_config(void) {return cfg;}
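
The generated aom_config.c embeds the exact cmake command line used to configure the build; the hunk above only reorders -DCONFIG_REALTIME_ONLY=1 within that string, it does not change any flag values. libaom exposes the string at runtime through aom_codec_build_config() (declared in aom/aom_codec.h, as included above), which is a quick way to check which flags a given binary was actually built with:

#include <stdio.h>
#include "aom/aom_codec.h"

int main(void) {
  /* Prints the embedded cfg string, e.g. "cmake ../source/libaom ..." */
  printf("%s\n", aom_codec_build_config());
  return 0;
}
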
diff --git a/chromium/third_party/libaom/source/config/ios/arm64/config/aom_config.h b/chromium/third_party/libaom/source/config/ios/arm64/config/aom_config.h
index f8e23f257de..dbbde9993bd 100644
--- a/chromium/third_party/libaom/source/config/ios/arm64/config/aom_config.h
+++ b/chromium/third_party/libaom/source/config/ios/arm64/config/aom_config.h
@@ -11,7 +11,6 @@
#ifndef AOM_CONFIG_H_
#define AOM_CONFIG_H_
#define ARCH_ARM 1
-#define ARCH_MIPS 0
#define ARCH_PPC 0
#define ARCH_X86 0
#define ARCH_X86_64 0
@@ -35,8 +34,6 @@
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_EXCLUDE_SIMD_MISMATCH 0
#define CONFIG_FPMT_TEST 0
-#define CONFIG_FRAME_PARALLEL_ENCODE 0
-#define CONFIG_FRAME_PARALLEL_ENCODE_2 0
#define CONFIG_GCC 1
#define CONFIG_GCOV 0
#define CONFIG_GPROF 0
@@ -52,7 +49,7 @@
#define CONFIG_OPTICAL_FLOW_API 0
#define CONFIG_OS_SUPPORT 1
#define CONFIG_PARTITION_SEARCH_ORDER 0
-#define CONFIG_PIC 0
+#define CONFIG_PIC 1
#define CONFIG_RATECTRL_LOG 0
#define CONFIG_RD_COMMAND 0
#define CONFIG_RD_DEBUG 0
@@ -71,14 +68,11 @@
#define DECODE_HEIGHT_LIMIT 16384
#define DECODE_WIDTH_LIMIT 16384
#define FORCE_HIGHBITDEPTH_DECODING 0
+#define HAVE_ARM_CRC32 0
#define HAVE_AVX 0
#define HAVE_AVX2 0
-#define HAVE_DSPR2 0
#define HAVE_FEXCEPT 1
-#define HAVE_MIPS32 0
-#define HAVE_MIPS64 0
#define HAVE_MMX 0
-#define HAVE_MSA 0
#define HAVE_NEON 1
#define HAVE_PTHREAD_H 1
#define HAVE_SSE 0
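
Across the platform configs in this update the MIPS-related symbols (ARCH_MIPS, HAVE_DSPR2, HAVE_MIPS32, HAVE_MIPS64, HAVE_MSA) are dropped, HAVE_ARM_CRC32 is introduced (disabled here), and CONFIG_PIC flips from 0 to 1 for this iOS arm64 target. Source files gate their SIMD paths on these macros; a hedged sketch of the usual guard, assuming libaom's "config/aom_config.h" include convention:

#include "config/aom_config.h"

#if HAVE_NEON
#include <arm_neon.h> /* NEON intrinsics are compiled in for this config */
#endif

#if HAVE_ARM_CRC32
/* CRC32 intrinsics would go here; this config defines HAVE_ARM_CRC32 as 0,
 * so the block is compiled out. */
#endif
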
diff --git a/chromium/third_party/libaom/source/config/ios/arm64/config/aom_dsp_rtcd.h b/chromium/third_party/libaom/source/config/ios/arm64/config/aom_dsp_rtcd.h
index 155c11857c0..c2d2ac85aaa 100644
--- a/chromium/third_party/libaom/source/config/ios/arm64/config/aom_dsp_rtcd.h
+++ b/chromium/third_party/libaom/source/config/ios/arm64/config/aom_dsp_rtcd.h
@@ -34,7 +34,12 @@ void aom_avg_8x8_quad_c(const uint8_t* s,
int x16_idx,
int y16_idx,
int* avg);
-#define aom_avg_8x8_quad aom_avg_8x8_quad_c
+void aom_avg_8x8_quad_neon(const uint8_t* s,
+ int p,
+ int x16_idx,
+ int y16_idx,
+ int* avg);
+#define aom_avg_8x8_quad aom_avg_8x8_quad_neon
void aom_blend_a64_hmask_c(uint8_t* dst,
uint32_t dst_stride,
@@ -176,18 +181,24 @@ void aom_dc_128_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_c
+void aom_dc_128_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x4 aom_dc_128_predictor_16x4_c
+
+void aom_dc_128_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x64 aom_dc_128_predictor_16x64_c
+
void aom_dc_128_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_c
-void aom_dc_128_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c
-
void aom_dc_128_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -210,6 +221,18 @@ void aom_dc_128_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_32x64 aom_dc_128_predictor_32x64_c
+void aom_dc_128_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_32x8 aom_dc_128_predictor_32x8_c
+
+void aom_dc_128_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_4x16 aom_dc_128_predictor_4x16_c
+
void aom_dc_128_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -226,6 +249,12 @@ void aom_dc_128_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_c
+void aom_dc_128_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_64x16 aom_dc_128_predictor_64x16_c
+
void aom_dc_128_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -244,6 +273,12 @@ void aom_dc_128_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_c
+void aom_dc_128_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_8x32 aom_dc_128_predictor_8x32_c
+
void aom_dc_128_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -276,18 +311,24 @@ void aom_dc_left_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_c
+void aom_dc_left_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x4 aom_dc_left_predictor_16x4_c
+
+void aom_dc_left_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x64 aom_dc_left_predictor_16x64_c
+
void aom_dc_left_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_c
-void aom_dc_left_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c
-
void aom_dc_left_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -310,6 +351,18 @@ void aom_dc_left_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_32x64 aom_dc_left_predictor_32x64_c
+void aom_dc_left_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_32x8 aom_dc_left_predictor_32x8_c
+
+void aom_dc_left_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_4x16 aom_dc_left_predictor_4x16_c
+
void aom_dc_left_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -326,6 +379,12 @@ void aom_dc_left_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_c
+void aom_dc_left_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_64x16 aom_dc_left_predictor_64x16_c
+
void aom_dc_left_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -344,6 +403,12 @@ void aom_dc_left_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_c
+void aom_dc_left_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_8x32 aom_dc_left_predictor_8x32_c
+
void aom_dc_left_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -376,18 +441,24 @@ void aom_dc_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_c
+void aom_dc_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x4 aom_dc_predictor_16x4_c
+
+void aom_dc_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x64 aom_dc_predictor_16x64_c
+
void aom_dc_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_c
-void aom_dc_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c
-
void aom_dc_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -410,6 +481,18 @@ void aom_dc_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_32x64 aom_dc_predictor_32x64_c
+void aom_dc_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_32x8 aom_dc_predictor_32x8_c
+
+void aom_dc_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_4x16 aom_dc_predictor_4x16_c
+
void aom_dc_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -426,6 +509,12 @@ void aom_dc_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_c
+void aom_dc_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_64x16 aom_dc_predictor_64x16_c
+
void aom_dc_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -444,6 +533,12 @@ void aom_dc_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_c
+void aom_dc_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_8x32 aom_dc_predictor_8x32_c
+
void aom_dc_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -476,18 +571,24 @@ void aom_dc_top_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_c
+void aom_dc_top_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x4 aom_dc_top_predictor_16x4_c
+
+void aom_dc_top_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x64 aom_dc_top_predictor_16x64_c
+
void aom_dc_top_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_c
-void aom_dc_top_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c
-
void aom_dc_top_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -510,6 +611,18 @@ void aom_dc_top_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_32x64 aom_dc_top_predictor_32x64_c
+void aom_dc_top_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_32x8 aom_dc_top_predictor_32x8_c
+
+void aom_dc_top_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_4x16 aom_dc_top_predictor_4x16_c
+
void aom_dc_top_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -526,6 +639,12 @@ void aom_dc_top_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_c
+void aom_dc_top_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_64x16 aom_dc_top_predictor_64x16_c
+
void aom_dc_top_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -544,6 +663,12 @@ void aom_dc_top_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_c
+void aom_dc_top_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_8x32 aom_dc_top_predictor_8x32_c
+
void aom_dc_top_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1014,18 +1139,24 @@ void aom_h_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_16x32 aom_h_predictor_16x32_c
+void aom_h_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x4 aom_h_predictor_16x4_c
+
+void aom_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x64 aom_h_predictor_16x64_c
+
void aom_h_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_h_predictor_16x8 aom_h_predictor_16x8_c
-void aom_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_h_predictor_2x2 aom_h_predictor_2x2_c
-
void aom_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1048,6 +1179,18 @@ void aom_h_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_32x64 aom_h_predictor_32x64_c
+void aom_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_32x8 aom_h_predictor_32x8_c
+
+void aom_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_4x16 aom_h_predictor_4x16_c
+
void aom_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1064,6 +1207,12 @@ void aom_h_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_4x8 aom_h_predictor_4x8_c
+void aom_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_64x16 aom_h_predictor_64x16_c
+
void aom_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1082,6 +1231,12 @@ void aom_h_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_8x16 aom_h_predictor_8x16_c
+void aom_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_8x32 aom_h_predictor_8x32_c
+
void aom_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1124,11 +1279,6 @@ void aom_hadamard_8x8_neon(const int16_t* src_diff,
tran_low_t* coeff);
#define aom_hadamard_8x8 aom_hadamard_8x8_neon
-void aom_hadamard_8x8_dual_c(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-#define aom_hadamard_8x8_dual aom_hadamard_8x8_dual_c
-
void aom_hadamard_lp_16x16_c(const int16_t* src_diff,
ptrdiff_t src_stride,
int16_t* coeff);
@@ -1145,6 +1295,14 @@ void aom_hadamard_lp_8x8_neon(const int16_t* src_diff,
int16_t* coeff);
#define aom_hadamard_lp_8x8 aom_hadamard_lp_8x8_neon
+void aom_hadamard_lp_8x8_dual_c(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+void aom_hadamard_lp_8x8_dual_neon(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+#define aom_hadamard_lp_8x8_dual aom_hadamard_lp_8x8_dual_neon
+
void aom_ifft16x16_float_c(const float* input, float* temp, float* output);
#define aom_ifft16x16_float aom_ifft16x16_float_c
@@ -1160,18 +1318,32 @@ void aom_ifft4x4_float_c(const float* input, float* temp, float* output);
void aom_ifft8x8_float_c(const float* input, float* temp, float* output);
#define aom_ifft8x8_float aom_ifft8x8_float_c
-int16_t aom_int_pro_col_c(const uint8_t* ref, const int width);
-int16_t aom_int_pro_col_neon(const uint8_t* ref, const int width);
+void aom_int_pro_col_c(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_col_neon(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
#define aom_int_pro_col aom_int_pro_col_neon
-void aom_int_pro_row_c(int16_t hbuf[16],
+void aom_int_pro_row_c(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
-void aom_int_pro_row_neon(int16_t hbuf[16],
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_row_neon(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
+ const int width,
+ const int height,
+ int norm_factor);
#define aom_int_pro_row aom_int_pro_row_neon
void aom_lowbd_blend_a64_d16_mask_c(uint8_t* dst,
@@ -1237,7 +1409,12 @@ void aom_lpf_horizontal_14_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_14_quad aom_lpf_horizontal_14_quad_c
+void aom_lpf_horizontal_14_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_horizontal_14_quad aom_lpf_horizontal_14_quad_neon
void aom_lpf_horizontal_4_c(uint8_t* s,
int pitch,
@@ -1274,7 +1451,12 @@ void aom_lpf_horizontal_4_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_4_quad aom_lpf_horizontal_4_quad_c
+void aom_lpf_horizontal_4_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_horizontal_4_quad aom_lpf_horizontal_4_quad_neon
void aom_lpf_horizontal_6_c(uint8_t* s,
int pitch,
@@ -1311,7 +1493,12 @@ void aom_lpf_horizontal_6_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_6_quad aom_lpf_horizontal_6_quad_c
+void aom_lpf_horizontal_6_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_horizontal_6_quad aom_lpf_horizontal_6_quad_neon
void aom_lpf_horizontal_8_c(uint8_t* s,
int pitch,
@@ -1348,7 +1535,12 @@ void aom_lpf_horizontal_8_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_8_quad aom_lpf_horizontal_8_quad_c
+void aom_lpf_horizontal_8_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_horizontal_8_quad aom_lpf_horizontal_8_quad_neon
void aom_lpf_vertical_14_c(uint8_t* s,
int pitch,
@@ -1385,7 +1577,12 @@ void aom_lpf_vertical_14_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_14_quad aom_lpf_vertical_14_quad_c
+void aom_lpf_vertical_14_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_vertical_14_quad aom_lpf_vertical_14_quad_neon
void aom_lpf_vertical_4_c(uint8_t* s,
int pitch,
@@ -1422,7 +1619,12 @@ void aom_lpf_vertical_4_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_4_quad aom_lpf_vertical_4_quad_c
+void aom_lpf_vertical_4_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_vertical_4_quad aom_lpf_vertical_4_quad_neon
void aom_lpf_vertical_6_c(uint8_t* s,
int pitch,
@@ -1459,7 +1661,12 @@ void aom_lpf_vertical_6_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_6_quad aom_lpf_vertical_6_quad_c
+void aom_lpf_vertical_6_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_vertical_6_quad aom_lpf_vertical_6_quad_neon
void aom_lpf_vertical_8_c(uint8_t* s,
int pitch,
@@ -1496,7 +1703,12 @@ void aom_lpf_vertical_8_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_8_quad aom_lpf_vertical_8_quad_c
+void aom_lpf_vertical_8_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_vertical_8_quad aom_lpf_vertical_8_quad_neon
unsigned int aom_masked_sad128x128_c(const uint8_t* src,
int src_stride,
@@ -2070,21 +2282,43 @@ unsigned int aom_mse16x8_c(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
-#define aom_mse16x8 aom_mse16x8_c
+unsigned int aom_mse16x8_neon(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+#define aom_mse16x8 aom_mse16x8_neon
unsigned int aom_mse8x16_c(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
-#define aom_mse8x16 aom_mse8x16_c
+unsigned int aom_mse8x16_neon(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+#define aom_mse8x16 aom_mse8x16_neon
unsigned int aom_mse8x8_c(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
-#define aom_mse8x8 aom_mse8x8_c
+unsigned int aom_mse8x8_neon(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+#define aom_mse8x8 aom_mse8x8_neon
+
+uint64_t aom_mse_16xh_16bit_c(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+#define aom_mse_16xh_16bit aom_mse_16xh_16bit_c
uint64_t aom_mse_wxh_16bit_c(uint8_t* dst,
int dstride,
@@ -2098,85 +2332,191 @@ void aom_paeth_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_16x16 aom_paeth_predictor_16x16_c
+void aom_paeth_predictor_16x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x16 aom_paeth_predictor_16x16_neon
void aom_paeth_predictor_16x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_16x32 aom_paeth_predictor_16x32_c
+void aom_paeth_predictor_16x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x32 aom_paeth_predictor_16x32_neon
-void aom_paeth_predictor_16x8_c(uint8_t* dst,
+void aom_paeth_predictor_16x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_16x8 aom_paeth_predictor_16x8_c
+void aom_paeth_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x4 aom_paeth_predictor_16x4_neon
-void aom_paeth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c
+void aom_paeth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x64 aom_paeth_predictor_16x64_neon
+
+void aom_paeth_predictor_16x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x8 aom_paeth_predictor_16x8_neon
void aom_paeth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_32x16 aom_paeth_predictor_32x16_c
+void aom_paeth_predictor_32x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x16 aom_paeth_predictor_32x16_neon
void aom_paeth_predictor_32x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_32x32 aom_paeth_predictor_32x32_c
+void aom_paeth_predictor_32x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x32 aom_paeth_predictor_32x32_neon
void aom_paeth_predictor_32x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_32x64 aom_paeth_predictor_32x64_c
+void aom_paeth_predictor_32x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x64 aom_paeth_predictor_32x64_neon
+
+void aom_paeth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x8 aom_paeth_predictor_32x8_neon
+
+void aom_paeth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_4x16 aom_paeth_predictor_4x16_neon
void aom_paeth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_4x4 aom_paeth_predictor_4x4_c
+void aom_paeth_predictor_4x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_4x4 aom_paeth_predictor_4x4_neon
void aom_paeth_predictor_4x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_4x8 aom_paeth_predictor_4x8_c
+void aom_paeth_predictor_4x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_4x8 aom_paeth_predictor_4x8_neon
+
+void aom_paeth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_64x16 aom_paeth_predictor_64x16_neon
void aom_paeth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_64x32 aom_paeth_predictor_64x32_c
+void aom_paeth_predictor_64x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_64x32 aom_paeth_predictor_64x32_neon
void aom_paeth_predictor_64x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_64x64 aom_paeth_predictor_64x64_c
+void aom_paeth_predictor_64x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_64x64 aom_paeth_predictor_64x64_neon
void aom_paeth_predictor_8x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_8x16 aom_paeth_predictor_8x16_c
+void aom_paeth_predictor_8x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x16 aom_paeth_predictor_8x16_neon
+
+void aom_paeth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x32 aom_paeth_predictor_8x32_neon
void aom_paeth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_8x4 aom_paeth_predictor_8x4_c
+void aom_paeth_predictor_8x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x4 aom_paeth_predictor_8x4_neon
void aom_paeth_predictor_8x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_8x8 aom_paeth_predictor_8x8_c
+void aom_paeth_predictor_8x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x8 aom_paeth_predictor_8x8_neon
void aom_pixel_scale_c(const int16_t* src_diff,
ptrdiff_t src_stride,
@@ -2238,20 +2578,6 @@ void aom_quantize_b_32x32_neon(const tran_low_t* coeff_ptr,
const int16_t* iscan);
#define aom_quantize_b_32x32 aom_quantize_b_32x32_neon
-void aom_quantize_b_32x32_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_32x32_adaptive aom_quantize_b_32x32_adaptive_c
-
void aom_quantize_b_64x64_c(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -2278,34 +2604,6 @@ void aom_quantize_b_64x64_neon(const tran_low_t* coeff_ptr,
const int16_t* iscan);
#define aom_quantize_b_64x64 aom_quantize_b_64x64_neon
-void aom_quantize_b_64x64_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_64x64_adaptive aom_quantize_b_64x64_adaptive_c
-
-void aom_quantize_b_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_adaptive aom_quantize_b_adaptive_c
-
unsigned int aom_sad128x128_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
@@ -2328,7 +2626,12 @@ void aom_sad128x128x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad128x128x4d aom_sad128x128x4d_c
+void aom_sad128x128x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad128x128x4d aom_sad128x128x4d_neon
void aom_sad128x128x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2342,7 +2645,11 @@ unsigned int aom_sad128x64_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad128x64 aom_sad128x64_c
+unsigned int aom_sad128x64_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad128x64 aom_sad128x64_neon
unsigned int aom_sad128x64_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2356,7 +2663,12 @@ void aom_sad128x64x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad128x64x4d aom_sad128x64x4d_c
+void aom_sad128x64x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad128x64x4d aom_sad128x64x4d_neon
void aom_sad128x64x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2415,7 +2727,11 @@ unsigned int aom_sad16x32_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad16x32 aom_sad16x32_c
+unsigned int aom_sad16x32_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad16x32 aom_sad16x32_neon
unsigned int aom_sad16x32_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2429,7 +2745,12 @@ void aom_sad16x32x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad16x32x4d aom_sad16x32x4d_c
+void aom_sad16x32x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad16x32x4d aom_sad16x32x4d_neon
void aom_sad16x32x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2461,7 +2782,12 @@ void aom_sad16x8x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad16x8x4d aom_sad16x8x4d_c
+void aom_sad16x8x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad16x8x4d aom_sad16x8x4d_neon
void aom_sad16x8x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2483,7 +2809,11 @@ unsigned int aom_sad32x16_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad32x16 aom_sad32x16_c
+unsigned int aom_sad32x16_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad32x16 aom_sad32x16_neon
unsigned int aom_sad32x16_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2497,7 +2827,12 @@ void aom_sad32x16x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad32x16x4d aom_sad32x16x4d_c
+void aom_sad32x16x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad32x16x4d aom_sad32x16x4d_neon
void aom_sad32x16x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2548,7 +2883,11 @@ unsigned int aom_sad32x64_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad32x64 aom_sad32x64_c
+unsigned int aom_sad32x64_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad32x64 aom_sad32x64_neon
unsigned int aom_sad32x64_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2562,7 +2901,12 @@ void aom_sad32x64x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad32x64x4d aom_sad32x64x4d_c
+void aom_sad32x64x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad32x64x4d aom_sad32x64x4d_neon
void aom_sad32x64x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2602,7 +2946,12 @@ void aom_sad4x4x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad4x4x4d aom_sad4x4x4d_c
+void aom_sad4x4x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad4x4x4d aom_sad4x4x4d_neon
void aom_sad4x4x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2616,7 +2965,11 @@ unsigned int aom_sad4x8_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad4x8 aom_sad4x8_c
+unsigned int aom_sad4x8_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad4x8 aom_sad4x8_neon
unsigned int aom_sad4x8_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2630,7 +2983,12 @@ void aom_sad4x8x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad4x8x4d aom_sad4x8x4d_c
+void aom_sad4x8x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad4x8x4d aom_sad4x8x4d_neon
void aom_sad4x8x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2652,7 +3010,11 @@ unsigned int aom_sad64x128_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad64x128 aom_sad64x128_c
+unsigned int aom_sad64x128_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad64x128 aom_sad64x128_neon
unsigned int aom_sad64x128_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2666,7 +3028,12 @@ void aom_sad64x128x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad64x128x4d aom_sad64x128x4d_c
+void aom_sad64x128x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad64x128x4d aom_sad64x128x4d_neon
void aom_sad64x128x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2680,7 +3047,11 @@ unsigned int aom_sad64x32_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad64x32 aom_sad64x32_c
+unsigned int aom_sad64x32_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad64x32 aom_sad64x32_neon
unsigned int aom_sad64x32_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2694,7 +3065,12 @@ void aom_sad64x32x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad64x32x4d aom_sad64x32x4d_c
+void aom_sad64x32x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad64x32x4d aom_sad64x32x4d_neon
void aom_sad64x32x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2771,7 +3147,12 @@ void aom_sad8x16x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad8x16x4d aom_sad8x16x4d_c
+void aom_sad8x16x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad8x16x4d aom_sad8x16x4d_neon
void aom_sad8x16x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2785,7 +3166,11 @@ unsigned int aom_sad8x4_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad8x4 aom_sad8x4_c
+unsigned int aom_sad8x4_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad8x4 aom_sad8x4_neon
unsigned int aom_sad8x4_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2799,7 +3184,12 @@ void aom_sad8x4x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad8x4x4d aom_sad8x4x4d_c
+void aom_sad8x4x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad8x4x4d aom_sad8x4x4d_neon
void aom_sad8x4x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2831,7 +3221,12 @@ void aom_sad8x8x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad8x8x4d aom_sad8x8x4d_c
+void aom_sad8x8x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad8x8x4d aom_sad8x8x4d_neon
void aom_sad8x8x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -3219,85 +3614,191 @@ void aom_smooth_h_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_16x16 aom_smooth_h_predictor_16x16_c
+void aom_smooth_h_predictor_16x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x16 aom_smooth_h_predictor_16x16_neon
void aom_smooth_h_predictor_16x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_16x32 aom_smooth_h_predictor_16x32_c
+void aom_smooth_h_predictor_16x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x32 aom_smooth_h_predictor_16x32_neon
-void aom_smooth_h_predictor_16x8_c(uint8_t* dst,
+void aom_smooth_h_predictor_16x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_16x8 aom_smooth_h_predictor_16x8_c
+void aom_smooth_h_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x4 aom_smooth_h_predictor_16x4_neon
-void aom_smooth_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_h_predictor_2x2 aom_smooth_h_predictor_2x2_c
+void aom_smooth_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x64 aom_smooth_h_predictor_16x64_neon
+
+void aom_smooth_h_predictor_16x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_16x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x8 aom_smooth_h_predictor_16x8_neon
void aom_smooth_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_32x16 aom_smooth_h_predictor_32x16_c
+void aom_smooth_h_predictor_32x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x16 aom_smooth_h_predictor_32x16_neon
void aom_smooth_h_predictor_32x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_32x32 aom_smooth_h_predictor_32x32_c
+void aom_smooth_h_predictor_32x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x32 aom_smooth_h_predictor_32x32_neon
void aom_smooth_h_predictor_32x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_32x64 aom_smooth_h_predictor_32x64_c
+void aom_smooth_h_predictor_32x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x64 aom_smooth_h_predictor_32x64_neon
+
+void aom_smooth_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x8 aom_smooth_h_predictor_32x8_neon
+
+void aom_smooth_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_4x16 aom_smooth_h_predictor_4x16_neon
void aom_smooth_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_4x4 aom_smooth_h_predictor_4x4_c
+void aom_smooth_h_predictor_4x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_4x4 aom_smooth_h_predictor_4x4_neon
void aom_smooth_h_predictor_4x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_4x8 aom_smooth_h_predictor_4x8_c
+void aom_smooth_h_predictor_4x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_4x8 aom_smooth_h_predictor_4x8_neon
+
+void aom_smooth_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_64x16 aom_smooth_h_predictor_64x16_neon
void aom_smooth_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_64x32 aom_smooth_h_predictor_64x32_c
+void aom_smooth_h_predictor_64x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_64x32 aom_smooth_h_predictor_64x32_neon
void aom_smooth_h_predictor_64x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_64x64 aom_smooth_h_predictor_64x64_c
+void aom_smooth_h_predictor_64x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_64x64 aom_smooth_h_predictor_64x64_neon
void aom_smooth_h_predictor_8x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_8x16 aom_smooth_h_predictor_8x16_c
+void aom_smooth_h_predictor_8x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x16 aom_smooth_h_predictor_8x16_neon
+
+void aom_smooth_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x32 aom_smooth_h_predictor_8x32_neon
void aom_smooth_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_8x4 aom_smooth_h_predictor_8x4_c
+void aom_smooth_h_predictor_8x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x4 aom_smooth_h_predictor_8x4_neon
void aom_smooth_h_predictor_8x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_8x8 aom_smooth_h_predictor_8x8_c
+void aom_smooth_h_predictor_8x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x8 aom_smooth_h_predictor_8x8_neon
void aom_smooth_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
@@ -3319,6 +3820,26 @@ void aom_smooth_predictor_16x32_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_16x32 aom_smooth_predictor_16x32_neon
+void aom_smooth_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_16x4 aom_smooth_predictor_16x4_neon
+
+void aom_smooth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_16x64 aom_smooth_predictor_16x64_neon
+
void aom_smooth_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3329,12 +3850,6 @@ void aom_smooth_predictor_16x8_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_16x8 aom_smooth_predictor_16x8_neon
-void aom_smooth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c
-
void aom_smooth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3365,6 +3880,26 @@ void aom_smooth_predictor_32x64_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_32x64 aom_smooth_predictor_32x64_neon
+void aom_smooth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_32x8 aom_smooth_predictor_32x8_neon
+
+void aom_smooth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_4x16 aom_smooth_predictor_4x16_neon
+
void aom_smooth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3385,6 +3920,16 @@ void aom_smooth_predictor_4x8_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_4x8 aom_smooth_predictor_4x8_neon
+void aom_smooth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_64x16 aom_smooth_predictor_64x16_neon
+
void aom_smooth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3415,6 +3960,16 @@ void aom_smooth_predictor_8x16_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_8x16 aom_smooth_predictor_8x16_neon
+void aom_smooth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_8x32 aom_smooth_predictor_8x32_neon
+
void aom_smooth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3439,85 +3994,191 @@ void aom_smooth_v_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_16x16 aom_smooth_v_predictor_16x16_c
+void aom_smooth_v_predictor_16x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x16 aom_smooth_v_predictor_16x16_neon
void aom_smooth_v_predictor_16x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_16x32 aom_smooth_v_predictor_16x32_c
+void aom_smooth_v_predictor_16x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x32 aom_smooth_v_predictor_16x32_neon
-void aom_smooth_v_predictor_16x8_c(uint8_t* dst,
+void aom_smooth_v_predictor_16x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_16x8 aom_smooth_v_predictor_16x8_c
+void aom_smooth_v_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x4 aom_smooth_v_predictor_16x4_neon
-void aom_smooth_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_v_predictor_2x2 aom_smooth_v_predictor_2x2_c
+void aom_smooth_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x64 aom_smooth_v_predictor_16x64_neon
+
+void aom_smooth_v_predictor_16x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_16x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x8 aom_smooth_v_predictor_16x8_neon
void aom_smooth_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_32x16 aom_smooth_v_predictor_32x16_c
+void aom_smooth_v_predictor_32x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x16 aom_smooth_v_predictor_32x16_neon
void aom_smooth_v_predictor_32x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_32x32 aom_smooth_v_predictor_32x32_c
+void aom_smooth_v_predictor_32x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x32 aom_smooth_v_predictor_32x32_neon
void aom_smooth_v_predictor_32x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_32x64 aom_smooth_v_predictor_32x64_c
+void aom_smooth_v_predictor_32x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x64 aom_smooth_v_predictor_32x64_neon
+
+void aom_smooth_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x8 aom_smooth_v_predictor_32x8_neon
+
+void aom_smooth_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_4x16 aom_smooth_v_predictor_4x16_neon
void aom_smooth_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_4x4 aom_smooth_v_predictor_4x4_c
+void aom_smooth_v_predictor_4x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_4x4 aom_smooth_v_predictor_4x4_neon
void aom_smooth_v_predictor_4x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_4x8 aom_smooth_v_predictor_4x8_c
+void aom_smooth_v_predictor_4x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_4x8 aom_smooth_v_predictor_4x8_neon
+
+void aom_smooth_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_64x16 aom_smooth_v_predictor_64x16_neon
void aom_smooth_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_64x32 aom_smooth_v_predictor_64x32_c
+void aom_smooth_v_predictor_64x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_64x32 aom_smooth_v_predictor_64x32_neon
void aom_smooth_v_predictor_64x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_64x64 aom_smooth_v_predictor_64x64_c
+void aom_smooth_v_predictor_64x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_64x64 aom_smooth_v_predictor_64x64_neon
void aom_smooth_v_predictor_8x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_8x16 aom_smooth_v_predictor_8x16_c
+void aom_smooth_v_predictor_8x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x16 aom_smooth_v_predictor_8x16_neon
+
+void aom_smooth_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x32 aom_smooth_v_predictor_8x32_neon
void aom_smooth_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_8x4 aom_smooth_v_predictor_8x4_c
+void aom_smooth_v_predictor_8x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x4 aom_smooth_v_predictor_8x4_neon
void aom_smooth_v_predictor_8x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_8x8 aom_smooth_v_predictor_8x8_c
+void aom_smooth_v_predictor_8x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x8 aom_smooth_v_predictor_8x8_neon
int64_t aom_sse_c(const uint8_t* a,
int a_stride,
@@ -3996,7 +4657,12 @@ uint64_t aom_sum_sse_2d_i16_c(const int16_t* src,
int width,
int height,
int* sum);
-#define aom_sum_sse_2d_i16 aom_sum_sse_2d_i16_c
+uint64_t aom_sum_sse_2d_i16_neon(const int16_t* src,
+ int src_stride,
+ int width,
+ int height,
+ int* sum);
+#define aom_sum_sse_2d_i16 aom_sum_sse_2d_i16_neon
void aom_v_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
@@ -4014,18 +4680,24 @@ void aom_v_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_16x32 aom_v_predictor_16x32_c
+void aom_v_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x4 aom_v_predictor_16x4_c
+
+void aom_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x64 aom_v_predictor_16x64_c
+
void aom_v_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_v_predictor_16x8 aom_v_predictor_16x8_c
-void aom_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_v_predictor_2x2 aom_v_predictor_2x2_c
-
void aom_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4048,6 +4720,18 @@ void aom_v_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_32x64 aom_v_predictor_32x64_c
+void aom_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_32x8 aom_v_predictor_32x8_c
+
+void aom_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_4x16 aom_v_predictor_4x16_c
+
void aom_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4064,6 +4748,12 @@ void aom_v_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_4x8 aom_v_predictor_4x8_c
+void aom_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_64x16 aom_v_predictor_64x16_c
+
void aom_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4082,6 +4772,12 @@ void aom_v_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_8x16 aom_v_predictor_8x16_c
+void aom_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_8x32 aom_v_predictor_8x32_c
+
void aom_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4317,8 +5013,8 @@ unsigned int aom_variance8x8_neon(const uint8_t* src_ptr,
unsigned int* sse);
#define aom_variance8x8 aom_variance8x8_neon
-int aom_vector_var_c(const int16_t* ref, const int16_t* src, const int bwl);
-int aom_vector_var_neon(const int16_t* ref, const int16_t* src, const int bwl);
+int aom_vector_var_c(const int16_t* ref, const int16_t* src, int bwl);
+int aom_vector_var_neon(const int16_t* ref, const int16_t* src, int bwl);
#define aom_vector_var aom_vector_var_neon
void aom_dsp_rtcd(void);
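
[Editor's note: the block below is a minimal, self-contained sketch, not part of the patch. It illustrates the compile-time dispatch this generated header uses on targets where NEON is unconditional (ios/arm64): each generic name is a #define alias for the _neon variant, so calls bind at compile time with no indirection. The my_predictor_* functions are hypothetical stand-ins for the aom_*_predictor_* pairs declared above.]

/* Sketch only: hypothetical stand-ins, not libaom's actual kernels. */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void my_predictor_c(uint8_t *dst, ptrdiff_t stride,
                           const uint8_t *above, const uint8_t *left) {
  (void)left; /* vertical-style prediction: copy the row above into each row */
  for (int r = 0; r < 4; ++r) memcpy(dst + r * stride, above, 4);
}

static void my_predictor_neon(uint8_t *dst, ptrdiff_t stride,
                              const uint8_t *above, const uint8_t *left) {
  my_predictor_c(dst, stride, above, left); /* a real _neon would use vld1/vst1 */
}

#define my_predictor my_predictor_neon /* as in the header: static dispatch */

int main(void) {
  uint8_t above[4] = {1, 2, 3, 4}, left[4] = {0}, dst[16] = {0};
  my_predictor(dst, 4, above, left); /* callers only ever see the generic name */
  return dst[12] == 1 ? 0 : 1;
}

[This is why the patch can switch a predictor to NEON just by rewriting one #define: no call sites change.]
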
diff --git a/chromium/third_party/libaom/source/config/ios/arm64/config/aom_scale_rtcd.h b/chromium/third_party/libaom/source/config/ios/arm64/config/aom_scale_rtcd.h
index e27b558dd38..7db7b9d0f84 100644
--- a/chromium/third_party/libaom/source/config/ios/arm64/config/aom_scale_rtcd.h
+++ b/chromium/third_party/libaom/source/config/ios/arm64/config/aom_scale_rtcd.h
@@ -18,6 +18,12 @@ void aom_extend_frame_borders_c(struct yv12_buffer_config* ybf,
const int num_planes);
#define aom_extend_frame_borders aom_extend_frame_borders_c
+void aom_extend_frame_borders_plane_row_c(const struct yv12_buffer_config* ybf,
+ int plane,
+ int v_start,
+ int v_end);
+#define aom_extend_frame_borders_plane_row aom_extend_frame_borders_plane_row_c
+
void aom_extend_frame_borders_y_c(struct yv12_buffer_config* ybf);
#define aom_extend_frame_borders_y aom_extend_frame_borders_y_c
diff --git a/chromium/third_party/libaom/source/config/ios/arm64/config/av1_rtcd.h b/chromium/third_party/libaom/source/config/ios/arm64/config/av1_rtcd.h
index 23e831cc591..fd9ac835ad7 100644
--- a/chromium/third_party/libaom/source/config/ios/arm64/config/av1_rtcd.h
+++ b/chromium/third_party/libaom/source/config/ios/arm64/config/av1_rtcd.h
@@ -183,6 +183,30 @@ void aom_upsampled_pred_c(MACROBLOCKD* xd,
int subpel_search);
#define aom_upsampled_pred aom_upsampled_pred_c
+void av1_apply_selfguided_restoration_c(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+void av1_apply_selfguided_restoration_neon(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+#define av1_apply_selfguided_restoration av1_apply_selfguided_restoration_neon
+
int64_t av1_block_error_c(const tran_low_t* coeff,
const tran_low_t* dqcoeff,
intptr_t block_size,
@@ -234,6 +258,14 @@ void av1_build_compound_diffwtd_mask_d16_neon(uint8_t* mask,
#define av1_build_compound_diffwtd_mask_d16 \
av1_build_compound_diffwtd_mask_d16_neon
+int64_t av1_calc_frame_error_c(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+#define av1_calc_frame_error av1_calc_frame_error_c
+
void av1_calc_indices_dim1_c(const int* data,
const int* centroids,
uint8_t* indices,
@@ -1314,6 +1346,28 @@ void av1_round_shift_array_c(int32_t* arr, int size, int bit);
void av1_round_shift_array_neon(int32_t* arr, int size, int bit);
#define av1_round_shift_array av1_round_shift_array_neon
+int av1_selfguided_restoration_c(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+int av1_selfguided_restoration_neon(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+#define av1_selfguided_restoration av1_selfguided_restoration_neon
+
void av1_txb_init_levels_c(const tran_low_t* const coeff,
const int width,
const int height,
@@ -1330,6 +1384,44 @@ void av1_upsample_intra_edge_c(uint8_t* p, int sz);
void av1_upsample_intra_edge_high_c(uint16_t* p, int sz, int bd);
#define av1_upsample_intra_edge_high av1_upsample_intra_edge_high_c
+void av1_warp_affine_c(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+void av1_warp_affine_neon(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+#define av1_warp_affine av1_warp_affine_neon
+
void av1_wedge_compute_delta_squares_c(int16_t* d,
const int16_t* a,
const int16_t* b,
@@ -1346,7 +1438,11 @@ uint64_t av1_wedge_sse_from_residuals_c(const int16_t* r1,
const int16_t* d,
const uint8_t* m,
int N);
-#define av1_wedge_sse_from_residuals av1_wedge_sse_from_residuals_c
+uint64_t av1_wedge_sse_from_residuals_neon(const int16_t* r1,
+ const int16_t* d,
+ const uint8_t* m,
+ int N);
+#define av1_wedge_sse_from_residuals av1_wedge_sse_from_residuals_neon
void av1_wiener_convolve_add_src_c(const uint8_t* src,
ptrdiff_t src_stride,
@@ -1610,7 +1706,15 @@ void cdef_find_dir_dual_c(const uint16_t* img1,
int coeff_shift,
int* out1,
int* out2);
-#define cdef_find_dir_dual cdef_find_dir_dual_c
+void cdef_find_dir_dual_neon(const uint16_t* img1,
+ const uint16_t* img2,
+ int stride,
+ int32_t* var1,
+ int32_t* var2,
+ int coeff_shift,
+ int* out1,
+ int* out2);
+#define cdef_find_dir_dual cdef_find_dir_dual_neon
cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_c(TX_SIZE tx_size);
cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_neon(TX_SIZE tx_size);
diff --git a/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_config.asm b/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_config.asm
index 410ee85fa68..4ee530ad7f8 100644
--- a/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_config.asm
+++ b/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_config.asm
@@ -9,7 +9,6 @@
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;
ARCH_ARM equ 1
-ARCH_MIPS equ 0
ARCH_PPC equ 0
ARCH_X86 equ 0
ARCH_X86_64 equ 0
@@ -33,8 +32,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
CONFIG_ENTROPY_STATS equ 0
CONFIG_EXCLUDE_SIMD_MISMATCH equ 0
CONFIG_FPMT_TEST equ 0
-CONFIG_FRAME_PARALLEL_ENCODE equ 0
-CONFIG_FRAME_PARALLEL_ENCODE_2 equ 0
CONFIG_GCC equ 1
CONFIG_GCOV equ 0
CONFIG_GPROF equ 0
@@ -50,7 +47,7 @@ CONFIG_NORMAL_TILE_MODE equ 1
CONFIG_OPTICAL_FLOW_API equ 0
CONFIG_OS_SUPPORT equ 1
CONFIG_PARTITION_SEARCH_ORDER equ 0
-CONFIG_PIC equ 0
+CONFIG_PIC equ 1
CONFIG_RATECTRL_LOG equ 0
CONFIG_RD_COMMAND equ 0
CONFIG_RD_DEBUG equ 0
@@ -69,14 +66,11 @@ CONFIG_WEBM_IO equ 1
DECODE_HEIGHT_LIMIT equ 16384
DECODE_WIDTH_LIMIT equ 16384
FORCE_HIGHBITDEPTH_DECODING equ 0
+HAVE_ARM_CRC32 equ 0
HAVE_AVX equ 0
HAVE_AVX2 equ 0
-HAVE_DSPR2 equ 0
HAVE_FEXCEPT equ 1
-HAVE_MIPS32 equ 0
-HAVE_MIPS64 equ 0
HAVE_MMX equ 0
-HAVE_MSA equ 0
HAVE_NEON equ 1
HAVE_PTHREAD_H equ 1
HAVE_SSE equ 0
diff --git a/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_config.c b/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_config.c
index c20c00bb8b1..237bcef9085 100644
--- a/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_config.c
+++ b/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_config.c
@@ -9,5 +9,5 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom/aom_codec.h"
-static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DCMAKE_TOOLCHAIN_FILE=\"../source/libaom/build/cmake/toolchains/armv7-linux-gcc.cmake\" -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
+static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DCMAKE_TOOLCHAIN_FILE=\"../source/libaom/build/cmake/toolchains/armv7-linux-gcc.cmake\" -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
const char *aom_codec_build_config(void) {return cfg;}
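
[Editor's note: the hunk above only reorders the cmake flags recorded in the build-config string (the flag set is unchanged). The sketch below shows how such a string can be checked at run time through aom_codec_build_config(), the accessor defined directly above; it assumes you are compiling against libaom's public headers.]

/* Sketch: run-time verification of build flags via the public accessor. */
#include <stdio.h>
#include <string.h>
#include "aom/aom_codec.h"

int main(void) {
  const char *cfg = aom_codec_build_config();
  /* e.g. confirm this binary is a realtime-only encoder build */
  printf("realtime-only: %s\n",
         strstr(cfg, "-DCONFIG_REALTIME_ONLY=1") ? "yes" : "no");
  return 0;
}
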
diff --git a/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_config.h b/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_config.h
index 86d9d5af013..70f19c9b2e9 100644
--- a/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_config.h
+++ b/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_config.h
@@ -11,7 +11,6 @@
#ifndef AOM_CONFIG_H_
#define AOM_CONFIG_H_
#define ARCH_ARM 1
-#define ARCH_MIPS 0
#define ARCH_PPC 0
#define ARCH_X86 0
#define ARCH_X86_64 0
@@ -35,8 +34,6 @@
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_EXCLUDE_SIMD_MISMATCH 0
#define CONFIG_FPMT_TEST 0
-#define CONFIG_FRAME_PARALLEL_ENCODE 0
-#define CONFIG_FRAME_PARALLEL_ENCODE_2 0
#define CONFIG_GCC 1
#define CONFIG_GCOV 0
#define CONFIG_GPROF 0
@@ -52,7 +49,7 @@
#define CONFIG_OPTICAL_FLOW_API 0
#define CONFIG_OS_SUPPORT 1
#define CONFIG_PARTITION_SEARCH_ORDER 0
-#define CONFIG_PIC 0
+#define CONFIG_PIC 1
#define CONFIG_RATECTRL_LOG 0
#define CONFIG_RD_COMMAND 0
#define CONFIG_RD_DEBUG 0
@@ -71,14 +68,11 @@
#define DECODE_HEIGHT_LIMIT 16384
#define DECODE_WIDTH_LIMIT 16384
#define FORCE_HIGHBITDEPTH_DECODING 0
+#define HAVE_ARM_CRC32 0
#define HAVE_AVX 0
#define HAVE_AVX2 0
-#define HAVE_DSPR2 0
#define HAVE_FEXCEPT 1
-#define HAVE_MIPS32 0
-#define HAVE_MIPS64 0
#define HAVE_MMX 0
-#define HAVE_MSA 0
#define HAVE_NEON 1
#define HAVE_PTHREAD_H 1
#define HAVE_SSE 0
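
[Editor's note: a self-contained sketch, not patch content, of the feature-flag convention in aom_config.h: every HAVE_* macro is always defined, to 0 or 1, so code gates on the value rather than on definedness. HAVE_ARM_CRC32, newly tracked above, follows the same pattern as HAVE_NEON; the values below are illustrative for this arm config.]

#include <stdio.h>

#define HAVE_NEON 1      /* illustrative: matches this arm-neon config */
#define HAVE_ARM_CRC32 0 /* illustrative: the flag this patch introduces */

int main(void) {
#if HAVE_NEON
  puts("NEON translation units compiled in");
#endif
#if !HAVE_ARM_CRC32
  puts("CRC32-based hashing falls back to the C path");
#endif
  return 0;
}
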
diff --git a/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_dsp_rtcd.h b/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_dsp_rtcd.h
index 2a73acd6108..30467989fa3 100644
--- a/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_dsp_rtcd.h
+++ b/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_dsp_rtcd.h
@@ -34,7 +34,16 @@ void aom_avg_8x8_quad_c(const uint8_t* s,
int x16_idx,
int y16_idx,
int* avg);
-#define aom_avg_8x8_quad aom_avg_8x8_quad_c
+void aom_avg_8x8_quad_neon(const uint8_t* s,
+ int p,
+ int x16_idx,
+ int y16_idx,
+ int* avg);
+RTCD_EXTERN void (*aom_avg_8x8_quad)(const uint8_t* s,
+ int p,
+ int x16_idx,
+ int y16_idx,
+ int* avg);
void aom_blend_a64_hmask_c(uint8_t* dst,
uint32_t dst_stride,
@@ -200,18 +209,24 @@ void aom_dc_128_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_c
+void aom_dc_128_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x4 aom_dc_128_predictor_16x4_c
+
+void aom_dc_128_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x64 aom_dc_128_predictor_16x64_c
+
void aom_dc_128_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_c
-void aom_dc_128_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c
-
void aom_dc_128_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -237,6 +252,18 @@ void aom_dc_128_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_32x64 aom_dc_128_predictor_32x64_c
+void aom_dc_128_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_32x8 aom_dc_128_predictor_32x8_c
+
+void aom_dc_128_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_4x16 aom_dc_128_predictor_4x16_c
+
void aom_dc_128_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -256,6 +283,12 @@ void aom_dc_128_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_c
+void aom_dc_128_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_64x16 aom_dc_128_predictor_64x16_c
+
void aom_dc_128_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -274,6 +307,12 @@ void aom_dc_128_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_c
+void aom_dc_128_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_8x32 aom_dc_128_predictor_8x32_c
+
void aom_dc_128_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -312,18 +351,24 @@ void aom_dc_left_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_c
+void aom_dc_left_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x4 aom_dc_left_predictor_16x4_c
+
+void aom_dc_left_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x64 aom_dc_left_predictor_16x64_c
+
void aom_dc_left_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_c
-void aom_dc_left_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c
-
void aom_dc_left_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -349,6 +394,18 @@ void aom_dc_left_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_32x64 aom_dc_left_predictor_32x64_c
+void aom_dc_left_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_32x8 aom_dc_left_predictor_32x8_c
+
+void aom_dc_left_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_4x16 aom_dc_left_predictor_4x16_c
+
void aom_dc_left_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -368,6 +425,12 @@ void aom_dc_left_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_c
+void aom_dc_left_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_64x16 aom_dc_left_predictor_64x16_c
+
void aom_dc_left_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -386,6 +449,12 @@ void aom_dc_left_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_c
+void aom_dc_left_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_8x32 aom_dc_left_predictor_8x32_c
+
void aom_dc_left_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -424,18 +493,24 @@ void aom_dc_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_c
+void aom_dc_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x4 aom_dc_predictor_16x4_c
+
+void aom_dc_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x64 aom_dc_predictor_16x64_c
+
void aom_dc_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_c
-void aom_dc_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c
-
void aom_dc_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -461,6 +536,18 @@ void aom_dc_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_32x64 aom_dc_predictor_32x64_c
+void aom_dc_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_32x8 aom_dc_predictor_32x8_c
+
+void aom_dc_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_4x16 aom_dc_predictor_4x16_c
+
void aom_dc_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -480,6 +567,12 @@ void aom_dc_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_c
+void aom_dc_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_64x16 aom_dc_predictor_64x16_c
+
void aom_dc_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -498,6 +591,12 @@ void aom_dc_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_c
+void aom_dc_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_8x32 aom_dc_predictor_8x32_c
+
void aom_dc_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -536,18 +635,24 @@ void aom_dc_top_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_c
+void aom_dc_top_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x4 aom_dc_top_predictor_16x4_c
+
+void aom_dc_top_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x64 aom_dc_top_predictor_16x64_c
+
void aom_dc_top_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_c
-void aom_dc_top_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c
-
void aom_dc_top_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -573,6 +678,18 @@ void aom_dc_top_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_32x64 aom_dc_top_predictor_32x64_c
+void aom_dc_top_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_32x8 aom_dc_top_predictor_32x8_c
+
+void aom_dc_top_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_4x16 aom_dc_top_predictor_4x16_c
+
void aom_dc_top_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -592,6 +709,12 @@ void aom_dc_top_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_c
+void aom_dc_top_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_64x16 aom_dc_top_predictor_64x16_c
+
void aom_dc_top_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -610,6 +733,12 @@ void aom_dc_top_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_c
+void aom_dc_top_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_8x32 aom_dc_top_predictor_8x32_c
+
void aom_dc_top_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1110,18 +1239,24 @@ void aom_h_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_16x32 aom_h_predictor_16x32_c
+void aom_h_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x4 aom_h_predictor_16x4_c
+
+void aom_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x64 aom_h_predictor_16x64_c
+
void aom_h_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_h_predictor_16x8 aom_h_predictor_16x8_c
-void aom_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_h_predictor_2x2 aom_h_predictor_2x2_c
-
void aom_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1147,6 +1282,18 @@ void aom_h_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_32x64 aom_h_predictor_32x64_c
+void aom_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_32x8 aom_h_predictor_32x8_c
+
+void aom_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_4x16 aom_h_predictor_4x16_c
+
void aom_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1166,6 +1313,12 @@ void aom_h_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_4x8 aom_h_predictor_4x8_c
+void aom_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_64x16 aom_h_predictor_64x16_c
+
void aom_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1184,6 +1337,12 @@ void aom_h_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_8x16 aom_h_predictor_8x16_c
+void aom_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_8x32 aom_h_predictor_8x32_c
+
void aom_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1233,11 +1392,6 @@ RTCD_EXTERN void (*aom_hadamard_8x8)(const int16_t* src_diff,
ptrdiff_t src_stride,
tran_low_t* coeff);
-void aom_hadamard_8x8_dual_c(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-#define aom_hadamard_8x8_dual aom_hadamard_8x8_dual_c
-
void aom_hadamard_lp_16x16_c(const int16_t* src_diff,
ptrdiff_t src_stride,
int16_t* coeff);
@@ -1258,6 +1412,16 @@ RTCD_EXTERN void (*aom_hadamard_lp_8x8)(const int16_t* src_diff,
ptrdiff_t src_stride,
int16_t* coeff);
+void aom_hadamard_lp_8x8_dual_c(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+void aom_hadamard_lp_8x8_dual_neon(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+RTCD_EXTERN void (*aom_hadamard_lp_8x8_dual)(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+
void aom_ifft16x16_float_c(const float* input, float* temp, float* output);
#define aom_ifft16x16_float aom_ifft16x16_float_c
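
[Editor's note: unlike the arm64 headers, this cpu-detect config dispatches through RTCD_EXTERN function pointers, as in the aom_hadamard_lp_8x8_dual block just above. The sketch below is a self-contained illustration of that mechanism, not libaom's actual setup code; has_neon() is a hypothetical stand-in for libaom's CPU-feature probe, and rtcd_init() plays the role of aom_dsp_rtcd().]

#include <stdio.h>

static int sum_c(const int *v, int n) {
  int s = 0;
  for (int i = 0; i < n; ++i) s += v[i];
  return s;
}

static int sum_neon(const int *v, int n) {
  return sum_c(v, n); /* stand-in: a real _neon variant would vectorize */
}

static int (*sum)(const int *v, int n); /* plays the RTCD_EXTERN pointer */

static int has_neon(void) { return 1; } /* hypothetical CPU probe */

static void rtcd_init(void) { /* plays aom_dsp_rtcd(): run once at startup */
  sum = sum_c;
  if (has_neon()) sum = sum_neon;
}

int main(void) {
  int v[4] = {1, 2, 3, 4};
  rtcd_init();               /* must run before any dispatched call */
  printf("%d\n", sum(v, 4)); /* every call goes through the pointer */
  return 0;
}

[The cost of this flexibility is one indirect call per invocation, which is why the fixed-NEON configs elsewhere in this patch prefer plain #define aliases.]
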
@@ -1273,22 +1437,43 @@ void aom_ifft4x4_float_c(const float* input, float* temp, float* output);
void aom_ifft8x8_float_c(const float* input, float* temp, float* output);
#define aom_ifft8x8_float aom_ifft8x8_float_c
-int16_t aom_int_pro_col_c(const uint8_t* ref, const int width);
-int16_t aom_int_pro_col_neon(const uint8_t* ref, const int width);
-RTCD_EXTERN int16_t (*aom_int_pro_col)(const uint8_t* ref, const int width);
+void aom_int_pro_col_c(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_col_neon(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+RTCD_EXTERN void (*aom_int_pro_col)(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
-void aom_int_pro_row_c(int16_t hbuf[16],
+void aom_int_pro_row_c(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
-void aom_int_pro_row_neon(int16_t hbuf[16],
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_row_neon(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
-RTCD_EXTERN void (*aom_int_pro_row)(int16_t hbuf[16],
+ const int width,
+ const int height,
+ int norm_factor);
+RTCD_EXTERN void (*aom_int_pro_row)(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
+ const int width,
+ const int height,
+ int norm_factor);
void aom_lowbd_blend_a64_d16_mask_c(uint8_t* dst,
uint32_t dst_stride,
@@ -1376,7 +1561,16 @@ void aom_lpf_horizontal_14_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_14_quad aom_lpf_horizontal_14_quad_c
+void aom_lpf_horizontal_14_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+RTCD_EXTERN void (*aom_lpf_horizontal_14_quad)(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
void aom_lpf_horizontal_4_c(uint8_t* s,
int pitch,
@@ -1424,7 +1618,16 @@ void aom_lpf_horizontal_4_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_4_quad aom_lpf_horizontal_4_quad_c
+void aom_lpf_horizontal_4_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+RTCD_EXTERN void (*aom_lpf_horizontal_4_quad)(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
void aom_lpf_horizontal_6_c(uint8_t* s,
int pitch,
@@ -1472,7 +1675,16 @@ void aom_lpf_horizontal_6_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_6_quad aom_lpf_horizontal_6_quad_c
+void aom_lpf_horizontal_6_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+RTCD_EXTERN void (*aom_lpf_horizontal_6_quad)(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
void aom_lpf_horizontal_8_c(uint8_t* s,
int pitch,
@@ -1520,7 +1732,16 @@ void aom_lpf_horizontal_8_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_8_quad aom_lpf_horizontal_8_quad_c
+void aom_lpf_horizontal_8_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+RTCD_EXTERN void (*aom_lpf_horizontal_8_quad)(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
void aom_lpf_vertical_14_c(uint8_t* s,
int pitch,
@@ -1568,7 +1789,16 @@ void aom_lpf_vertical_14_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_14_quad aom_lpf_vertical_14_quad_c
+void aom_lpf_vertical_14_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+RTCD_EXTERN void (*aom_lpf_vertical_14_quad)(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
void aom_lpf_vertical_4_c(uint8_t* s,
int pitch,
@@ -1616,7 +1846,16 @@ void aom_lpf_vertical_4_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_4_quad aom_lpf_vertical_4_quad_c
+void aom_lpf_vertical_4_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+RTCD_EXTERN void (*aom_lpf_vertical_4_quad)(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
void aom_lpf_vertical_6_c(uint8_t* s,
int pitch,
@@ -1664,7 +1903,16 @@ void aom_lpf_vertical_6_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_6_quad aom_lpf_vertical_6_quad_c
+void aom_lpf_vertical_6_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+RTCD_EXTERN void (*aom_lpf_vertical_6_quad)(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
void aom_lpf_vertical_8_c(uint8_t* s,
int pitch,
@@ -1712,7 +1960,16 @@ void aom_lpf_vertical_8_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_8_quad aom_lpf_vertical_8_quad_c
+void aom_lpf_vertical_8_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+RTCD_EXTERN void (*aom_lpf_vertical_8_quad)(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
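Every `_quad` loop-filter entry point above undergoes the same mechanical change: the compile-time alias to the C fallback is replaced by a NEON prototype plus an RTCD_EXTERN function pointer, so the implementation can be selected at run time. In simplified form (some_filter is a placeholder name, not a symbol from this header):

/* Before: resolved at compile time, always the C fallback. */
#define some_filter some_filter_c

/* After: RTCD_EXTERN expands to `extern` everywhere except the single
 * translation unit that defines RTCD_C and therefore owns the pointer. */
#ifdef RTCD_C
#define RTCD_EXTERN
#else
#define RTCD_EXTERN extern
#endif

RTCD_EXTERN void (*some_filter)(uint8_t *s, int pitch,
                                const uint8_t *blimit0,
                                const uint8_t *limit0,
                                const uint8_t *thresh0);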
unsigned int aom_masked_sad128x128_c(const uint8_t* src,
int src_stride,
@@ -2290,21 +2547,55 @@ unsigned int aom_mse16x8_c(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
-#define aom_mse16x8 aom_mse16x8_c
+unsigned int aom_mse16x8_neon(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+RTCD_EXTERN unsigned int (*aom_mse16x8)(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
unsigned int aom_mse8x16_c(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
-#define aom_mse8x16 aom_mse8x16_c
+unsigned int aom_mse8x16_neon(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+RTCD_EXTERN unsigned int (*aom_mse8x16)(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
unsigned int aom_mse8x8_c(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
-#define aom_mse8x8 aom_mse8x8_c
+unsigned int aom_mse8x8_neon(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+RTCD_EXTERN unsigned int (*aom_mse8x8)(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+
+uint64_t aom_mse_16xh_16bit_c(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+#define aom_mse_16xh_16bit aom_mse_16xh_16bit_c
uint64_t aom_mse_wxh_16bit_c(uint8_t* dst,
int dstride,
@@ -2318,85 +2609,248 @@ void aom_paeth_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_16x16 aom_paeth_predictor_16x16_c
+void aom_paeth_predictor_16x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_16x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_paeth_predictor_16x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_16x32 aom_paeth_predictor_16x32_c
+void aom_paeth_predictor_16x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_16x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
-void aom_paeth_predictor_16x8_c(uint8_t* dst,
+void aom_paeth_predictor_16x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_16x8 aom_paeth_predictor_16x8_c
+void aom_paeth_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_16x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
-void aom_paeth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c
+void aom_paeth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_16x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_paeth_predictor_16x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_16x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_paeth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_32x16 aom_paeth_predictor_32x16_c
+void aom_paeth_predictor_32x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_32x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_paeth_predictor_32x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_32x32 aom_paeth_predictor_32x32_c
+void aom_paeth_predictor_32x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_32x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_paeth_predictor_32x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_32x64 aom_paeth_predictor_32x64_c
+void aom_paeth_predictor_32x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_32x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_paeth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_32x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_paeth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_4x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_paeth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_4x4 aom_paeth_predictor_4x4_c
+void aom_paeth_predictor_4x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_4x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_paeth_predictor_4x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_4x8 aom_paeth_predictor_4x8_c
+void aom_paeth_predictor_4x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_4x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_paeth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_paeth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_64x32 aom_paeth_predictor_64x32_c
+void aom_paeth_predictor_64x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_64x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_paeth_predictor_64x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_64x64 aom_paeth_predictor_64x64_c
+void aom_paeth_predictor_64x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_64x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_paeth_predictor_8x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_8x16 aom_paeth_predictor_8x16_c
+void aom_paeth_predictor_8x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_8x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_paeth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_8x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_paeth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_8x4 aom_paeth_predictor_8x4_c
+void aom_paeth_predictor_8x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_8x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_paeth_predictor_8x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_8x8 aom_paeth_predictor_8x8_c
+void aom_paeth_predictor_8x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_8x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_pixel_scale_c(const int16_t* src_diff,
ptrdiff_t src_stride,
@@ -2480,20 +2934,6 @@ RTCD_EXTERN void (*aom_quantize_b_32x32)(const tran_low_t* coeff_ptr,
const int16_t* scan,
const int16_t* iscan);
-void aom_quantize_b_32x32_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_32x32_adaptive aom_quantize_b_32x32_adaptive_c
-
void aom_quantize_b_64x64_c(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -2531,34 +2971,6 @@ RTCD_EXTERN void (*aom_quantize_b_64x64)(const tran_low_t* coeff_ptr,
const int16_t* scan,
const int16_t* iscan);
-void aom_quantize_b_64x64_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_64x64_adaptive aom_quantize_b_64x64_adaptive_c
-
-void aom_quantize_b_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_adaptive aom_quantize_b_adaptive_c
-
unsigned int aom_sad128x128_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
@@ -2584,7 +2996,16 @@ void aom_sad128x128x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad128x128x4d aom_sad128x128x4d_c
+void aom_sad128x128x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+RTCD_EXTERN void (*aom_sad128x128x4d)(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
void aom_sad128x128x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2598,7 +3019,14 @@ unsigned int aom_sad128x64_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad128x64 aom_sad128x64_c
+unsigned int aom_sad128x64_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+RTCD_EXTERN unsigned int (*aom_sad128x64)(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
unsigned int aom_sad128x64_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2612,7 +3040,16 @@ void aom_sad128x64x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad128x64x4d aom_sad128x64x4d_c
+void aom_sad128x64x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+RTCD_EXTERN void (*aom_sad128x64x4d)(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
void aom_sad128x64x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2678,7 +3115,14 @@ unsigned int aom_sad16x32_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad16x32 aom_sad16x32_c
+unsigned int aom_sad16x32_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+RTCD_EXTERN unsigned int (*aom_sad16x32)(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
unsigned int aom_sad16x32_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2692,7 +3136,16 @@ void aom_sad16x32x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad16x32x4d aom_sad16x32x4d_c
+void aom_sad16x32x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+RTCD_EXTERN void (*aom_sad16x32x4d)(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
void aom_sad16x32x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2727,7 +3180,16 @@ void aom_sad16x8x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad16x8x4d aom_sad16x8x4d_c
+void aom_sad16x8x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+RTCD_EXTERN void (*aom_sad16x8x4d)(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
void aom_sad16x8x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2749,7 +3211,14 @@ unsigned int aom_sad32x16_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad32x16 aom_sad32x16_c
+unsigned int aom_sad32x16_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+RTCD_EXTERN unsigned int (*aom_sad32x16)(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
unsigned int aom_sad32x16_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2763,7 +3232,16 @@ void aom_sad32x16x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad32x16x4d aom_sad32x16x4d_c
+void aom_sad32x16x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+RTCD_EXTERN void (*aom_sad32x16x4d)(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
void aom_sad32x16x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2821,7 +3299,14 @@ unsigned int aom_sad32x64_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad32x64 aom_sad32x64_c
+unsigned int aom_sad32x64_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+RTCD_EXTERN unsigned int (*aom_sad32x64)(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
unsigned int aom_sad32x64_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2835,7 +3320,16 @@ void aom_sad32x64x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad32x64x4d aom_sad32x64x4d_c
+void aom_sad32x64x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+RTCD_EXTERN void (*aom_sad32x64x4d)(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
void aom_sad32x64x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2878,7 +3372,16 @@ void aom_sad4x4x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad4x4x4d aom_sad4x4x4d_c
+void aom_sad4x4x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+RTCD_EXTERN void (*aom_sad4x4x4d)(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
void aom_sad4x4x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2892,7 +3395,14 @@ unsigned int aom_sad4x8_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad4x8 aom_sad4x8_c
+unsigned int aom_sad4x8_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+RTCD_EXTERN unsigned int (*aom_sad4x8)(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
unsigned int aom_sad4x8_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2906,7 +3416,16 @@ void aom_sad4x8x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad4x8x4d aom_sad4x8x4d_c
+void aom_sad4x8x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+RTCD_EXTERN void (*aom_sad4x8x4d)(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
void aom_sad4x8x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2928,7 +3447,14 @@ unsigned int aom_sad64x128_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad64x128 aom_sad64x128_c
+unsigned int aom_sad64x128_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+RTCD_EXTERN unsigned int (*aom_sad64x128)(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
unsigned int aom_sad64x128_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2942,7 +3468,16 @@ void aom_sad64x128x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad64x128x4d aom_sad64x128x4d_c
+void aom_sad64x128x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+RTCD_EXTERN void (*aom_sad64x128x4d)(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
void aom_sad64x128x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2956,7 +3491,14 @@ unsigned int aom_sad64x32_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad64x32 aom_sad64x32_c
+unsigned int aom_sad64x32_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+RTCD_EXTERN unsigned int (*aom_sad64x32)(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
unsigned int aom_sad64x32_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2970,7 +3512,16 @@ void aom_sad64x32x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad64x32x4d aom_sad64x32x4d_c
+void aom_sad64x32x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+RTCD_EXTERN void (*aom_sad64x32x4d)(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
void aom_sad64x32x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -3057,7 +3608,16 @@ void aom_sad8x16x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad8x16x4d aom_sad8x16x4d_c
+void aom_sad8x16x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+RTCD_EXTERN void (*aom_sad8x16x4d)(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
void aom_sad8x16x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -3071,7 +3631,14 @@ unsigned int aom_sad8x4_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad8x4 aom_sad8x4_c
+unsigned int aom_sad8x4_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+RTCD_EXTERN unsigned int (*aom_sad8x4)(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
unsigned int aom_sad8x4_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -3085,7 +3652,16 @@ void aom_sad8x4x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad8x4x4d aom_sad8x4x4d_c
+void aom_sad8x4x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+RTCD_EXTERN void (*aom_sad8x4x4d)(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
void aom_sad8x4x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -3120,7 +3696,16 @@ void aom_sad8x8x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad8x8x4d aom_sad8x8x4d_c
+void aom_sad8x8x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+RTCD_EXTERN void (*aom_sad8x8x4d)(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
void aom_sad8x8x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -3616,85 +4201,248 @@ void aom_smooth_h_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_16x16 aom_smooth_h_predictor_16x16_c
+void aom_smooth_h_predictor_16x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_16x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_h_predictor_16x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_16x32 aom_smooth_h_predictor_16x32_c
+void aom_smooth_h_predictor_16x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_16x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_h_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_16x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_16x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_h_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_16x8 aom_smooth_h_predictor_16x8_c
-
-void aom_smooth_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_h_predictor_2x2 aom_smooth_h_predictor_2x2_c
+void aom_smooth_h_predictor_16x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_16x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_32x16 aom_smooth_h_predictor_32x16_c
+void aom_smooth_h_predictor_32x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_32x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_h_predictor_32x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_32x32 aom_smooth_h_predictor_32x32_c
+void aom_smooth_h_predictor_32x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_32x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_h_predictor_32x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_32x64 aom_smooth_h_predictor_32x64_c
+void aom_smooth_h_predictor_32x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_32x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_32x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_4x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_4x4 aom_smooth_h_predictor_4x4_c
+void aom_smooth_h_predictor_4x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_4x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_h_predictor_4x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_4x8 aom_smooth_h_predictor_4x8_c
+void aom_smooth_h_predictor_4x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_4x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_64x32 aom_smooth_h_predictor_64x32_c
+void aom_smooth_h_predictor_64x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_64x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_h_predictor_64x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_64x64 aom_smooth_h_predictor_64x64_c
+void aom_smooth_h_predictor_64x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_64x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_h_predictor_8x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_8x16 aom_smooth_h_predictor_8x16_c
+void aom_smooth_h_predictor_8x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_8x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_8x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_8x4 aom_smooth_h_predictor_8x4_c
+void aom_smooth_h_predictor_8x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_8x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_h_predictor_8x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_8x8 aom_smooth_h_predictor_8x8_c
+void aom_smooth_h_predictor_8x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_8x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
@@ -3722,6 +4470,32 @@ RTCD_EXTERN void (*aom_smooth_predictor_16x32)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_16x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_16x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3735,12 +4509,6 @@ RTCD_EXTERN void (*aom_smooth_predictor_16x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
-void aom_smooth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c
-
void aom_smooth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3780,6 +4548,32 @@ RTCD_EXTERN void (*aom_smooth_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_32x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_4x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3806,6 +4600,19 @@ RTCD_EXTERN void (*aom_smooth_predictor_4x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3845,6 +4652,19 @@ RTCD_EXTERN void (*aom_smooth_predictor_8x16)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_8x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3875,85 +4695,248 @@ void aom_smooth_v_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_16x16 aom_smooth_v_predictor_16x16_c
+void aom_smooth_v_predictor_16x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_16x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_v_predictor_16x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_16x32 aom_smooth_v_predictor_16x32_c
+void aom_smooth_v_predictor_16x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_16x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_v_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_16x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_16x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_v_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_16x8 aom_smooth_v_predictor_16x8_c
-
-void aom_smooth_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_v_predictor_2x2 aom_smooth_v_predictor_2x2_c
+void aom_smooth_v_predictor_16x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_16x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_32x16 aom_smooth_v_predictor_32x16_c
+void aom_smooth_v_predictor_32x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_32x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_v_predictor_32x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_32x32 aom_smooth_v_predictor_32x32_c
+void aom_smooth_v_predictor_32x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_32x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_v_predictor_32x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_32x64 aom_smooth_v_predictor_32x64_c
+void aom_smooth_v_predictor_32x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_32x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_32x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_4x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_4x4 aom_smooth_v_predictor_4x4_c
+void aom_smooth_v_predictor_4x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_4x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_v_predictor_4x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_4x8 aom_smooth_v_predictor_4x8_c
+void aom_smooth_v_predictor_4x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_4x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_64x32 aom_smooth_v_predictor_64x32_c
+void aom_smooth_v_predictor_64x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_64x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_v_predictor_64x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_64x64 aom_smooth_v_predictor_64x64_c
+void aom_smooth_v_predictor_64x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_64x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_v_predictor_8x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_8x16 aom_smooth_v_predictor_8x16_c
+void aom_smooth_v_predictor_8x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_8x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_8x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_8x4 aom_smooth_v_predictor_8x4_c
+void aom_smooth_v_predictor_8x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_8x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
void aom_smooth_v_predictor_8x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_8x8 aom_smooth_v_predictor_8x8_c
+void aom_smooth_v_predictor_8x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_8x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
int64_t aom_sse_c(const uint8_t* a,
int a_stride,
@@ -4543,7 +5526,16 @@ uint64_t aom_sum_sse_2d_i16_c(const int16_t* src,
int width,
int height,
int* sum);
-#define aom_sum_sse_2d_i16 aom_sum_sse_2d_i16_c
+uint64_t aom_sum_sse_2d_i16_neon(const int16_t* src,
+ int src_stride,
+ int width,
+ int height,
+ int* sum);
+RTCD_EXTERN uint64_t (*aom_sum_sse_2d_i16)(const int16_t* src,
+ int src_stride,
+ int width,
+ int height,
+ int* sum);
void aom_v_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
@@ -4564,18 +5556,24 @@ void aom_v_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_16x32 aom_v_predictor_16x32_c
+void aom_v_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x4 aom_v_predictor_16x4_c
+
+void aom_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x64 aom_v_predictor_16x64_c
+
void aom_v_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_v_predictor_16x8 aom_v_predictor_16x8_c
-void aom_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_v_predictor_2x2 aom_v_predictor_2x2_c
-
void aom_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4601,6 +5599,18 @@ void aom_v_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_32x64 aom_v_predictor_32x64_c
+void aom_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_32x8 aom_v_predictor_32x8_c
+
+void aom_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_4x16 aom_v_predictor_4x16_c
+
void aom_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4620,6 +5630,12 @@ void aom_v_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_4x8 aom_v_predictor_4x8_c
+void aom_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_64x16 aom_v_predictor_64x16_c
+
void aom_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4638,6 +5654,12 @@ void aom_v_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_8x16 aom_v_predictor_8x16_c
+void aom_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_8x32 aom_v_predictor_8x32_c
+
void aom_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4940,11 +5962,11 @@ RTCD_EXTERN unsigned int (*aom_variance8x8)(const uint8_t* src_ptr,
int ref_stride,
unsigned int* sse);
-int aom_vector_var_c(const int16_t* ref, const int16_t* src, const int bwl);
-int aom_vector_var_neon(const int16_t* ref, const int16_t* src, const int bwl);
+int aom_vector_var_c(const int16_t* ref, const int16_t* src, int bwl);
+int aom_vector_var_neon(const int16_t* ref, const int16_t* src, int bwl);
RTCD_EXTERN int (*aom_vector_var)(const int16_t* ref,
const int16_t* src,
- const int bwl);
+ int bwl);
void aom_dsp_rtcd(void);
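The hunks that follow wire the new kernels into setup_rtcd_internal(), libaom's run-time CPU detection (RTCD) hook. The pattern is uniform: each pointer is first bound to the portable _c routine, then rebound to the _neon routine when the detected flags include HAS_NEON. Reduced to a single entry, the shape is the sketch below (aom_arm_cpu_caps() is the ARM feature query from aom_ports/arm.h used by these generated headers):

static void setup_rtcd_internal_sketch(void) {
  int flags = aom_arm_cpu_caps(); /* run-time ARM feature query */

  aom_mse16x8 = aom_mse16x8_c;    /* safe portable default */
  if (flags & HAS_NEON)
    aom_mse16x8 = aom_mse16x8_neon;
}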
@@ -4963,6 +5985,9 @@ static void setup_rtcd_internal(void) {
aom_avg_8x8 = aom_avg_8x8_c;
if (flags & HAS_NEON)
aom_avg_8x8 = aom_avg_8x8_neon;
+ aom_avg_8x8_quad = aom_avg_8x8_quad_c;
+ if (flags & HAS_NEON)
+ aom_avg_8x8_quad = aom_avg_8x8_quad_neon;
aom_blend_a64_hmask = aom_blend_a64_hmask_c;
if (flags & HAS_NEON)
aom_blend_a64_hmask = aom_blend_a64_hmask_neon;
@@ -5065,6 +6090,9 @@ static void setup_rtcd_internal(void) {
aom_hadamard_lp_8x8 = aom_hadamard_lp_8x8_c;
if (flags & HAS_NEON)
aom_hadamard_lp_8x8 = aom_hadamard_lp_8x8_neon;
+ aom_hadamard_lp_8x8_dual = aom_hadamard_lp_8x8_dual_c;
+ if (flags & HAS_NEON)
+ aom_hadamard_lp_8x8_dual = aom_hadamard_lp_8x8_dual_neon;
aom_int_pro_col = aom_int_pro_col_c;
if (flags & HAS_NEON)
aom_int_pro_col = aom_int_pro_col_neon;
@@ -5080,51 +6108,141 @@ static void setup_rtcd_internal(void) {
aom_lpf_horizontal_14_dual = aom_lpf_horizontal_14_dual_c;
if (flags & HAS_NEON)
aom_lpf_horizontal_14_dual = aom_lpf_horizontal_14_dual_neon;
+ aom_lpf_horizontal_14_quad = aom_lpf_horizontal_14_quad_c;
+ if (flags & HAS_NEON)
+ aom_lpf_horizontal_14_quad = aom_lpf_horizontal_14_quad_neon;
aom_lpf_horizontal_4 = aom_lpf_horizontal_4_c;
if (flags & HAS_NEON)
aom_lpf_horizontal_4 = aom_lpf_horizontal_4_neon;
aom_lpf_horizontal_4_dual = aom_lpf_horizontal_4_dual_c;
if (flags & HAS_NEON)
aom_lpf_horizontal_4_dual = aom_lpf_horizontal_4_dual_neon;
+ aom_lpf_horizontal_4_quad = aom_lpf_horizontal_4_quad_c;
+ if (flags & HAS_NEON)
+ aom_lpf_horizontal_4_quad = aom_lpf_horizontal_4_quad_neon;
aom_lpf_horizontal_6 = aom_lpf_horizontal_6_c;
if (flags & HAS_NEON)
aom_lpf_horizontal_6 = aom_lpf_horizontal_6_neon;
aom_lpf_horizontal_6_dual = aom_lpf_horizontal_6_dual_c;
if (flags & HAS_NEON)
aom_lpf_horizontal_6_dual = aom_lpf_horizontal_6_dual_neon;
+ aom_lpf_horizontal_6_quad = aom_lpf_horizontal_6_quad_c;
+ if (flags & HAS_NEON)
+ aom_lpf_horizontal_6_quad = aom_lpf_horizontal_6_quad_neon;
aom_lpf_horizontal_8 = aom_lpf_horizontal_8_c;
if (flags & HAS_NEON)
aom_lpf_horizontal_8 = aom_lpf_horizontal_8_neon;
aom_lpf_horizontal_8_dual = aom_lpf_horizontal_8_dual_c;
if (flags & HAS_NEON)
aom_lpf_horizontal_8_dual = aom_lpf_horizontal_8_dual_neon;
+ aom_lpf_horizontal_8_quad = aom_lpf_horizontal_8_quad_c;
+ if (flags & HAS_NEON)
+ aom_lpf_horizontal_8_quad = aom_lpf_horizontal_8_quad_neon;
aom_lpf_vertical_14 = aom_lpf_vertical_14_c;
if (flags & HAS_NEON)
aom_lpf_vertical_14 = aom_lpf_vertical_14_neon;
aom_lpf_vertical_14_dual = aom_lpf_vertical_14_dual_c;
if (flags & HAS_NEON)
aom_lpf_vertical_14_dual = aom_lpf_vertical_14_dual_neon;
+ aom_lpf_vertical_14_quad = aom_lpf_vertical_14_quad_c;
+ if (flags & HAS_NEON)
+ aom_lpf_vertical_14_quad = aom_lpf_vertical_14_quad_neon;
aom_lpf_vertical_4 = aom_lpf_vertical_4_c;
if (flags & HAS_NEON)
aom_lpf_vertical_4 = aom_lpf_vertical_4_neon;
aom_lpf_vertical_4_dual = aom_lpf_vertical_4_dual_c;
if (flags & HAS_NEON)
aom_lpf_vertical_4_dual = aom_lpf_vertical_4_dual_neon;
+ aom_lpf_vertical_4_quad = aom_lpf_vertical_4_quad_c;
+ if (flags & HAS_NEON)
+ aom_lpf_vertical_4_quad = aom_lpf_vertical_4_quad_neon;
aom_lpf_vertical_6 = aom_lpf_vertical_6_c;
if (flags & HAS_NEON)
aom_lpf_vertical_6 = aom_lpf_vertical_6_neon;
aom_lpf_vertical_6_dual = aom_lpf_vertical_6_dual_c;
if (flags & HAS_NEON)
aom_lpf_vertical_6_dual = aom_lpf_vertical_6_dual_neon;
+ aom_lpf_vertical_6_quad = aom_lpf_vertical_6_quad_c;
+ if (flags & HAS_NEON)
+ aom_lpf_vertical_6_quad = aom_lpf_vertical_6_quad_neon;
aom_lpf_vertical_8 = aom_lpf_vertical_8_c;
if (flags & HAS_NEON)
aom_lpf_vertical_8 = aom_lpf_vertical_8_neon;
aom_lpf_vertical_8_dual = aom_lpf_vertical_8_dual_c;
if (flags & HAS_NEON)
aom_lpf_vertical_8_dual = aom_lpf_vertical_8_dual_neon;
+ aom_lpf_vertical_8_quad = aom_lpf_vertical_8_quad_c;
+ if (flags & HAS_NEON)
+ aom_lpf_vertical_8_quad = aom_lpf_vertical_8_quad_neon;
aom_mse16x16 = aom_mse16x16_c;
if (flags & HAS_NEON)
aom_mse16x16 = aom_mse16x16_neon;
+ aom_mse16x8 = aom_mse16x8_c;
+ if (flags & HAS_NEON)
+ aom_mse16x8 = aom_mse16x8_neon;
+ aom_mse8x16 = aom_mse8x16_c;
+ if (flags & HAS_NEON)
+ aom_mse8x16 = aom_mse8x16_neon;
+ aom_mse8x8 = aom_mse8x8_c;
+ if (flags & HAS_NEON)
+ aom_mse8x8 = aom_mse8x8_neon;
+ aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_c;
+ if (flags & HAS_NEON)
+ aom_paeth_predictor_16x16 = aom_paeth_predictor_16x16_neon;
+ aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_c;
+ if (flags & HAS_NEON)
+ aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_neon;
+ aom_paeth_predictor_16x4 = aom_paeth_predictor_16x4_c;
+ if (flags & HAS_NEON)
+ aom_paeth_predictor_16x4 = aom_paeth_predictor_16x4_neon;
+ aom_paeth_predictor_16x64 = aom_paeth_predictor_16x64_c;
+ if (flags & HAS_NEON)
+ aom_paeth_predictor_16x64 = aom_paeth_predictor_16x64_neon;
+ aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_c;
+ if (flags & HAS_NEON)
+ aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_neon;
+ aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_c;
+ if (flags & HAS_NEON)
+ aom_paeth_predictor_32x16 = aom_paeth_predictor_32x16_neon;
+ aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_c;
+ if (flags & HAS_NEON)
+ aom_paeth_predictor_32x32 = aom_paeth_predictor_32x32_neon;
+ aom_paeth_predictor_32x64 = aom_paeth_predictor_32x64_c;
+ if (flags & HAS_NEON)
+ aom_paeth_predictor_32x64 = aom_paeth_predictor_32x64_neon;
+ aom_paeth_predictor_32x8 = aom_paeth_predictor_32x8_c;
+ if (flags & HAS_NEON)
+ aom_paeth_predictor_32x8 = aom_paeth_predictor_32x8_neon;
+ aom_paeth_predictor_4x16 = aom_paeth_predictor_4x16_c;
+ if (flags & HAS_NEON)
+ aom_paeth_predictor_4x16 = aom_paeth_predictor_4x16_neon;
+ aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_c;
+ if (flags & HAS_NEON)
+ aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_neon;
+ aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_c;
+ if (flags & HAS_NEON)
+ aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_neon;
+ aom_paeth_predictor_64x16 = aom_paeth_predictor_64x16_c;
+ if (flags & HAS_NEON)
+ aom_paeth_predictor_64x16 = aom_paeth_predictor_64x16_neon;
+ aom_paeth_predictor_64x32 = aom_paeth_predictor_64x32_c;
+ if (flags & HAS_NEON)
+ aom_paeth_predictor_64x32 = aom_paeth_predictor_64x32_neon;
+ aom_paeth_predictor_64x64 = aom_paeth_predictor_64x64_c;
+ if (flags & HAS_NEON)
+ aom_paeth_predictor_64x64 = aom_paeth_predictor_64x64_neon;
+ aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_c;
+ if (flags & HAS_NEON)
+ aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_neon;
+ aom_paeth_predictor_8x32 = aom_paeth_predictor_8x32_c;
+ if (flags & HAS_NEON)
+ aom_paeth_predictor_8x32 = aom_paeth_predictor_8x32_neon;
+ aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_c;
+ if (flags & HAS_NEON)
+ aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_neon;
+ aom_paeth_predictor_8x8 = aom_paeth_predictor_8x8_c;
+ if (flags & HAS_NEON)
+ aom_paeth_predictor_8x8 = aom_paeth_predictor_8x8_neon;
aom_quantize_b = aom_quantize_b_c;
if (flags & HAS_NEON)
aom_quantize_b = aom_quantize_b_neon;
@@ -5137,24 +6255,75 @@ static void setup_rtcd_internal(void) {
aom_sad128x128 = aom_sad128x128_c;
if (flags & HAS_NEON)
aom_sad128x128 = aom_sad128x128_neon;
+ aom_sad128x128x4d = aom_sad128x128x4d_c;
+ if (flags & HAS_NEON)
+ aom_sad128x128x4d = aom_sad128x128x4d_neon;
+ aom_sad128x64 = aom_sad128x64_c;
+ if (flags & HAS_NEON)
+ aom_sad128x64 = aom_sad128x64_neon;
+ aom_sad128x64x4d = aom_sad128x64x4d_c;
+ if (flags & HAS_NEON)
+ aom_sad128x64x4d = aom_sad128x64x4d_neon;
aom_sad16x16 = aom_sad16x16_c;
if (flags & HAS_NEON)
aom_sad16x16 = aom_sad16x16_neon;
aom_sad16x16x4d = aom_sad16x16x4d_c;
if (flags & HAS_NEON)
aom_sad16x16x4d = aom_sad16x16x4d_neon;
+ aom_sad16x32 = aom_sad16x32_c;
+ if (flags & HAS_NEON)
+ aom_sad16x32 = aom_sad16x32_neon;
+ aom_sad16x32x4d = aom_sad16x32x4d_c;
+ if (flags & HAS_NEON)
+ aom_sad16x32x4d = aom_sad16x32x4d_neon;
aom_sad16x8 = aom_sad16x8_c;
if (flags & HAS_NEON)
aom_sad16x8 = aom_sad16x8_neon;
+ aom_sad16x8x4d = aom_sad16x8x4d_c;
+ if (flags & HAS_NEON)
+ aom_sad16x8x4d = aom_sad16x8x4d_neon;
+ aom_sad32x16 = aom_sad32x16_c;
+ if (flags & HAS_NEON)
+ aom_sad32x16 = aom_sad32x16_neon;
+ aom_sad32x16x4d = aom_sad32x16x4d_c;
+ if (flags & HAS_NEON)
+ aom_sad32x16x4d = aom_sad32x16x4d_neon;
aom_sad32x32 = aom_sad32x32_c;
if (flags & HAS_NEON)
aom_sad32x32 = aom_sad32x32_neon;
aom_sad32x32x4d = aom_sad32x32x4d_c;
if (flags & HAS_NEON)
aom_sad32x32x4d = aom_sad32x32x4d_neon;
+ aom_sad32x64 = aom_sad32x64_c;
+ if (flags & HAS_NEON)
+ aom_sad32x64 = aom_sad32x64_neon;
+ aom_sad32x64x4d = aom_sad32x64x4d_c;
+ if (flags & HAS_NEON)
+ aom_sad32x64x4d = aom_sad32x64x4d_neon;
aom_sad4x4 = aom_sad4x4_c;
if (flags & HAS_NEON)
aom_sad4x4 = aom_sad4x4_neon;
+ aom_sad4x4x4d = aom_sad4x4x4d_c;
+ if (flags & HAS_NEON)
+ aom_sad4x4x4d = aom_sad4x4x4d_neon;
+ aom_sad4x8 = aom_sad4x8_c;
+ if (flags & HAS_NEON)
+ aom_sad4x8 = aom_sad4x8_neon;
+ aom_sad4x8x4d = aom_sad4x8x4d_c;
+ if (flags & HAS_NEON)
+ aom_sad4x8x4d = aom_sad4x8x4d_neon;
+ aom_sad64x128 = aom_sad64x128_c;
+ if (flags & HAS_NEON)
+ aom_sad64x128 = aom_sad64x128_neon;
+ aom_sad64x128x4d = aom_sad64x128x4d_c;
+ if (flags & HAS_NEON)
+ aom_sad64x128x4d = aom_sad64x128x4d_neon;
+ aom_sad64x32 = aom_sad64x32_c;
+ if (flags & HAS_NEON)
+ aom_sad64x32 = aom_sad64x32_neon;
+ aom_sad64x32x4d = aom_sad64x32x4d_c;
+ if (flags & HAS_NEON)
+ aom_sad64x32x4d = aom_sad64x32x4d_neon;
aom_sad64x64 = aom_sad64x64_c;
if (flags & HAS_NEON)
aom_sad64x64 = aom_sad64x64_neon;
@@ -5164,9 +6333,21 @@ static void setup_rtcd_internal(void) {
aom_sad8x16 = aom_sad8x16_c;
if (flags & HAS_NEON)
aom_sad8x16 = aom_sad8x16_neon;
+ aom_sad8x16x4d = aom_sad8x16x4d_c;
+ if (flags & HAS_NEON)
+ aom_sad8x16x4d = aom_sad8x16x4d_neon;
+ aom_sad8x4 = aom_sad8x4_c;
+ if (flags & HAS_NEON)
+ aom_sad8x4 = aom_sad8x4_neon;
+ aom_sad8x4x4d = aom_sad8x4x4d_c;
+ if (flags & HAS_NEON)
+ aom_sad8x4x4d = aom_sad8x4x4d_neon;
aom_sad8x8 = aom_sad8x8_c;
if (flags & HAS_NEON)
aom_sad8x8 = aom_sad8x8_neon;
+ aom_sad8x8x4d = aom_sad8x8x4d_c;
+ if (flags & HAS_NEON)
+ aom_sad8x8x4d = aom_sad8x8x4d_neon;
aom_sad_skip_128x128 = aom_sad_skip_128x128_c;
if (flags & HAS_NEON)
aom_sad_skip_128x128 = aom_sad_skip_128x128_neon;
@@ -5260,12 +6441,75 @@ static void setup_rtcd_internal(void) {
aom_scaled_2d = aom_scaled_2d_c;
if (flags & HAS_NEON)
aom_scaled_2d = aom_scaled_2d_neon;
+ aom_smooth_h_predictor_16x16 = aom_smooth_h_predictor_16x16_c;
+ if (flags & HAS_NEON)
+ aom_smooth_h_predictor_16x16 = aom_smooth_h_predictor_16x16_neon;
+ aom_smooth_h_predictor_16x32 = aom_smooth_h_predictor_16x32_c;
+ if (flags & HAS_NEON)
+ aom_smooth_h_predictor_16x32 = aom_smooth_h_predictor_16x32_neon;
+ aom_smooth_h_predictor_16x4 = aom_smooth_h_predictor_16x4_c;
+ if (flags & HAS_NEON)
+ aom_smooth_h_predictor_16x4 = aom_smooth_h_predictor_16x4_neon;
+ aom_smooth_h_predictor_16x64 = aom_smooth_h_predictor_16x64_c;
+ if (flags & HAS_NEON)
+ aom_smooth_h_predictor_16x64 = aom_smooth_h_predictor_16x64_neon;
+ aom_smooth_h_predictor_16x8 = aom_smooth_h_predictor_16x8_c;
+ if (flags & HAS_NEON)
+ aom_smooth_h_predictor_16x8 = aom_smooth_h_predictor_16x8_neon;
+ aom_smooth_h_predictor_32x16 = aom_smooth_h_predictor_32x16_c;
+ if (flags & HAS_NEON)
+ aom_smooth_h_predictor_32x16 = aom_smooth_h_predictor_32x16_neon;
+ aom_smooth_h_predictor_32x32 = aom_smooth_h_predictor_32x32_c;
+ if (flags & HAS_NEON)
+ aom_smooth_h_predictor_32x32 = aom_smooth_h_predictor_32x32_neon;
+ aom_smooth_h_predictor_32x64 = aom_smooth_h_predictor_32x64_c;
+ if (flags & HAS_NEON)
+ aom_smooth_h_predictor_32x64 = aom_smooth_h_predictor_32x64_neon;
+ aom_smooth_h_predictor_32x8 = aom_smooth_h_predictor_32x8_c;
+ if (flags & HAS_NEON)
+ aom_smooth_h_predictor_32x8 = aom_smooth_h_predictor_32x8_neon;
+ aom_smooth_h_predictor_4x16 = aom_smooth_h_predictor_4x16_c;
+ if (flags & HAS_NEON)
+ aom_smooth_h_predictor_4x16 = aom_smooth_h_predictor_4x16_neon;
+ aom_smooth_h_predictor_4x4 = aom_smooth_h_predictor_4x4_c;
+ if (flags & HAS_NEON)
+ aom_smooth_h_predictor_4x4 = aom_smooth_h_predictor_4x4_neon;
+ aom_smooth_h_predictor_4x8 = aom_smooth_h_predictor_4x8_c;
+ if (flags & HAS_NEON)
+ aom_smooth_h_predictor_4x8 = aom_smooth_h_predictor_4x8_neon;
+ aom_smooth_h_predictor_64x16 = aom_smooth_h_predictor_64x16_c;
+ if (flags & HAS_NEON)
+ aom_smooth_h_predictor_64x16 = aom_smooth_h_predictor_64x16_neon;
+ aom_smooth_h_predictor_64x32 = aom_smooth_h_predictor_64x32_c;
+ if (flags & HAS_NEON)
+ aom_smooth_h_predictor_64x32 = aom_smooth_h_predictor_64x32_neon;
+ aom_smooth_h_predictor_64x64 = aom_smooth_h_predictor_64x64_c;
+ if (flags & HAS_NEON)
+ aom_smooth_h_predictor_64x64 = aom_smooth_h_predictor_64x64_neon;
+ aom_smooth_h_predictor_8x16 = aom_smooth_h_predictor_8x16_c;
+ if (flags & HAS_NEON)
+ aom_smooth_h_predictor_8x16 = aom_smooth_h_predictor_8x16_neon;
+ aom_smooth_h_predictor_8x32 = aom_smooth_h_predictor_8x32_c;
+ if (flags & HAS_NEON)
+ aom_smooth_h_predictor_8x32 = aom_smooth_h_predictor_8x32_neon;
+ aom_smooth_h_predictor_8x4 = aom_smooth_h_predictor_8x4_c;
+ if (flags & HAS_NEON)
+ aom_smooth_h_predictor_8x4 = aom_smooth_h_predictor_8x4_neon;
+ aom_smooth_h_predictor_8x8 = aom_smooth_h_predictor_8x8_c;
+ if (flags & HAS_NEON)
+ aom_smooth_h_predictor_8x8 = aom_smooth_h_predictor_8x8_neon;
aom_smooth_predictor_16x16 = aom_smooth_predictor_16x16_c;
if (flags & HAS_NEON)
aom_smooth_predictor_16x16 = aom_smooth_predictor_16x16_neon;
aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_c;
if (flags & HAS_NEON)
aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_neon;
+ aom_smooth_predictor_16x4 = aom_smooth_predictor_16x4_c;
+ if (flags & HAS_NEON)
+ aom_smooth_predictor_16x4 = aom_smooth_predictor_16x4_neon;
+ aom_smooth_predictor_16x64 = aom_smooth_predictor_16x64_c;
+ if (flags & HAS_NEON)
+ aom_smooth_predictor_16x64 = aom_smooth_predictor_16x64_neon;
aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_c;
if (flags & HAS_NEON)
aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_neon;
@@ -5278,12 +6522,21 @@ static void setup_rtcd_internal(void) {
aom_smooth_predictor_32x64 = aom_smooth_predictor_32x64_c;
if (flags & HAS_NEON)
aom_smooth_predictor_32x64 = aom_smooth_predictor_32x64_neon;
+ aom_smooth_predictor_32x8 = aom_smooth_predictor_32x8_c;
+ if (flags & HAS_NEON)
+ aom_smooth_predictor_32x8 = aom_smooth_predictor_32x8_neon;
+ aom_smooth_predictor_4x16 = aom_smooth_predictor_4x16_c;
+ if (flags & HAS_NEON)
+ aom_smooth_predictor_4x16 = aom_smooth_predictor_4x16_neon;
aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_c;
if (flags & HAS_NEON)
aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_neon;
aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_c;
if (flags & HAS_NEON)
aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_neon;
+ aom_smooth_predictor_64x16 = aom_smooth_predictor_64x16_c;
+ if (flags & HAS_NEON)
+ aom_smooth_predictor_64x16 = aom_smooth_predictor_64x16_neon;
aom_smooth_predictor_64x32 = aom_smooth_predictor_64x32_c;
if (flags & HAS_NEON)
aom_smooth_predictor_64x32 = aom_smooth_predictor_64x32_neon;
@@ -5293,12 +6546,72 @@ static void setup_rtcd_internal(void) {
aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_c;
if (flags & HAS_NEON)
aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_neon;
+ aom_smooth_predictor_8x32 = aom_smooth_predictor_8x32_c;
+ if (flags & HAS_NEON)
+ aom_smooth_predictor_8x32 = aom_smooth_predictor_8x32_neon;
aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_c;
if (flags & HAS_NEON)
aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_neon;
aom_smooth_predictor_8x8 = aom_smooth_predictor_8x8_c;
if (flags & HAS_NEON)
aom_smooth_predictor_8x8 = aom_smooth_predictor_8x8_neon;
+ aom_smooth_v_predictor_16x16 = aom_smooth_v_predictor_16x16_c;
+ if (flags & HAS_NEON)
+ aom_smooth_v_predictor_16x16 = aom_smooth_v_predictor_16x16_neon;
+ aom_smooth_v_predictor_16x32 = aom_smooth_v_predictor_16x32_c;
+ if (flags & HAS_NEON)
+ aom_smooth_v_predictor_16x32 = aom_smooth_v_predictor_16x32_neon;
+ aom_smooth_v_predictor_16x4 = aom_smooth_v_predictor_16x4_c;
+ if (flags & HAS_NEON)
+ aom_smooth_v_predictor_16x4 = aom_smooth_v_predictor_16x4_neon;
+ aom_smooth_v_predictor_16x64 = aom_smooth_v_predictor_16x64_c;
+ if (flags & HAS_NEON)
+ aom_smooth_v_predictor_16x64 = aom_smooth_v_predictor_16x64_neon;
+ aom_smooth_v_predictor_16x8 = aom_smooth_v_predictor_16x8_c;
+ if (flags & HAS_NEON)
+ aom_smooth_v_predictor_16x8 = aom_smooth_v_predictor_16x8_neon;
+ aom_smooth_v_predictor_32x16 = aom_smooth_v_predictor_32x16_c;
+ if (flags & HAS_NEON)
+ aom_smooth_v_predictor_32x16 = aom_smooth_v_predictor_32x16_neon;
+ aom_smooth_v_predictor_32x32 = aom_smooth_v_predictor_32x32_c;
+ if (flags & HAS_NEON)
+ aom_smooth_v_predictor_32x32 = aom_smooth_v_predictor_32x32_neon;
+ aom_smooth_v_predictor_32x64 = aom_smooth_v_predictor_32x64_c;
+ if (flags & HAS_NEON)
+ aom_smooth_v_predictor_32x64 = aom_smooth_v_predictor_32x64_neon;
+ aom_smooth_v_predictor_32x8 = aom_smooth_v_predictor_32x8_c;
+ if (flags & HAS_NEON)
+ aom_smooth_v_predictor_32x8 = aom_smooth_v_predictor_32x8_neon;
+ aom_smooth_v_predictor_4x16 = aom_smooth_v_predictor_4x16_c;
+ if (flags & HAS_NEON)
+ aom_smooth_v_predictor_4x16 = aom_smooth_v_predictor_4x16_neon;
+ aom_smooth_v_predictor_4x4 = aom_smooth_v_predictor_4x4_c;
+ if (flags & HAS_NEON)
+ aom_smooth_v_predictor_4x4 = aom_smooth_v_predictor_4x4_neon;
+ aom_smooth_v_predictor_4x8 = aom_smooth_v_predictor_4x8_c;
+ if (flags & HAS_NEON)
+ aom_smooth_v_predictor_4x8 = aom_smooth_v_predictor_4x8_neon;
+ aom_smooth_v_predictor_64x16 = aom_smooth_v_predictor_64x16_c;
+ if (flags & HAS_NEON)
+ aom_smooth_v_predictor_64x16 = aom_smooth_v_predictor_64x16_neon;
+ aom_smooth_v_predictor_64x32 = aom_smooth_v_predictor_64x32_c;
+ if (flags & HAS_NEON)
+ aom_smooth_v_predictor_64x32 = aom_smooth_v_predictor_64x32_neon;
+ aom_smooth_v_predictor_64x64 = aom_smooth_v_predictor_64x64_c;
+ if (flags & HAS_NEON)
+ aom_smooth_v_predictor_64x64 = aom_smooth_v_predictor_64x64_neon;
+ aom_smooth_v_predictor_8x16 = aom_smooth_v_predictor_8x16_c;
+ if (flags & HAS_NEON)
+ aom_smooth_v_predictor_8x16 = aom_smooth_v_predictor_8x16_neon;
+ aom_smooth_v_predictor_8x32 = aom_smooth_v_predictor_8x32_c;
+ if (flags & HAS_NEON)
+ aom_smooth_v_predictor_8x32 = aom_smooth_v_predictor_8x32_neon;
+ aom_smooth_v_predictor_8x4 = aom_smooth_v_predictor_8x4_c;
+ if (flags & HAS_NEON)
+ aom_smooth_v_predictor_8x4 = aom_smooth_v_predictor_8x4_neon;
+ aom_smooth_v_predictor_8x8 = aom_smooth_v_predictor_8x8_c;
+ if (flags & HAS_NEON)
+ aom_smooth_v_predictor_8x8 = aom_smooth_v_predictor_8x8_neon;
aom_sse = aom_sse_c;
if (flags & HAS_NEON)
aom_sse = aom_sse_neon;
@@ -5356,6 +6669,9 @@ static void setup_rtcd_internal(void) {
aom_sum_squares_2d_i16 = aom_sum_squares_2d_i16_c;
if (flags & HAS_NEON)
aom_sum_squares_2d_i16 = aom_sum_squares_2d_i16_neon;
+ aom_sum_sse_2d_i16 = aom_sum_sse_2d_i16_c;
+ if (flags & HAS_NEON)
+ aom_sum_sse_2d_i16 = aom_sum_sse_2d_i16_neon;
aom_v_predictor_16x16 = aom_v_predictor_16x16_c;
if (flags & HAS_NEON)
aom_v_predictor_16x16 = aom_v_predictor_16x16_neon;
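Every hunk in this setup_rtcd_internal() block follows the same runtime-dispatch pattern: each entry point is first bound to its portable C implementation, then upgraded to the NEON variant when the CPU-detection flags report NEON. A minimal, self-contained sketch of that pattern (the function names here are hypothetical; in libaom the flags value comes from the library's ARM CPU-caps probe):

    #include <stdio.h>

    #define HAS_NEON 0x1 /* bit reported by runtime CPU detection */

    static int add_c(int a, int b) { return a + b; }    /* portable fallback */
    static int add_neon(int a, int b) { return a + b; } /* SIMD variant (stub) */

    /* RTCD_EXTERN-style dispatch pointer (hypothetical name) */
    static int (*add_dispatch)(int, int);

    static void setup_rtcd_internal(int flags) {
      add_dispatch = add_c;      /* always start from the C version */
      if (flags & HAS_NEON)
        add_dispatch = add_neon; /* upgrade when NEON is available */
    }

    int main(void) {
      setup_rtcd_internal(HAS_NEON);
      printf("%d\n", add_dispatch(2, 3)); /* calls the NEON stub */
      return 0;
    }

This is why the new x4d SAD and smooth/paeth predictor entries above are pure boilerplate: each newly ported NEON kernel only needs its two assignment lines here.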
diff --git a/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_scale_rtcd.h b/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_scale_rtcd.h
index e27b558dd38..7db7b9d0f84 100644
--- a/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_scale_rtcd.h
+++ b/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/aom_scale_rtcd.h
@@ -18,6 +18,12 @@ void aom_extend_frame_borders_c(struct yv12_buffer_config* ybf,
const int num_planes);
#define aom_extend_frame_borders aom_extend_frame_borders_c
+void aom_extend_frame_borders_plane_row_c(const struct yv12_buffer_config* ybf,
+ int plane,
+ int v_start,
+ int v_end);
+#define aom_extend_frame_borders_plane_row aom_extend_frame_borders_plane_row_c
+
void aom_extend_frame_borders_y_c(struct yv12_buffer_config* ybf);
#define aom_extend_frame_borders_y aom_extend_frame_borders_y_c
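The new aom_extend_frame_borders_plane_row() has only a C implementation in this config, so the generated header binds the public name with a plain #define rather than a dispatch pointer. Judging from its signature, the plane/v_start/v_end parameters let a caller extend borders for a single plane over a band of rows (plausibly for row-threaded use); a hedged call sketch, assuming ybf is a fully initialised frame buffer:

    /* Extend the borders of plane 0 over rows [0, rows) only --
     * hypothetical values; the buffer setup is elided. */
    aom_extend_frame_borders_plane_row(ybf, /*plane=*/0,
                                       /*v_start=*/0, /*v_end=*/rows);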
diff --git a/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/av1_rtcd.h b/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/av1_rtcd.h
index 90c222a2a20..09e25d18330 100644
--- a/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/av1_rtcd.h
+++ b/chromium/third_party/libaom/source/config/linux/arm-neon-cpu-detect/config/av1_rtcd.h
@@ -197,6 +197,40 @@ void aom_upsampled_pred_c(MACROBLOCKD* xd,
int subpel_search);
#define aom_upsampled_pred aom_upsampled_pred_c
+void av1_apply_selfguided_restoration_c(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+void av1_apply_selfguided_restoration_neon(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+RTCD_EXTERN void (*av1_apply_selfguided_restoration)(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+
int64_t av1_block_error_c(const tran_low_t* coeff,
const tran_low_t* dqcoeff,
intptr_t block_size,
@@ -262,6 +296,14 @@ RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_d16)(
ConvolveParams* conv_params,
int bd);
+int64_t av1_calc_frame_error_c(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+#define av1_calc_frame_error av1_calc_frame_error_c
+
void av1_calc_indices_dim1_c(const int* data,
const int* centroids,
uint8_t* indices,
@@ -1683,6 +1725,37 @@ void av1_round_shift_array_c(int32_t* arr, int size, int bit);
void av1_round_shift_array_neon(int32_t* arr, int size, int bit);
RTCD_EXTERN void (*av1_round_shift_array)(int32_t* arr, int size, int bit);
+int av1_selfguided_restoration_c(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+int av1_selfguided_restoration_neon(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+RTCD_EXTERN int (*av1_selfguided_restoration)(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+
void av1_txb_init_levels_c(const tran_low_t* const coeff,
const int width,
const int height,
@@ -1702,6 +1775,61 @@ void av1_upsample_intra_edge_c(uint8_t* p, int sz);
void av1_upsample_intra_edge_high_c(uint16_t* p, int sz, int bd);
#define av1_upsample_intra_edge_high av1_upsample_intra_edge_high_c
+void av1_warp_affine_c(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+void av1_warp_affine_neon(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+RTCD_EXTERN void (*av1_warp_affine)(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+
void av1_wedge_compute_delta_squares_c(int16_t* d,
const int16_t* a,
const int16_t* b,
@@ -1718,7 +1846,14 @@ uint64_t av1_wedge_sse_from_residuals_c(const int16_t* r1,
const int16_t* d,
const uint8_t* m,
int N);
-#define av1_wedge_sse_from_residuals av1_wedge_sse_from_residuals_c
+uint64_t av1_wedge_sse_from_residuals_neon(const int16_t* r1,
+ const int16_t* d,
+ const uint8_t* m,
+ int N);
+RTCD_EXTERN uint64_t (*av1_wedge_sse_from_residuals)(const int16_t* r1,
+ const int16_t* d,
+ const uint8_t* m,
+ int N);
void av1_wiener_convolve_add_src_c(const uint8_t* src,
ptrdiff_t src_stride,
@@ -2086,7 +2221,22 @@ void cdef_find_dir_dual_c(const uint16_t* img1,
int coeff_shift,
int* out1,
int* out2);
-#define cdef_find_dir_dual cdef_find_dir_dual_c
+void cdef_find_dir_dual_neon(const uint16_t* img1,
+ const uint16_t* img2,
+ int stride,
+ int32_t* var1,
+ int32_t* var2,
+ int coeff_shift,
+ int* out1,
+ int* out2);
+RTCD_EXTERN void (*cdef_find_dir_dual)(const uint16_t* img1,
+ const uint16_t* img2,
+ int stride,
+ int32_t* var1,
+ int32_t* var2,
+ int coeff_shift,
+ int* out1,
+ int* out2);
cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_c(TX_SIZE tx_size);
cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_neon(TX_SIZE tx_size);
@@ -2126,6 +2276,9 @@ static void setup_rtcd_internal(void) {
aom_quantize_b_helper = aom_quantize_b_helper_c;
if (flags & HAS_NEON)
aom_quantize_b_helper = aom_quantize_b_helper_neon;
+ av1_apply_selfguided_restoration = av1_apply_selfguided_restoration_c;
+ if (flags & HAS_NEON)
+ av1_apply_selfguided_restoration = av1_apply_selfguided_restoration_neon;
av1_block_error = av1_block_error_c;
if (flags & HAS_NEON)
av1_block_error = av1_block_error_neon;
@@ -2352,9 +2505,18 @@ static void setup_rtcd_internal(void) {
av1_round_shift_array = av1_round_shift_array_c;
if (flags & HAS_NEON)
av1_round_shift_array = av1_round_shift_array_neon;
+ av1_selfguided_restoration = av1_selfguided_restoration_c;
+ if (flags & HAS_NEON)
+ av1_selfguided_restoration = av1_selfguided_restoration_neon;
av1_txb_init_levels = av1_txb_init_levels_c;
if (flags & HAS_NEON)
av1_txb_init_levels = av1_txb_init_levels_neon;
+ av1_warp_affine = av1_warp_affine_c;
+ if (flags & HAS_NEON)
+ av1_warp_affine = av1_warp_affine_neon;
+ av1_wedge_sse_from_residuals = av1_wedge_sse_from_residuals_c;
+ if (flags & HAS_NEON)
+ av1_wedge_sse_from_residuals = av1_wedge_sse_from_residuals_neon;
av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_c;
if (flags & HAS_NEON)
av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_neon;
@@ -2391,6 +2553,9 @@ static void setup_rtcd_internal(void) {
cdef_find_dir = cdef_find_dir_c;
if (flags & HAS_NEON)
cdef_find_dir = cdef_find_dir_neon;
+ cdef_find_dir_dual = cdef_find_dir_dual_c;
+ if (flags & HAS_NEON)
+ cdef_find_dir_dual = cdef_find_dir_dual_neon;
cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_c;
if (flags & HAS_NEON)
cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_neon;
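RTCD_EXTERN is what lets one generated header serve as both the declaration and the definition site for these pointers: it expands to extern in every translation unit except the single one that defines RTCD_C before including the header. A condensed sketch of the mechanism as rtcd-generated headers conventionally implement it (av1_wedge_sse_from_residuals is the real symbol from the hunk above; the surrounding scaffolding is abbreviated):

    #include <stdint.h>

    #ifdef RTCD_C
    #define RTCD_EXTERN        /* this TU owns the pointer definitions */
    #else
    #define RTCD_EXTERN extern /* all other TUs see declarations only */
    #endif

    RTCD_EXTERN uint64_t (*av1_wedge_sse_from_residuals)(const int16_t *r1,
                                                         const int16_t *d,
                                                         const uint8_t *m,
                                                         int N);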
diff --git a/chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_config.asm b/chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_config.asm
index 15df49ee5ab..c5ebd6ab9ba 100644
--- a/chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_config.asm
+++ b/chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_config.asm
@@ -9,7 +9,6 @@
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;
ARCH_ARM equ 1
-ARCH_MIPS equ 0
ARCH_PPC equ 0
ARCH_X86 equ 0
ARCH_X86_64 equ 0
@@ -33,8 +32,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
CONFIG_ENTROPY_STATS equ 0
CONFIG_EXCLUDE_SIMD_MISMATCH equ 0
CONFIG_FPMT_TEST equ 0
-CONFIG_FRAME_PARALLEL_ENCODE equ 0
-CONFIG_FRAME_PARALLEL_ENCODE_2 equ 0
CONFIG_GCC equ 1
CONFIG_GCOV equ 0
CONFIG_GPROF equ 0
@@ -50,7 +47,7 @@ CONFIG_NORMAL_TILE_MODE equ 1
CONFIG_OPTICAL_FLOW_API equ 0
CONFIG_OS_SUPPORT equ 1
CONFIG_PARTITION_SEARCH_ORDER equ 0
-CONFIG_PIC equ 0
+CONFIG_PIC equ 1
CONFIG_RATECTRL_LOG equ 0
CONFIG_RD_COMMAND equ 0
CONFIG_RD_DEBUG equ 0
@@ -69,14 +66,11 @@ CONFIG_WEBM_IO equ 1
DECODE_HEIGHT_LIMIT equ 16384
DECODE_WIDTH_LIMIT equ 16384
FORCE_HIGHBITDEPTH_DECODING equ 0
+HAVE_ARM_CRC32 equ 0
HAVE_AVX equ 0
HAVE_AVX2 equ 0
-HAVE_DSPR2 equ 0
HAVE_FEXCEPT equ 1
-HAVE_MIPS32 equ 0
-HAVE_MIPS64 equ 0
HAVE_MMX equ 0
-HAVE_MSA equ 0
HAVE_NEON equ 1
HAVE_PTHREAD_H equ 1
HAVE_SSE equ 0
diff --git a/chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_config.c b/chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_config.c
index c20c00bb8b1..237bcef9085 100644
--- a/chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_config.c
+++ b/chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_config.c
@@ -9,5 +9,5 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom/aom_codec.h"
-static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DCMAKE_TOOLCHAIN_FILE=\"../source/libaom/build/cmake/toolchains/armv7-linux-gcc.cmake\" -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
+static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DCMAKE_TOOLCHAIN_FILE=\"../source/libaom/build/cmake/toolchains/armv7-linux-gcc.cmake\" -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
const char *aom_codec_build_config(void) {return cfg;}
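The only change in this hunk is the recorded order of two cmake flags (CONFIG_REALTIME_ONLY now appears after the temporal-denoising switch); the effective configuration is identical. The string exists so that aom_codec_build_config() can report at runtime exactly how the linked libaom was configured, e.g.:

    #include <stdio.h>
    #include "aom/aom_codec.h"

    int main(void) {
      /* Prints the cmake invocation baked into aom_config.c above --
       * useful for verifying which configuration a binary links against. */
      printf("%s\n", aom_codec_build_config());
      return 0;
    }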
diff --git a/chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_config.h b/chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_config.h
index f8e23f257de..dbbde9993bd 100644
--- a/chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_config.h
+++ b/chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_config.h
@@ -11,7 +11,6 @@
#ifndef AOM_CONFIG_H_
#define AOM_CONFIG_H_
#define ARCH_ARM 1
-#define ARCH_MIPS 0
#define ARCH_PPC 0
#define ARCH_X86 0
#define ARCH_X86_64 0
@@ -35,8 +34,6 @@
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_EXCLUDE_SIMD_MISMATCH 0
#define CONFIG_FPMT_TEST 0
-#define CONFIG_FRAME_PARALLEL_ENCODE 0
-#define CONFIG_FRAME_PARALLEL_ENCODE_2 0
#define CONFIG_GCC 1
#define CONFIG_GCOV 0
#define CONFIG_GPROF 0
@@ -52,7 +49,7 @@
#define CONFIG_OPTICAL_FLOW_API 0
#define CONFIG_OS_SUPPORT 1
#define CONFIG_PARTITION_SEARCH_ORDER 0
-#define CONFIG_PIC 0
+#define CONFIG_PIC 1
#define CONFIG_RATECTRL_LOG 0
#define CONFIG_RD_COMMAND 0
#define CONFIG_RD_DEBUG 0
@@ -71,14 +68,11 @@
#define DECODE_HEIGHT_LIMIT 16384
#define DECODE_WIDTH_LIMIT 16384
#define FORCE_HIGHBITDEPTH_DECODING 0
+#define HAVE_ARM_CRC32 0
#define HAVE_AVX 0
#define HAVE_AVX2 0
-#define HAVE_DSPR2 0
#define HAVE_FEXCEPT 1
-#define HAVE_MIPS32 0
-#define HAVE_MIPS64 0
#define HAVE_MMX 0
-#define HAVE_MSA 0
#define HAVE_NEON 1
#define HAVE_PTHREAD_H 1
#define HAVE_SSE 0
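These generated macros are consumed as ordinary preprocessor conditionals, so the substance of this hunk is that MIPS support is gone (ARCH_MIPS, HAVE_DSPR2, HAVE_MIPS32/64, HAVE_MSA removed), an ARM CRC32 capability flag is introduced (off here), and the build flips to position-independent code (CONFIG_PIC 0 to 1). A hedged sketch of how such flags typically gate code (the include path matches libaom's layout; the gated bodies are placeholders):

    #include "config/aom_config.h"

    #if HAVE_NEON
    /* NEON translation units are built, so the *_neon symbols exist. */
    #endif

    #if CONFIG_PIC
    /* Position-independent build: ARM assembly must use PC-relative
     * addressing rather than absolute literal-pool addresses. */
    #endif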
diff --git a/chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_dsp_rtcd.h b/chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_dsp_rtcd.h
index 155c11857c0..c2d2ac85aaa 100644
--- a/chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_dsp_rtcd.h
+++ b/chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_dsp_rtcd.h
@@ -34,7 +34,12 @@ void aom_avg_8x8_quad_c(const uint8_t* s,
int x16_idx,
int y16_idx,
int* avg);
-#define aom_avg_8x8_quad aom_avg_8x8_quad_c
+void aom_avg_8x8_quad_neon(const uint8_t* s,
+ int p,
+ int x16_idx,
+ int y16_idx,
+ int* avg);
+#define aom_avg_8x8_quad aom_avg_8x8_quad_neon
void aom_blend_a64_hmask_c(uint8_t* dst,
uint32_t dst_stride,
@@ -176,18 +181,24 @@ void aom_dc_128_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_c
+void aom_dc_128_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x4 aom_dc_128_predictor_16x4_c
+
+void aom_dc_128_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x64 aom_dc_128_predictor_16x64_c
+
void aom_dc_128_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_c
-void aom_dc_128_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c
-
void aom_dc_128_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -210,6 +221,18 @@ void aom_dc_128_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_32x64 aom_dc_128_predictor_32x64_c
+void aom_dc_128_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_32x8 aom_dc_128_predictor_32x8_c
+
+void aom_dc_128_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_4x16 aom_dc_128_predictor_4x16_c
+
void aom_dc_128_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -226,6 +249,12 @@ void aom_dc_128_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_c
+void aom_dc_128_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_64x16 aom_dc_128_predictor_64x16_c
+
void aom_dc_128_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -244,6 +273,12 @@ void aom_dc_128_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_c
+void aom_dc_128_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_8x32 aom_dc_128_predictor_8x32_c
+
void aom_dc_128_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -276,18 +311,24 @@ void aom_dc_left_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_c
+void aom_dc_left_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x4 aom_dc_left_predictor_16x4_c
+
+void aom_dc_left_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x64 aom_dc_left_predictor_16x64_c
+
void aom_dc_left_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_c
-void aom_dc_left_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c
-
void aom_dc_left_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -310,6 +351,18 @@ void aom_dc_left_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_32x64 aom_dc_left_predictor_32x64_c
+void aom_dc_left_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_32x8 aom_dc_left_predictor_32x8_c
+
+void aom_dc_left_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_4x16 aom_dc_left_predictor_4x16_c
+
void aom_dc_left_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -326,6 +379,12 @@ void aom_dc_left_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_c
+void aom_dc_left_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_64x16 aom_dc_left_predictor_64x16_c
+
void aom_dc_left_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -344,6 +403,12 @@ void aom_dc_left_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_c
+void aom_dc_left_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_8x32 aom_dc_left_predictor_8x32_c
+
void aom_dc_left_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -376,18 +441,24 @@ void aom_dc_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_c
+void aom_dc_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x4 aom_dc_predictor_16x4_c
+
+void aom_dc_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x64 aom_dc_predictor_16x64_c
+
void aom_dc_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_c
-void aom_dc_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c
-
void aom_dc_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -410,6 +481,18 @@ void aom_dc_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_32x64 aom_dc_predictor_32x64_c
+void aom_dc_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_32x8 aom_dc_predictor_32x8_c
+
+void aom_dc_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_4x16 aom_dc_predictor_4x16_c
+
void aom_dc_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -426,6 +509,12 @@ void aom_dc_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_c
+void aom_dc_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_64x16 aom_dc_predictor_64x16_c
+
void aom_dc_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -444,6 +533,12 @@ void aom_dc_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_c
+void aom_dc_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_8x32 aom_dc_predictor_8x32_c
+
void aom_dc_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -476,18 +571,24 @@ void aom_dc_top_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_c
+void aom_dc_top_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x4 aom_dc_top_predictor_16x4_c
+
+void aom_dc_top_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x64 aom_dc_top_predictor_16x64_c
+
void aom_dc_top_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_c
-void aom_dc_top_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c
-
void aom_dc_top_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -510,6 +611,18 @@ void aom_dc_top_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_32x64 aom_dc_top_predictor_32x64_c
+void aom_dc_top_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_32x8 aom_dc_top_predictor_32x8_c
+
+void aom_dc_top_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_4x16 aom_dc_top_predictor_4x16_c
+
void aom_dc_top_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -526,6 +639,12 @@ void aom_dc_top_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_c
+void aom_dc_top_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_64x16 aom_dc_top_predictor_64x16_c
+
void aom_dc_top_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -544,6 +663,12 @@ void aom_dc_top_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_c
+void aom_dc_top_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_8x32 aom_dc_top_predictor_8x32_c
+
void aom_dc_top_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1014,18 +1139,24 @@ void aom_h_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_16x32 aom_h_predictor_16x32_c
+void aom_h_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x4 aom_h_predictor_16x4_c
+
+void aom_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x64 aom_h_predictor_16x64_c
+
void aom_h_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_h_predictor_16x8 aom_h_predictor_16x8_c
-void aom_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_h_predictor_2x2 aom_h_predictor_2x2_c
-
void aom_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1048,6 +1179,18 @@ void aom_h_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_32x64 aom_h_predictor_32x64_c
+void aom_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_32x8 aom_h_predictor_32x8_c
+
+void aom_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_4x16 aom_h_predictor_4x16_c
+
void aom_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1064,6 +1207,12 @@ void aom_h_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_4x8 aom_h_predictor_4x8_c
+void aom_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_64x16 aom_h_predictor_64x16_c
+
void aom_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1082,6 +1231,12 @@ void aom_h_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_8x16 aom_h_predictor_8x16_c
+void aom_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_8x32 aom_h_predictor_8x32_c
+
void aom_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1124,11 +1279,6 @@ void aom_hadamard_8x8_neon(const int16_t* src_diff,
tran_low_t* coeff);
#define aom_hadamard_8x8 aom_hadamard_8x8_neon
-void aom_hadamard_8x8_dual_c(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-#define aom_hadamard_8x8_dual aom_hadamard_8x8_dual_c
-
void aom_hadamard_lp_16x16_c(const int16_t* src_diff,
ptrdiff_t src_stride,
int16_t* coeff);
@@ -1145,6 +1295,14 @@ void aom_hadamard_lp_8x8_neon(const int16_t* src_diff,
int16_t* coeff);
#define aom_hadamard_lp_8x8 aom_hadamard_lp_8x8_neon
+void aom_hadamard_lp_8x8_dual_c(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+void aom_hadamard_lp_8x8_dual_neon(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+#define aom_hadamard_lp_8x8_dual aom_hadamard_lp_8x8_dual_neon
+
void aom_ifft16x16_float_c(const float* input, float* temp, float* output);
#define aom_ifft16x16_float aom_ifft16x16_float_c
@@ -1160,18 +1318,32 @@ void aom_ifft4x4_float_c(const float* input, float* temp, float* output);
void aom_ifft8x8_float_c(const float* input, float* temp, float* output);
#define aom_ifft8x8_float aom_ifft8x8_float_c
-int16_t aom_int_pro_col_c(const uint8_t* ref, const int width);
-int16_t aom_int_pro_col_neon(const uint8_t* ref, const int width);
+void aom_int_pro_col_c(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_col_neon(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
#define aom_int_pro_col aom_int_pro_col_neon
-void aom_int_pro_row_c(int16_t hbuf[16],
+void aom_int_pro_row_c(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
-void aom_int_pro_row_neon(int16_t hbuf[16],
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_row_neon(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
+ const int width,
+ const int height,
+ int norm_factor);
#define aom_int_pro_row aom_int_pro_row_neon
void aom_lowbd_blend_a64_d16_mask_c(uint8_t* dst,
@@ -1237,7 +1409,12 @@ void aom_lpf_horizontal_14_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_14_quad aom_lpf_horizontal_14_quad_c
+void aom_lpf_horizontal_14_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_horizontal_14_quad aom_lpf_horizontal_14_quad_neon
void aom_lpf_horizontal_4_c(uint8_t* s,
int pitch,
@@ -1274,7 +1451,12 @@ void aom_lpf_horizontal_4_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_4_quad aom_lpf_horizontal_4_quad_c
+void aom_lpf_horizontal_4_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_horizontal_4_quad aom_lpf_horizontal_4_quad_neon
void aom_lpf_horizontal_6_c(uint8_t* s,
int pitch,
@@ -1311,7 +1493,12 @@ void aom_lpf_horizontal_6_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_6_quad aom_lpf_horizontal_6_quad_c
+void aom_lpf_horizontal_6_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_horizontal_6_quad aom_lpf_horizontal_6_quad_neon
void aom_lpf_horizontal_8_c(uint8_t* s,
int pitch,
@@ -1348,7 +1535,12 @@ void aom_lpf_horizontal_8_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_8_quad aom_lpf_horizontal_8_quad_c
+void aom_lpf_horizontal_8_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_horizontal_8_quad aom_lpf_horizontal_8_quad_neon
void aom_lpf_vertical_14_c(uint8_t* s,
int pitch,
@@ -1385,7 +1577,12 @@ void aom_lpf_vertical_14_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_14_quad aom_lpf_vertical_14_quad_c
+void aom_lpf_vertical_14_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_vertical_14_quad aom_lpf_vertical_14_quad_neon
void aom_lpf_vertical_4_c(uint8_t* s,
int pitch,
@@ -1422,7 +1619,12 @@ void aom_lpf_vertical_4_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_4_quad aom_lpf_vertical_4_quad_c
+void aom_lpf_vertical_4_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_vertical_4_quad aom_lpf_vertical_4_quad_neon
void aom_lpf_vertical_6_c(uint8_t* s,
int pitch,
@@ -1459,7 +1661,12 @@ void aom_lpf_vertical_6_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_6_quad aom_lpf_vertical_6_quad_c
+void aom_lpf_vertical_6_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_vertical_6_quad aom_lpf_vertical_6_quad_neon
void aom_lpf_vertical_8_c(uint8_t* s,
int pitch,
@@ -1496,7 +1703,12 @@ void aom_lpf_vertical_8_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_8_quad aom_lpf_vertical_8_quad_c
+void aom_lpf_vertical_8_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_vertical_8_quad aom_lpf_vertical_8_quad_neon
unsigned int aom_masked_sad128x128_c(const uint8_t* src,
int src_stride,
@@ -2070,21 +2282,43 @@ unsigned int aom_mse16x8_c(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
-#define aom_mse16x8 aom_mse16x8_c
+unsigned int aom_mse16x8_neon(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+#define aom_mse16x8 aom_mse16x8_neon
unsigned int aom_mse8x16_c(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
-#define aom_mse8x16 aom_mse8x16_c
+unsigned int aom_mse8x16_neon(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+#define aom_mse8x16 aom_mse8x16_neon
unsigned int aom_mse8x8_c(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
-#define aom_mse8x8 aom_mse8x8_c
+unsigned int aom_mse8x8_neon(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+#define aom_mse8x8 aom_mse8x8_neon
+
+uint64_t aom_mse_16xh_16bit_c(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+#define aom_mse_16xh_16bit aom_mse_16xh_16bit_c
uint64_t aom_mse_wxh_16bit_c(uint8_t* dst,
int dstride,
@@ -2098,85 +2332,191 @@ void aom_paeth_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_16x16 aom_paeth_predictor_16x16_c
+void aom_paeth_predictor_16x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x16 aom_paeth_predictor_16x16_neon
void aom_paeth_predictor_16x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_16x32 aom_paeth_predictor_16x32_c
+void aom_paeth_predictor_16x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x32 aom_paeth_predictor_16x32_neon
-void aom_paeth_predictor_16x8_c(uint8_t* dst,
+void aom_paeth_predictor_16x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_16x8 aom_paeth_predictor_16x8_c
+void aom_paeth_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x4 aom_paeth_predictor_16x4_neon
-void aom_paeth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c
+void aom_paeth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x64 aom_paeth_predictor_16x64_neon
+
+void aom_paeth_predictor_16x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x8 aom_paeth_predictor_16x8_neon
void aom_paeth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_32x16 aom_paeth_predictor_32x16_c
+void aom_paeth_predictor_32x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x16 aom_paeth_predictor_32x16_neon
void aom_paeth_predictor_32x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_32x32 aom_paeth_predictor_32x32_c
+void aom_paeth_predictor_32x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x32 aom_paeth_predictor_32x32_neon
void aom_paeth_predictor_32x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_32x64 aom_paeth_predictor_32x64_c
+void aom_paeth_predictor_32x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x64 aom_paeth_predictor_32x64_neon
+
+void aom_paeth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x8 aom_paeth_predictor_32x8_neon
+
+void aom_paeth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_4x16 aom_paeth_predictor_4x16_neon
void aom_paeth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_4x4 aom_paeth_predictor_4x4_c
+void aom_paeth_predictor_4x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_4x4 aom_paeth_predictor_4x4_neon
void aom_paeth_predictor_4x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_4x8 aom_paeth_predictor_4x8_c
+void aom_paeth_predictor_4x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_4x8 aom_paeth_predictor_4x8_neon
+
+void aom_paeth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_64x16 aom_paeth_predictor_64x16_neon
void aom_paeth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_64x32 aom_paeth_predictor_64x32_c
+void aom_paeth_predictor_64x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_64x32 aom_paeth_predictor_64x32_neon
void aom_paeth_predictor_64x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_64x64 aom_paeth_predictor_64x64_c
+void aom_paeth_predictor_64x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_64x64 aom_paeth_predictor_64x64_neon
void aom_paeth_predictor_8x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_8x16 aom_paeth_predictor_8x16_c
+void aom_paeth_predictor_8x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x16 aom_paeth_predictor_8x16_neon
+
+void aom_paeth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x32 aom_paeth_predictor_8x32_neon
void aom_paeth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_8x4 aom_paeth_predictor_8x4_c
+void aom_paeth_predictor_8x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x4 aom_paeth_predictor_8x4_neon
void aom_paeth_predictor_8x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_8x8 aom_paeth_predictor_8x8_c
+void aom_paeth_predictor_8x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x8 aom_paeth_predictor_8x8_neon
void aom_pixel_scale_c(const int16_t* src_diff,
ptrdiff_t src_stride,
@@ -2238,20 +2578,6 @@ void aom_quantize_b_32x32_neon(const tran_low_t* coeff_ptr,
const int16_t* iscan);
#define aom_quantize_b_32x32 aom_quantize_b_32x32_neon
-void aom_quantize_b_32x32_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_32x32_adaptive aom_quantize_b_32x32_adaptive_c
-
void aom_quantize_b_64x64_c(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -2278,34 +2604,6 @@ void aom_quantize_b_64x64_neon(const tran_low_t* coeff_ptr,
const int16_t* iscan);
#define aom_quantize_b_64x64 aom_quantize_b_64x64_neon
-void aom_quantize_b_64x64_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_64x64_adaptive aom_quantize_b_64x64_adaptive_c
-
-void aom_quantize_b_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_adaptive aom_quantize_b_adaptive_c
-
unsigned int aom_sad128x128_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
@@ -2328,7 +2626,12 @@ void aom_sad128x128x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad128x128x4d aom_sad128x128x4d_c
+void aom_sad128x128x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad128x128x4d aom_sad128x128x4d_neon
void aom_sad128x128x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2342,7 +2645,11 @@ unsigned int aom_sad128x64_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad128x64 aom_sad128x64_c
+unsigned int aom_sad128x64_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad128x64 aom_sad128x64_neon
unsigned int aom_sad128x64_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2356,7 +2663,12 @@ void aom_sad128x64x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad128x64x4d aom_sad128x64x4d_c
+void aom_sad128x64x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad128x64x4d aom_sad128x64x4d_neon
void aom_sad128x64x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2415,7 +2727,11 @@ unsigned int aom_sad16x32_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad16x32 aom_sad16x32_c
+unsigned int aom_sad16x32_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad16x32 aom_sad16x32_neon
unsigned int aom_sad16x32_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2429,7 +2745,12 @@ void aom_sad16x32x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad16x32x4d aom_sad16x32x4d_c
+void aom_sad16x32x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad16x32x4d aom_sad16x32x4d_neon
void aom_sad16x32x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2461,7 +2782,12 @@ void aom_sad16x8x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad16x8x4d aom_sad16x8x4d_c
+void aom_sad16x8x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad16x8x4d aom_sad16x8x4d_neon
void aom_sad16x8x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2483,7 +2809,11 @@ unsigned int aom_sad32x16_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad32x16 aom_sad32x16_c
+unsigned int aom_sad32x16_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad32x16 aom_sad32x16_neon
unsigned int aom_sad32x16_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2497,7 +2827,12 @@ void aom_sad32x16x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad32x16x4d aom_sad32x16x4d_c
+void aom_sad32x16x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad32x16x4d aom_sad32x16x4d_neon
void aom_sad32x16x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2548,7 +2883,11 @@ unsigned int aom_sad32x64_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad32x64 aom_sad32x64_c
+unsigned int aom_sad32x64_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad32x64 aom_sad32x64_neon
unsigned int aom_sad32x64_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2562,7 +2901,12 @@ void aom_sad32x64x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad32x64x4d aom_sad32x64x4d_c
+void aom_sad32x64x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad32x64x4d aom_sad32x64x4d_neon
void aom_sad32x64x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2602,7 +2946,12 @@ void aom_sad4x4x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad4x4x4d aom_sad4x4x4d_c
+void aom_sad4x4x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad4x4x4d aom_sad4x4x4d_neon
void aom_sad4x4x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2616,7 +2965,11 @@ unsigned int aom_sad4x8_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad4x8 aom_sad4x8_c
+unsigned int aom_sad4x8_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad4x8 aom_sad4x8_neon
unsigned int aom_sad4x8_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2630,7 +2983,12 @@ void aom_sad4x8x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad4x8x4d aom_sad4x8x4d_c
+void aom_sad4x8x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad4x8x4d aom_sad4x8x4d_neon
void aom_sad4x8x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2652,7 +3010,11 @@ unsigned int aom_sad64x128_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad64x128 aom_sad64x128_c
+unsigned int aom_sad64x128_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad64x128 aom_sad64x128_neon
unsigned int aom_sad64x128_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2666,7 +3028,12 @@ void aom_sad64x128x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad64x128x4d aom_sad64x128x4d_c
+void aom_sad64x128x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad64x128x4d aom_sad64x128x4d_neon
void aom_sad64x128x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2680,7 +3047,11 @@ unsigned int aom_sad64x32_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad64x32 aom_sad64x32_c
+unsigned int aom_sad64x32_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad64x32 aom_sad64x32_neon
unsigned int aom_sad64x32_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2694,7 +3065,12 @@ void aom_sad64x32x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad64x32x4d aom_sad64x32x4d_c
+void aom_sad64x32x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad64x32x4d aom_sad64x32x4d_neon
void aom_sad64x32x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2771,7 +3147,12 @@ void aom_sad8x16x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad8x16x4d aom_sad8x16x4d_c
+void aom_sad8x16x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad8x16x4d aom_sad8x16x4d_neon
void aom_sad8x16x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2785,7 +3166,11 @@ unsigned int aom_sad8x4_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad8x4 aom_sad8x4_c
+unsigned int aom_sad8x4_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad8x4 aom_sad8x4_neon
unsigned int aom_sad8x4_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2799,7 +3184,12 @@ void aom_sad8x4x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad8x4x4d aom_sad8x4x4d_c
+void aom_sad8x4x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad8x4x4d aom_sad8x4x4d_neon
void aom_sad8x4x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2831,7 +3221,12 @@ void aom_sad8x8x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad8x8x4d aom_sad8x8x4d_c
+void aom_sad8x8x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad8x8x4d aom_sad8x8x4d_neon
void aom_sad8x8x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
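
The SAD hunks above all follow one pattern: a scalar `_c` kernel and a NEON kernel with identical signatures, where the `x4d` form scores one source block against four candidate references per call. A minimal scalar sketch of both shapes, with width/height as illustrative parameters only (libaom emits one fixed-size kernel per block size, e.g. aom_sad32x64_c):

#include <stdint.h>
#include <stdlib.h>

static unsigned int sad_wxh(const uint8_t* src, int src_stride,
                            const uint8_t* ref, int ref_stride,
                            int width, int height) {
  unsigned int sad = 0;
  for (int r = 0; r < height; ++r) {
    for (int c = 0; c < width; ++c) sad += abs(src[c] - ref[c]);
    src += src_stride;
    ref += ref_stride;
  }
  return sad;
}

/* The x4d form evaluates one source block against four candidate
 * references in a single call, filling sad_array[0..3]. */
static void sad_wxh_x4d(const uint8_t* src, int src_stride,
                        const uint8_t* const ref[4], int ref_stride,
                        int width, int height, uint32_t sad_array[4]) {
  for (int i = 0; i < 4; ++i)
    sad_array[i] = sad_wxh(src, src_stride, ref[i], ref_stride, width, height);
}
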
@@ -3219,85 +3614,191 @@ void aom_smooth_h_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_16x16 aom_smooth_h_predictor_16x16_c
+void aom_smooth_h_predictor_16x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x16 aom_smooth_h_predictor_16x16_neon
void aom_smooth_h_predictor_16x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_16x32 aom_smooth_h_predictor_16x32_c
+void aom_smooth_h_predictor_16x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x32 aom_smooth_h_predictor_16x32_neon
-void aom_smooth_h_predictor_16x8_c(uint8_t* dst,
+void aom_smooth_h_predictor_16x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_16x8 aom_smooth_h_predictor_16x8_c
+void aom_smooth_h_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x4 aom_smooth_h_predictor_16x4_neon
-void aom_smooth_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_h_predictor_2x2 aom_smooth_h_predictor_2x2_c
+void aom_smooth_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x64 aom_smooth_h_predictor_16x64_neon
+
+void aom_smooth_h_predictor_16x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_16x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x8 aom_smooth_h_predictor_16x8_neon
void aom_smooth_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_32x16 aom_smooth_h_predictor_32x16_c
+void aom_smooth_h_predictor_32x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x16 aom_smooth_h_predictor_32x16_neon
void aom_smooth_h_predictor_32x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_32x32 aom_smooth_h_predictor_32x32_c
+void aom_smooth_h_predictor_32x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x32 aom_smooth_h_predictor_32x32_neon
void aom_smooth_h_predictor_32x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_32x64 aom_smooth_h_predictor_32x64_c
+void aom_smooth_h_predictor_32x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x64 aom_smooth_h_predictor_32x64_neon
+
+void aom_smooth_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x8 aom_smooth_h_predictor_32x8_neon
+
+void aom_smooth_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_4x16 aom_smooth_h_predictor_4x16_neon
void aom_smooth_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_4x4 aom_smooth_h_predictor_4x4_c
+void aom_smooth_h_predictor_4x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_4x4 aom_smooth_h_predictor_4x4_neon
void aom_smooth_h_predictor_4x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_4x8 aom_smooth_h_predictor_4x8_c
+void aom_smooth_h_predictor_4x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_4x8 aom_smooth_h_predictor_4x8_neon
+
+void aom_smooth_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_64x16 aom_smooth_h_predictor_64x16_neon
void aom_smooth_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_64x32 aom_smooth_h_predictor_64x32_c
+void aom_smooth_h_predictor_64x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_64x32 aom_smooth_h_predictor_64x32_neon
void aom_smooth_h_predictor_64x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_64x64 aom_smooth_h_predictor_64x64_c
+void aom_smooth_h_predictor_64x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_64x64 aom_smooth_h_predictor_64x64_neon
void aom_smooth_h_predictor_8x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_8x16 aom_smooth_h_predictor_8x16_c
+void aom_smooth_h_predictor_8x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x16 aom_smooth_h_predictor_8x16_neon
+
+void aom_smooth_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x32 aom_smooth_h_predictor_8x32_neon
void aom_smooth_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_8x4 aom_smooth_h_predictor_8x4_c
+void aom_smooth_h_predictor_8x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x4 aom_smooth_h_predictor_8x4_neon
void aom_smooth_h_predictor_8x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_8x8 aom_smooth_h_predictor_8x8_c
+void aom_smooth_h_predictor_8x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x8 aom_smooth_h_predictor_8x8_neon
void aom_smooth_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
@@ -3319,6 +3820,26 @@ void aom_smooth_predictor_16x32_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_16x32 aom_smooth_predictor_16x32_neon
+void aom_smooth_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_16x4 aom_smooth_predictor_16x4_neon
+
+void aom_smooth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_16x64 aom_smooth_predictor_16x64_neon
+
void aom_smooth_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3329,12 +3850,6 @@ void aom_smooth_predictor_16x8_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_16x8 aom_smooth_predictor_16x8_neon
-void aom_smooth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c
-
void aom_smooth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3365,6 +3880,26 @@ void aom_smooth_predictor_32x64_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_32x64 aom_smooth_predictor_32x64_neon
+void aom_smooth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_32x8 aom_smooth_predictor_32x8_neon
+
+void aom_smooth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_4x16 aom_smooth_predictor_4x16_neon
+
void aom_smooth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3385,6 +3920,16 @@ void aom_smooth_predictor_4x8_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_4x8 aom_smooth_predictor_4x8_neon
+void aom_smooth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_64x16 aom_smooth_predictor_64x16_neon
+
void aom_smooth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3415,6 +3960,16 @@ void aom_smooth_predictor_8x16_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_8x16 aom_smooth_predictor_8x16_neon
+void aom_smooth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_8x32 aom_smooth_predictor_8x32_neon
+
void aom_smooth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3439,85 +3994,191 @@ void aom_smooth_v_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_16x16 aom_smooth_v_predictor_16x16_c
+void aom_smooth_v_predictor_16x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x16 aom_smooth_v_predictor_16x16_neon
void aom_smooth_v_predictor_16x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_16x32 aom_smooth_v_predictor_16x32_c
+void aom_smooth_v_predictor_16x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x32 aom_smooth_v_predictor_16x32_neon
-void aom_smooth_v_predictor_16x8_c(uint8_t* dst,
+void aom_smooth_v_predictor_16x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_16x8 aom_smooth_v_predictor_16x8_c
+void aom_smooth_v_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x4 aom_smooth_v_predictor_16x4_neon
-void aom_smooth_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_v_predictor_2x2 aom_smooth_v_predictor_2x2_c
+void aom_smooth_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x64 aom_smooth_v_predictor_16x64_neon
+
+void aom_smooth_v_predictor_16x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_16x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x8 aom_smooth_v_predictor_16x8_neon
void aom_smooth_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_32x16 aom_smooth_v_predictor_32x16_c
+void aom_smooth_v_predictor_32x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x16 aom_smooth_v_predictor_32x16_neon
void aom_smooth_v_predictor_32x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_32x32 aom_smooth_v_predictor_32x32_c
+void aom_smooth_v_predictor_32x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x32 aom_smooth_v_predictor_32x32_neon
void aom_smooth_v_predictor_32x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_32x64 aom_smooth_v_predictor_32x64_c
+void aom_smooth_v_predictor_32x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x64 aom_smooth_v_predictor_32x64_neon
+
+void aom_smooth_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x8 aom_smooth_v_predictor_32x8_neon
+
+void aom_smooth_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_4x16 aom_smooth_v_predictor_4x16_neon
void aom_smooth_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_4x4 aom_smooth_v_predictor_4x4_c
+void aom_smooth_v_predictor_4x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_4x4 aom_smooth_v_predictor_4x4_neon
void aom_smooth_v_predictor_4x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_4x8 aom_smooth_v_predictor_4x8_c
+void aom_smooth_v_predictor_4x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_4x8 aom_smooth_v_predictor_4x8_neon
+
+void aom_smooth_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_64x16 aom_smooth_v_predictor_64x16_neon
void aom_smooth_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_64x32 aom_smooth_v_predictor_64x32_c
+void aom_smooth_v_predictor_64x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_64x32 aom_smooth_v_predictor_64x32_neon
void aom_smooth_v_predictor_64x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_64x64 aom_smooth_v_predictor_64x64_c
+void aom_smooth_v_predictor_64x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_64x64 aom_smooth_v_predictor_64x64_neon
void aom_smooth_v_predictor_8x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_8x16 aom_smooth_v_predictor_8x16_c
+void aom_smooth_v_predictor_8x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x16 aom_smooth_v_predictor_8x16_neon
+
+void aom_smooth_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x32 aom_smooth_v_predictor_8x32_neon
void aom_smooth_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_8x4 aom_smooth_v_predictor_8x4_c
+void aom_smooth_v_predictor_8x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x4 aom_smooth_v_predictor_8x4_neon
void aom_smooth_v_predictor_8x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_8x8 aom_smooth_v_predictor_8x8_c
+void aom_smooth_v_predictor_8x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x8 aom_smooth_v_predictor_8x8_neon
int64_t aom_sse_c(const uint8_t* a,
int a_stride,
@@ -3996,7 +4657,12 @@ uint64_t aom_sum_sse_2d_i16_c(const int16_t* src,
int width,
int height,
int* sum);
-#define aom_sum_sse_2d_i16 aom_sum_sse_2d_i16_c
+uint64_t aom_sum_sse_2d_i16_neon(const int16_t* src,
+ int src_stride,
+ int width,
+ int height,
+ int* sum);
+#define aom_sum_sse_2d_i16 aom_sum_sse_2d_i16_neon
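
Reading the signature, aom_sum_sse_2d_i16 returns the sum of squared samples over a width x height int16 block and writes the plain sum through *sum; that interpretation is an assumption based on libaom's scalar kernel, so treat this sketch as illustrative:

#include <stdint.h>

static uint64_t sum_sse_2d_i16_sketch(const int16_t* src, int src_stride,
                                      int width, int height, int* sum) {
  uint64_t sse = 0;
  int s = 0;
  for (int r = 0; r < height; ++r) {
    for (int c = 0; c < width; ++c) {
      s += src[c];                        /* running sum of samples */
      sse += (uint64_t)((int64_t)src[c] * src[c]); /* sum of squares */
    }
    src += src_stride;
  }
  *sum = s;
  return sse;
}
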
void aom_v_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
@@ -4014,18 +4680,24 @@ void aom_v_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_16x32 aom_v_predictor_16x32_c
+void aom_v_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x4 aom_v_predictor_16x4_c
+
+void aom_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x64 aom_v_predictor_16x64_c
+
void aom_v_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_v_predictor_16x8 aom_v_predictor_16x8_c
-void aom_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_v_predictor_2x2 aom_v_predictor_2x2_c
-
void aom_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4048,6 +4720,18 @@ void aom_v_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_32x64 aom_v_predictor_32x64_c
+void aom_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_32x8 aom_v_predictor_32x8_c
+
+void aom_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_4x16 aom_v_predictor_4x16_c
+
void aom_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4064,6 +4748,12 @@ void aom_v_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_4x8 aom_v_predictor_4x8_c
+void aom_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_64x16 aom_v_predictor_64x16_c
+
void aom_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4082,6 +4772,12 @@ void aom_v_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_8x16 aom_v_predictor_8x16_c
+void aom_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_8x32 aom_v_predictor_8x32_c
+
void aom_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4317,8 +5013,8 @@ unsigned int aom_variance8x8_neon(const uint8_t* src_ptr,
unsigned int* sse);
#define aom_variance8x8 aom_variance8x8_neon
-int aom_vector_var_c(const int16_t* ref, const int16_t* src, const int bwl);
-int aom_vector_var_neon(const int16_t* ref, const int16_t* src, const int bwl);
+int aom_vector_var_c(const int16_t* ref, const int16_t* src, int bwl);
+int aom_vector_var_neon(const int16_t* ref, const int16_t* src, int bwl);
#define aom_vector_var aom_vector_var_neon
void aom_dsp_rtcd(void);
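
Taken together, these generated headers are libaom's static RTCD (run-time CPU detection) layer: when a target unconditionally has NEON, each generic symbol becomes a preprocessor alias for the `_neon` kernel and no runtime dispatch happens. A minimal caller-side sketch, assuming this arm-neon header is on the include path (the wrapper function below is invented for illustration):

#include <stdint.h>
#include "config/aom_dsp_rtcd.h"  /* include path is an assumption */

/* Expands to aom_sad4x8_neon(...) in this configuration, per the #define
 * above; there is no function pointer or CPU check on this path. */
unsigned int sad_of_one_4x8_block(const uint8_t* src, int src_stride,
                                  const uint8_t* ref, int ref_stride) {
  return aom_sad4x8(src, src_stride, ref, ref_stride);
}
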
diff --git a/chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_scale_rtcd.h b/chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_scale_rtcd.h
index e27b558dd38..7db7b9d0f84 100644
--- a/chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_scale_rtcd.h
+++ b/chromium/third_party/libaom/source/config/linux/arm-neon/config/aom_scale_rtcd.h
@@ -18,6 +18,12 @@ void aom_extend_frame_borders_c(struct yv12_buffer_config* ybf,
const int num_planes);
#define aom_extend_frame_borders aom_extend_frame_borders_c
+void aom_extend_frame_borders_plane_row_c(const struct yv12_buffer_config* ybf,
+ int plane,
+ int v_start,
+ int v_end);
+#define aom_extend_frame_borders_plane_row aom_extend_frame_borders_plane_row_c
+
void aom_extend_frame_borders_y_c(struct yv12_buffer_config* ybf);
#define aom_extend_frame_borders_y aom_extend_frame_borders_y_c
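
The new aom_extend_frame_borders_plane_row entry takes a [v_start, v_end) row range, which reads like a hook for extending one plane slice per worker thread. Border extension itself just replicates edge pixels outward; a hedged standalone sketch of that idea (not libaom's code):

#include <stdint.h>

/* Replicate the outermost pixels of one row into `border` pixels on each
 * side. `row` must point into a buffer with `border` allocated bytes before
 * and after the visible width; a full implementation would also replicate
 * the top and bottom rows. */
static void extend_row_borders(uint8_t* row, int width, int border) {
  for (int i = 1; i <= border; ++i) {
    row[-i] = row[0];                    /* left edge */
    row[width - 1 + i] = row[width - 1]; /* right edge */
  }
}
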
diff --git a/chromium/third_party/libaom/source/config/linux/arm-neon/config/av1_rtcd.h b/chromium/third_party/libaom/source/config/linux/arm-neon/config/av1_rtcd.h
index 23e831cc591..fd9ac835ad7 100644
--- a/chromium/third_party/libaom/source/config/linux/arm-neon/config/av1_rtcd.h
+++ b/chromium/third_party/libaom/source/config/linux/arm-neon/config/av1_rtcd.h
@@ -183,6 +183,30 @@ void aom_upsampled_pred_c(MACROBLOCKD* xd,
int subpel_search);
#define aom_upsampled_pred aom_upsampled_pred_c
+void av1_apply_selfguided_restoration_c(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+void av1_apply_selfguided_restoration_neon(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+#define av1_apply_selfguided_restoration av1_apply_selfguided_restoration_neon
+
int64_t av1_block_error_c(const tran_low_t* coeff,
const tran_low_t* dqcoeff,
intptr_t block_size,
@@ -234,6 +258,14 @@ void av1_build_compound_diffwtd_mask_d16_neon(uint8_t* mask,
#define av1_build_compound_diffwtd_mask_d16 \
av1_build_compound_diffwtd_mask_d16_neon
+int64_t av1_calc_frame_error_c(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+#define av1_calc_frame_error av1_calc_frame_error_c
+
void av1_calc_indices_dim1_c(const int* data,
const int* centroids,
uint8_t* indices,
@@ -1314,6 +1346,28 @@ void av1_round_shift_array_c(int32_t* arr, int size, int bit);
void av1_round_shift_array_neon(int32_t* arr, int size, int bit);
#define av1_round_shift_array av1_round_shift_array_neon
+int av1_selfguided_restoration_c(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+int av1_selfguided_restoration_neon(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+#define av1_selfguided_restoration av1_selfguided_restoration_neon
+
void av1_txb_init_levels_c(const tran_low_t* const coeff,
const int width,
const int height,
@@ -1330,6 +1384,44 @@ void av1_upsample_intra_edge_c(uint8_t* p, int sz);
void av1_upsample_intra_edge_high_c(uint16_t* p, int sz, int bd);
#define av1_upsample_intra_edge_high av1_upsample_intra_edge_high_c
+void av1_warp_affine_c(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+void av1_warp_affine_neon(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+#define av1_warp_affine av1_warp_affine_neon
+
void av1_wedge_compute_delta_squares_c(int16_t* d,
const int16_t* a,
const int16_t* b,
@@ -1346,7 +1438,11 @@ uint64_t av1_wedge_sse_from_residuals_c(const int16_t* r1,
const int16_t* d,
const uint8_t* m,
int N);
-#define av1_wedge_sse_from_residuals av1_wedge_sse_from_residuals_c
+uint64_t av1_wedge_sse_from_residuals_neon(const int16_t* r1,
+ const int16_t* d,
+ const uint8_t* m,
+ int N);
+#define av1_wedge_sse_from_residuals av1_wedge_sse_from_residuals_neon
void av1_wiener_convolve_add_src_c(const uint8_t* src,
ptrdiff_t src_stride,
@@ -1610,7 +1706,15 @@ void cdef_find_dir_dual_c(const uint16_t* img1,
int coeff_shift,
int* out1,
int* out2);
-#define cdef_find_dir_dual cdef_find_dir_dual_c
+void cdef_find_dir_dual_neon(const uint16_t* img1,
+ const uint16_t* img2,
+ int stride,
+ int32_t* var1,
+ int32_t* var2,
+ int coeff_shift,
+ int* out1,
+ int* out2);
+#define cdef_find_dir_dual cdef_find_dir_dual_neon
cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_c(TX_SIZE tx_size);
cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_neon(TX_SIZE tx_size);
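
Among the av1_rtcd.h entries touched above, av1_round_shift_array is simple enough to restate. A hedged scalar sketch matching the declared signature; the convention (positive `bit` rounds while shifting right, negative `bit` scales up) is my reading of libaom and should be checked against the tree:

#include <stdint.h>

static int32_t round_power_of_two_signed(int32_t v, int n) {
  const int32_t m = (v < 0) ? -v : v;
  const int32_t r = (m + (1 << (n - 1))) >> n;
  return (v < 0) ? -r : r;
}

void round_shift_array_sketch(int32_t* arr, int size, int bit) {
  if (bit == 0) return;
  if (bit > 0) {
    for (int i = 0; i < size; ++i)
      arr[i] = round_power_of_two_signed(arr[i], bit);
  } else {
    for (int i = 0; i < size; ++i)
      arr[i] *= (1 << -bit);
  }
}
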
diff --git a/chromium/third_party/libaom/source/config/linux/arm/config/aom_config.asm b/chromium/third_party/libaom/source/config/linux/arm/config/aom_config.asm
index 4e05e95c16b..248f8f4e0a9 100644
--- a/chromium/third_party/libaom/source/config/linux/arm/config/aom_config.asm
+++ b/chromium/third_party/libaom/source/config/linux/arm/config/aom_config.asm
@@ -9,7 +9,6 @@
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;
ARCH_ARM equ 1
-ARCH_MIPS equ 0
ARCH_PPC equ 0
ARCH_X86 equ 0
ARCH_X86_64 equ 0
@@ -33,8 +32,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
CONFIG_ENTROPY_STATS equ 0
CONFIG_EXCLUDE_SIMD_MISMATCH equ 0
CONFIG_FPMT_TEST equ 0
-CONFIG_FRAME_PARALLEL_ENCODE equ 0
-CONFIG_FRAME_PARALLEL_ENCODE_2 equ 0
CONFIG_GCC equ 1
CONFIG_GCOV equ 0
CONFIG_GPROF equ 0
@@ -50,7 +47,7 @@ CONFIG_NORMAL_TILE_MODE equ 1
CONFIG_OPTICAL_FLOW_API equ 0
CONFIG_OS_SUPPORT equ 1
CONFIG_PARTITION_SEARCH_ORDER equ 0
-CONFIG_PIC equ 0
+CONFIG_PIC equ 1
CONFIG_RATECTRL_LOG equ 0
CONFIG_RD_COMMAND equ 0
CONFIG_RD_DEBUG equ 0
@@ -69,14 +66,11 @@ CONFIG_WEBM_IO equ 1
DECODE_HEIGHT_LIMIT equ 16384
DECODE_WIDTH_LIMIT equ 16384
FORCE_HIGHBITDEPTH_DECODING equ 0
+HAVE_ARM_CRC32 equ 0
HAVE_AVX equ 0
HAVE_AVX2 equ 0
-HAVE_DSPR2 equ 0
HAVE_FEXCEPT equ 1
-HAVE_MIPS32 equ 0
-HAVE_MIPS64 equ 0
HAVE_MMX equ 0
-HAVE_MSA equ 0
HAVE_NEON equ 0
HAVE_PTHREAD_H equ 1
HAVE_SSE equ 0
diff --git a/chromium/third_party/libaom/source/config/linux/arm/config/aom_config.c b/chromium/third_party/libaom/source/config/linux/arm/config/aom_config.c
index c6697d9573f..becd745fe8b 100644
--- a/chromium/third_party/libaom/source/config/linux/arm/config/aom_config.c
+++ b/chromium/third_party/libaom/source/config/linux/arm/config/aom_config.c
@@ -9,5 +9,5 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom/aom_codec.h"
-static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DCMAKE_TOOLCHAIN_FILE=\"../source/libaom/build/cmake/toolchains/armv7-linux-gcc.cmake\" -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384 -DENABLE_NEON=0";
+static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DCMAKE_TOOLCHAIN_FILE=\"../source/libaom/build/cmake/toolchains/armv7-linux-gcc.cmake\" -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384 -DENABLE_NEON=0";
const char *aom_codec_build_config(void) {return cfg;}
diff --git a/chromium/third_party/libaom/source/config/linux/arm/config/aom_config.h b/chromium/third_party/libaom/source/config/linux/arm/config/aom_config.h
index 9783b9c7618..84ad7fe1f88 100644
--- a/chromium/third_party/libaom/source/config/linux/arm/config/aom_config.h
+++ b/chromium/third_party/libaom/source/config/linux/arm/config/aom_config.h
@@ -11,7 +11,6 @@
#ifndef AOM_CONFIG_H_
#define AOM_CONFIG_H_
#define ARCH_ARM 1
-#define ARCH_MIPS 0
#define ARCH_PPC 0
#define ARCH_X86 0
#define ARCH_X86_64 0
@@ -35,8 +34,6 @@
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_EXCLUDE_SIMD_MISMATCH 0
#define CONFIG_FPMT_TEST 0
-#define CONFIG_FRAME_PARALLEL_ENCODE 0
-#define CONFIG_FRAME_PARALLEL_ENCODE_2 0
#define CONFIG_GCC 1
#define CONFIG_GCOV 0
#define CONFIG_GPROF 0
@@ -52,7 +49,7 @@
#define CONFIG_OPTICAL_FLOW_API 0
#define CONFIG_OS_SUPPORT 1
#define CONFIG_PARTITION_SEARCH_ORDER 0
-#define CONFIG_PIC 0
+#define CONFIG_PIC 1
#define CONFIG_RATECTRL_LOG 0
#define CONFIG_RD_COMMAND 0
#define CONFIG_RD_DEBUG 0
@@ -71,14 +68,11 @@
#define DECODE_HEIGHT_LIMIT 16384
#define DECODE_WIDTH_LIMIT 16384
#define FORCE_HIGHBITDEPTH_DECODING 0
+#define HAVE_ARM_CRC32 0
#define HAVE_AVX 0
#define HAVE_AVX2 0
-#define HAVE_DSPR2 0
#define HAVE_FEXCEPT 1
-#define HAVE_MIPS32 0
-#define HAVE_MIPS64 0
#define HAVE_MMX 0
-#define HAVE_MSA 0
#define HAVE_NEON 0
#define HAVE_PTHREAD_H 1
#define HAVE_SSE 0
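
Two functional changes hide in this config churn: the MIPS arch/feature macros are gone, and CONFIG_PIC flips 0 -> 1 for this armv7 target while HAVE_NEON stays 0 (the cmake line passes -DENABLE_NEON=0). These macros gate code at compile time in the usual way:

#include "config/aom_config.h"  /* include path is an assumption */

int uses_neon_path(void) {
#if HAVE_NEON
  return 1;  /* never taken in this armv7 configuration */
#else
  return 0;  /* portable C kernels only */
#endif
}
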
diff --git a/chromium/third_party/libaom/source/config/linux/arm/config/aom_dsp_rtcd.h b/chromium/third_party/libaom/source/config/linux/arm/config/aom_dsp_rtcd.h
index a87855d6013..acbb9455b2d 100644
--- a/chromium/third_party/libaom/source/config/linux/arm/config/aom_dsp_rtcd.h
+++ b/chromium/third_party/libaom/source/config/linux/arm/config/aom_dsp_rtcd.h
@@ -146,18 +146,24 @@ void aom_dc_128_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_c
+void aom_dc_128_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x4 aom_dc_128_predictor_16x4_c
+
+void aom_dc_128_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x64 aom_dc_128_predictor_16x64_c
+
void aom_dc_128_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_c
-void aom_dc_128_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c
-
void aom_dc_128_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -176,6 +182,18 @@ void aom_dc_128_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_32x64 aom_dc_128_predictor_32x64_c
+void aom_dc_128_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_32x8 aom_dc_128_predictor_32x8_c
+
+void aom_dc_128_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_4x16 aom_dc_128_predictor_4x16_c
+
void aom_dc_128_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -188,6 +206,12 @@ void aom_dc_128_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_c
+void aom_dc_128_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_64x16 aom_dc_128_predictor_64x16_c
+
void aom_dc_128_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -206,6 +230,12 @@ void aom_dc_128_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_c
+void aom_dc_128_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_8x32 aom_dc_128_predictor_8x32_c
+
void aom_dc_128_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -230,18 +260,24 @@ void aom_dc_left_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_c
+void aom_dc_left_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x4 aom_dc_left_predictor_16x4_c
+
+void aom_dc_left_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x64 aom_dc_left_predictor_16x64_c
+
void aom_dc_left_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_c
-void aom_dc_left_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c
-
void aom_dc_left_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -260,6 +296,18 @@ void aom_dc_left_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_32x64 aom_dc_left_predictor_32x64_c
+void aom_dc_left_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_32x8 aom_dc_left_predictor_32x8_c
+
+void aom_dc_left_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_4x16 aom_dc_left_predictor_4x16_c
+
void aom_dc_left_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -272,6 +320,12 @@ void aom_dc_left_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_c
+void aom_dc_left_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_64x16 aom_dc_left_predictor_64x16_c
+
void aom_dc_left_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -290,6 +344,12 @@ void aom_dc_left_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_c
+void aom_dc_left_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_8x32 aom_dc_left_predictor_8x32_c
+
void aom_dc_left_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -314,18 +374,24 @@ void aom_dc_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_c
+void aom_dc_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x4 aom_dc_predictor_16x4_c
+
+void aom_dc_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x64 aom_dc_predictor_16x64_c
+
void aom_dc_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_c
-void aom_dc_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c
-
void aom_dc_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -344,6 +410,18 @@ void aom_dc_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_32x64 aom_dc_predictor_32x64_c
+void aom_dc_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_32x8 aom_dc_predictor_32x8_c
+
+void aom_dc_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_4x16 aom_dc_predictor_4x16_c
+
void aom_dc_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -356,6 +434,12 @@ void aom_dc_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_c
+void aom_dc_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_64x16 aom_dc_predictor_64x16_c
+
void aom_dc_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -374,6 +458,12 @@ void aom_dc_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_c
+void aom_dc_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_8x32 aom_dc_predictor_8x32_c
+
void aom_dc_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -398,18 +488,24 @@ void aom_dc_top_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_c
+void aom_dc_top_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x4 aom_dc_top_predictor_16x4_c
+
+void aom_dc_top_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x64 aom_dc_top_predictor_16x64_c
+
void aom_dc_top_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_c
-void aom_dc_top_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c
-
void aom_dc_top_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -428,6 +524,18 @@ void aom_dc_top_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_32x64 aom_dc_top_predictor_32x64_c
+void aom_dc_top_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_32x8 aom_dc_top_predictor_32x8_c
+
+void aom_dc_top_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_4x16 aom_dc_top_predictor_4x16_c
+
void aom_dc_top_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -440,6 +548,12 @@ void aom_dc_top_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_c
+void aom_dc_top_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_64x16 aom_dc_top_predictor_64x16_c
+
void aom_dc_top_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -458,6 +572,12 @@ void aom_dc_top_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_c
+void aom_dc_top_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_8x32 aom_dc_top_predictor_8x32_c
+
void aom_dc_top_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -895,18 +1015,24 @@ void aom_h_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_16x32 aom_h_predictor_16x32_c
+void aom_h_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x4 aom_h_predictor_16x4_c
+
+void aom_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x64 aom_h_predictor_16x64_c
+
void aom_h_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_h_predictor_16x8 aom_h_predictor_16x8_c
-void aom_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_h_predictor_2x2 aom_h_predictor_2x2_c
-
void aom_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -925,6 +1051,18 @@ void aom_h_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_32x64 aom_h_predictor_32x64_c
+void aom_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_32x8 aom_h_predictor_32x8_c
+
+void aom_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_4x16 aom_h_predictor_4x16_c
+
void aom_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -937,6 +1075,12 @@ void aom_h_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_4x8 aom_h_predictor_4x8_c
+void aom_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_64x16 aom_h_predictor_64x16_c
+
void aom_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -955,6 +1099,12 @@ void aom_h_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_8x16 aom_h_predictor_8x16_c
+void aom_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_8x32 aom_h_predictor_8x32_c
+
void aom_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -987,11 +1137,6 @@ void aom_hadamard_8x8_c(const int16_t* src_diff,
tran_low_t* coeff);
#define aom_hadamard_8x8 aom_hadamard_8x8_c
-void aom_hadamard_8x8_dual_c(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-#define aom_hadamard_8x8_dual aom_hadamard_8x8_dual_c
-
void aom_hadamard_lp_16x16_c(const int16_t* src_diff,
ptrdiff_t src_stride,
int16_t* coeff);
@@ -1002,6 +1147,11 @@ void aom_hadamard_lp_8x8_c(const int16_t* src_diff,
int16_t* coeff);
#define aom_hadamard_lp_8x8 aom_hadamard_lp_8x8_c
+void aom_hadamard_lp_8x8_dual_c(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+#define aom_hadamard_lp_8x8_dual aom_hadamard_lp_8x8_dual_c
+
void aom_ifft16x16_float_c(const float* input, float* temp, float* output);
#define aom_ifft16x16_float aom_ifft16x16_float_c
@@ -1017,13 +1167,20 @@ void aom_ifft4x4_float_c(const float* input, float* temp, float* output);
void aom_ifft8x8_float_c(const float* input, float* temp, float* output);
#define aom_ifft8x8_float aom_ifft8x8_float_c
-int16_t aom_int_pro_col_c(const uint8_t* ref, const int width);
+void aom_int_pro_col_c(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
#define aom_int_pro_col aom_int_pro_col_c
-void aom_int_pro_row_c(int16_t hbuf[16],
+void aom_int_pro_row_c(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
+ const int width,
+ const int height,
+ int norm_factor);
#define aom_int_pro_row aom_int_pro_row_c
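
The integral-projection helpers change shape here: the old forms produced one value per call, while the new ones fill whole projection buffers and apply a norm_factor shift. A scalar sketch of my reading of the new contracts (column sums into hbuf for _row, row sums into vbuf for _col; verify against aom_dsp/avg.c before relying on this):

#include <stdint.h>

/* hbuf[c] = (sum of column c over `height` rows) >> norm_factor */
static void int_pro_row_sketch(int16_t* hbuf, const uint8_t* ref,
                               int ref_stride, int width, int height,
                               int norm_factor) {
  for (int c = 0; c < width; ++c) {
    int sum = 0;
    for (int r = 0; r < height; ++r) sum += ref[r * ref_stride + c];
    hbuf[c] = (int16_t)(sum >> norm_factor);
  }
}

/* vbuf[r] = (sum of row r over `width` columns) >> norm_factor */
static void int_pro_col_sketch(int16_t* vbuf, const uint8_t* ref,
                               int ref_stride, int width, int height,
                               int norm_factor) {
  for (int r = 0; r < height; ++r) {
    int sum = 0;
    for (int c = 0; c < width; ++c) sum += ref[r * ref_stride + c];
    vbuf[r] = (int16_t)(sum >> norm_factor);
  }
}
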
void aom_lowbd_blend_a64_d16_mask_c(uint8_t* dst,
@@ -1816,6 +1973,13 @@ unsigned int aom_mse8x8_c(const uint8_t* src_ptr,
unsigned int* sse);
#define aom_mse8x8 aom_mse8x8_c
+uint64_t aom_mse_16xh_16bit_c(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+#define aom_mse_16xh_16bit aom_mse_16xh_16bit_c
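
Both this new 16xh entry and the existing aom_mse_wxh_16bit below appear to be raw sums of squared error between an 8-bit buffer and a 16-bit buffer; despite the "mse" name, no division by the sample count seems to happen in the kernel, which is an assumption worth checking. A sketch of the wxh shape under that reading (parameter meanings inferred from the declaration):

#include <stdint.h>

static uint64_t mse_wxh_16bit_sketch(const uint8_t* dst, int dstride,
                                     const uint16_t* src, int sstride,
                                     int w, int h) {
  uint64_t sum = 0;
  for (int i = 0; i < h; ++i) {
    for (int j = 0; j < w; ++j) {
      /* squared difference between 8-bit reconstruction and 16-bit source */
      const int e = (int)dst[i * dstride + j] - (int)src[i * sstride + j];
      sum += (uint64_t)((int64_t)e * e);
    }
  }
  return sum;
}
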
+
uint64_t aom_mse_wxh_16bit_c(uint8_t* dst,
int dstride,
uint16_t* src,
@@ -1836,18 +2000,24 @@ void aom_paeth_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_paeth_predictor_16x32 aom_paeth_predictor_16x32_c
+void aom_paeth_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x4 aom_paeth_predictor_16x4_c
+
+void aom_paeth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x64 aom_paeth_predictor_16x64_c
+
void aom_paeth_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_paeth_predictor_16x8 aom_paeth_predictor_16x8_c
-void aom_paeth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c
-
void aom_paeth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1866,6 +2036,18 @@ void aom_paeth_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_paeth_predictor_32x64 aom_paeth_predictor_32x64_c
+void aom_paeth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x8 aom_paeth_predictor_32x8_c
+
+void aom_paeth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_4x16 aom_paeth_predictor_4x16_c
+
void aom_paeth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1878,6 +2060,12 @@ void aom_paeth_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_paeth_predictor_4x8 aom_paeth_predictor_4x8_c
+void aom_paeth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_64x16 aom_paeth_predictor_64x16_c
+
void aom_paeth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1896,6 +2084,12 @@ void aom_paeth_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_paeth_predictor_8x16 aom_paeth_predictor_8x16_c
+void aom_paeth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x32 aom_paeth_predictor_8x32_c
+
void aom_paeth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1944,20 +2138,6 @@ void aom_quantize_b_32x32_c(const tran_low_t* coeff_ptr,
const int16_t* iscan);
#define aom_quantize_b_32x32 aom_quantize_b_32x32_c
-void aom_quantize_b_32x32_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_32x32_adaptive aom_quantize_b_32x32_adaptive_c
-
void aom_quantize_b_64x64_c(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -1972,34 +2152,6 @@ void aom_quantize_b_64x64_c(const tran_low_t* coeff_ptr,
const int16_t* iscan);
#define aom_quantize_b_64x64 aom_quantize_b_64x64_c
-void aom_quantize_b_64x64_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_64x64_adaptive aom_quantize_b_64x64_adaptive_c
-
-void aom_quantize_b_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_adaptive aom_quantize_b_adaptive_c
-
unsigned int aom_sad128x128_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
@@ -2735,18 +2887,24 @@ void aom_smooth_h_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_h_predictor_16x32 aom_smooth_h_predictor_16x32_c
+void aom_smooth_h_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x4 aom_smooth_h_predictor_16x4_c
+
+void aom_smooth_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x64 aom_smooth_h_predictor_16x64_c
+
void aom_smooth_h_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_smooth_h_predictor_16x8 aom_smooth_h_predictor_16x8_c
-void aom_smooth_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_h_predictor_2x2 aom_smooth_h_predictor_2x2_c
-
void aom_smooth_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2765,6 +2923,18 @@ void aom_smooth_h_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_h_predictor_32x64 aom_smooth_h_predictor_32x64_c
+void aom_smooth_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x8 aom_smooth_h_predictor_32x8_c
+
+void aom_smooth_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_4x16 aom_smooth_h_predictor_4x16_c
+
void aom_smooth_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2777,6 +2947,12 @@ void aom_smooth_h_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_h_predictor_4x8 aom_smooth_h_predictor_4x8_c
+void aom_smooth_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_64x16 aom_smooth_h_predictor_64x16_c
+
void aom_smooth_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2795,6 +2971,12 @@ void aom_smooth_h_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_h_predictor_8x16 aom_smooth_h_predictor_8x16_c
+void aom_smooth_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x32 aom_smooth_h_predictor_8x32_c
+
void aom_smooth_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2819,18 +3001,24 @@ void aom_smooth_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_16x32 aom_smooth_predictor_16x32_c
+void aom_smooth_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_16x4 aom_smooth_predictor_16x4_c
+
+void aom_smooth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_16x64 aom_smooth_predictor_16x64_c
+
void aom_smooth_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_smooth_predictor_16x8 aom_smooth_predictor_16x8_c
-void aom_smooth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c
-
void aom_smooth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2849,6 +3037,18 @@ void aom_smooth_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_32x64 aom_smooth_predictor_32x64_c
+void aom_smooth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_32x8 aom_smooth_predictor_32x8_c
+
+void aom_smooth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_4x16 aom_smooth_predictor_4x16_c
+
void aom_smooth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2861,6 +3061,12 @@ void aom_smooth_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_4x8 aom_smooth_predictor_4x8_c
+void aom_smooth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_64x16 aom_smooth_predictor_64x16_c
+
void aom_smooth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2879,6 +3085,12 @@ void aom_smooth_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_8x16 aom_smooth_predictor_8x16_c
+void aom_smooth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_8x32 aom_smooth_predictor_8x32_c
+
void aom_smooth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2903,18 +3115,24 @@ void aom_smooth_v_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_v_predictor_16x32 aom_smooth_v_predictor_16x32_c
+void aom_smooth_v_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x4 aom_smooth_v_predictor_16x4_c
+
+void aom_smooth_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x64 aom_smooth_v_predictor_16x64_c
+
void aom_smooth_v_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_smooth_v_predictor_16x8 aom_smooth_v_predictor_16x8_c
-void aom_smooth_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_v_predictor_2x2 aom_smooth_v_predictor_2x2_c
-
void aom_smooth_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2933,6 +3151,18 @@ void aom_smooth_v_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_v_predictor_32x64 aom_smooth_v_predictor_32x64_c
+void aom_smooth_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x8 aom_smooth_v_predictor_32x8_c
+
+void aom_smooth_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_4x16 aom_smooth_v_predictor_4x16_c
+
void aom_smooth_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2945,6 +3175,12 @@ void aom_smooth_v_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_v_predictor_4x8 aom_smooth_v_predictor_4x8_c
+void aom_smooth_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_64x16 aom_smooth_v_predictor_64x16_c
+
void aom_smooth_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2963,6 +3199,12 @@ void aom_smooth_v_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_v_predictor_8x16 aom_smooth_v_predictor_8x16_c
+void aom_smooth_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x32 aom_smooth_v_predictor_8x32_c
+
void aom_smooth_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3336,18 +3578,24 @@ void aom_v_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_16x32 aom_v_predictor_16x32_c
+void aom_v_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x4 aom_v_predictor_16x4_c
+
+void aom_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x64 aom_v_predictor_16x64_c
+
void aom_v_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_v_predictor_16x8 aom_v_predictor_16x8_c
-void aom_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_v_predictor_2x2 aom_v_predictor_2x2_c
-
void aom_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3366,6 +3614,18 @@ void aom_v_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_32x64 aom_v_predictor_32x64_c
+void aom_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_32x8 aom_v_predictor_32x8_c
+
+void aom_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_4x16 aom_v_predictor_4x16_c
+
void aom_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3378,6 +3638,12 @@ void aom_v_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_4x8 aom_v_predictor_4x8_c
+void aom_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_64x16 aom_v_predictor_64x16_c
+
void aom_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3396,6 +3662,12 @@ void aom_v_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_8x16 aom_v_predictor_8x16_c
+void aom_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_8x32 aom_v_predictor_8x32_c
+
void aom_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3547,7 +3819,7 @@ unsigned int aom_variance8x8_c(const uint8_t* src_ptr,
unsigned int* sse);
#define aom_variance8x8 aom_variance8x8_c
-int aom_vector_var_c(const int16_t* ref, const int16_t* src, const int bwl);
+int aom_vector_var_c(const int16_t* ref, const int16_t* src, int bwl);
#define aom_vector_var aom_vector_var_c
void aom_dsp_rtcd(void);
diff --git a/chromium/third_party/libaom/source/config/linux/arm/config/aom_scale_rtcd.h b/chromium/third_party/libaom/source/config/linux/arm/config/aom_scale_rtcd.h
index e27b558dd38..7db7b9d0f84 100644
--- a/chromium/third_party/libaom/source/config/linux/arm/config/aom_scale_rtcd.h
+++ b/chromium/third_party/libaom/source/config/linux/arm/config/aom_scale_rtcd.h
@@ -18,6 +18,12 @@ void aom_extend_frame_borders_c(struct yv12_buffer_config* ybf,
const int num_planes);
#define aom_extend_frame_borders aom_extend_frame_borders_c
+void aom_extend_frame_borders_plane_row_c(const struct yv12_buffer_config* ybf,
+ int plane,
+ int v_start,
+ int v_end);
+#define aom_extend_frame_borders_plane_row aom_extend_frame_borders_plane_row_c
+
void aom_extend_frame_borders_y_c(struct yv12_buffer_config* ybf);
#define aom_extend_frame_borders_y aom_extend_frame_borders_y_c
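
The hunk above is typical of these generated RTCD headers: each public symbol is declared once per available implementation, and a macro pins the bare name to the best implementation known at configure time. The newly added aom_extend_frame_borders_plane_row has no NEON variant yet, so it resolves to its C fallback. A minimal sketch of the pattern, using hypothetical names (my_kernel_*) rather than libaom API:

    /* Sketch of the RTCD macro pattern; names are hypothetical. */
    void my_kernel_c(unsigned char *dst, int n);    /* portable fallback   */
    #if HAVE_NEON
    void my_kernel_neon(unsigned char *dst, int n); /* SIMD specialization */
    #define my_kernel my_kernel_neon
    #else
    #define my_kernel my_kernel_c
    #endif
    /* Call sites use the bare name and get the pinned implementation:
     *   my_kernel(buf, 64);
     */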
diff --git a/chromium/third_party/libaom/source/config/linux/arm/config/av1_rtcd.h b/chromium/third_party/libaom/source/config/linux/arm/config/av1_rtcd.h
index eb237126baa..dc3e04f11a4 100644
--- a/chromium/third_party/libaom/source/config/linux/arm/config/av1_rtcd.h
+++ b/chromium/third_party/libaom/source/config/linux/arm/config/av1_rtcd.h
@@ -168,6 +168,19 @@ void aom_upsampled_pred_c(MACROBLOCKD* xd,
int subpel_search);
#define aom_upsampled_pred aom_upsampled_pred_c
+void av1_apply_selfguided_restoration_c(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+#define av1_apply_selfguided_restoration av1_apply_selfguided_restoration_c
+
int64_t av1_block_error_c(const tran_low_t* coeff,
const tran_low_t* dqcoeff,
intptr_t block_size,
@@ -202,6 +215,14 @@ void av1_build_compound_diffwtd_mask_d16_c(uint8_t* mask,
#define av1_build_compound_diffwtd_mask_d16 \
av1_build_compound_diffwtd_mask_d16_c
+int64_t av1_calc_frame_error_c(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+#define av1_calc_frame_error av1_calc_frame_error_c
+
void av1_calc_indices_dim1_c(const int* data,
const int* centroids,
uint8_t* indices,
@@ -875,6 +896,18 @@ void av1_resize_and_extend_frame_c(const YV12_BUFFER_CONFIG* src,
void av1_round_shift_array_c(int32_t* arr, int size, int bit);
#define av1_round_shift_array av1_round_shift_array_c
+int av1_selfguided_restoration_c(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+#define av1_selfguided_restoration av1_selfguided_restoration_c
+
void av1_txb_init_levels_c(const tran_low_t* const coeff,
const int width,
const int height,
@@ -887,6 +920,26 @@ void av1_upsample_intra_edge_c(uint8_t* p, int sz);
void av1_upsample_intra_edge_high_c(uint16_t* p, int sz, int bd);
#define av1_upsample_intra_edge_high av1_upsample_intra_edge_high_c
+void av1_warp_affine_c(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+#define av1_warp_affine av1_warp_affine_c
+
void av1_wedge_compute_delta_squares_c(int16_t* d,
const int16_t* a,
const int16_t* b,
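
Several encoder-side kernels (self-guided restoration, frame error, affine warp) are newly routed through this header, all pinned to their C fallbacks on this target. For orientation, a sketch of the rounding-shift convention that av1_round_shift_array (context above) is understood to follow — assumed from the usual libaom ROUND_POWER_OF_TWO idiom, and the rounding of negative inputs may differ in the real code:

    #include <stdint.h>

    /* Assumed semantics, not a verbatim copy of libaom. */
    static void round_shift_array_sketch(int32_t *arr, int size, int bit) {
      if (bit == 0) return;
      for (int i = 0; i < size; i++) {
        if (bit > 0)
          arr[i] = (arr[i] + (1 << (bit - 1))) >> bit; /* round to nearest */
        else
          arr[i] = arr[i] * (1 << (-bit));             /* plain upshift    */
      }
    }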
diff --git a/chromium/third_party/libaom/source/config/linux/arm64/config/aom_config.asm b/chromium/third_party/libaom/source/config/linux/arm64/config/aom_config.asm
index 15df49ee5ab..c5ebd6ab9ba 100644
--- a/chromium/third_party/libaom/source/config/linux/arm64/config/aom_config.asm
+++ b/chromium/third_party/libaom/source/config/linux/arm64/config/aom_config.asm
@@ -9,7 +9,6 @@
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;
ARCH_ARM equ 1
-ARCH_MIPS equ 0
ARCH_PPC equ 0
ARCH_X86 equ 0
ARCH_X86_64 equ 0
@@ -33,8 +32,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
CONFIG_ENTROPY_STATS equ 0
CONFIG_EXCLUDE_SIMD_MISMATCH equ 0
CONFIG_FPMT_TEST equ 0
-CONFIG_FRAME_PARALLEL_ENCODE equ 0
-CONFIG_FRAME_PARALLEL_ENCODE_2 equ 0
CONFIG_GCC equ 1
CONFIG_GCOV equ 0
CONFIG_GPROF equ 0
@@ -50,7 +47,7 @@ CONFIG_NORMAL_TILE_MODE equ 1
CONFIG_OPTICAL_FLOW_API equ 0
CONFIG_OS_SUPPORT equ 1
CONFIG_PARTITION_SEARCH_ORDER equ 0
-CONFIG_PIC equ 0
+CONFIG_PIC equ 1
CONFIG_RATECTRL_LOG equ 0
CONFIG_RD_COMMAND equ 0
CONFIG_RD_DEBUG equ 0
@@ -69,14 +66,11 @@ CONFIG_WEBM_IO equ 1
DECODE_HEIGHT_LIMIT equ 16384
DECODE_WIDTH_LIMIT equ 16384
FORCE_HIGHBITDEPTH_DECODING equ 0
+HAVE_ARM_CRC32 equ 0
HAVE_AVX equ 0
HAVE_AVX2 equ 0
-HAVE_DSPR2 equ 0
HAVE_FEXCEPT equ 1
-HAVE_MIPS32 equ 0
-HAVE_MIPS64 equ 0
HAVE_MMX equ 0
-HAVE_MSA equ 0
HAVE_NEON equ 1
HAVE_PTHREAD_H equ 1
HAVE_SSE equ 0
diff --git a/chromium/third_party/libaom/source/config/linux/arm64/config/aom_config.c b/chromium/third_party/libaom/source/config/linux/arm64/config/aom_config.c
index 7c8f15b9d65..ff9196735fc 100644
--- a/chromium/third_party/libaom/source/config/linux/arm64/config/aom_config.c
+++ b/chromium/third_party/libaom/source/config/linux/arm64/config/aom_config.c
@@ -9,5 +9,5 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom/aom_codec.h"
-static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DCMAKE_TOOLCHAIN_FILE=\"../source/libaom/build/cmake/toolchains/arm64-linux-gcc.cmake\" -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
+static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DCMAKE_TOOLCHAIN_FILE=\"../source/libaom/build/cmake/toolchains/arm64-linux-gcc.cmake\" -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
const char *aom_codec_build_config(void) {return cfg;}
diff --git a/chromium/third_party/libaom/source/config/linux/arm64/config/aom_config.h b/chromium/third_party/libaom/source/config/linux/arm64/config/aom_config.h
index f8e23f257de..dbbde9993bd 100644
--- a/chromium/third_party/libaom/source/config/linux/arm64/config/aom_config.h
+++ b/chromium/third_party/libaom/source/config/linux/arm64/config/aom_config.h
@@ -11,7 +11,6 @@
#ifndef AOM_CONFIG_H_
#define AOM_CONFIG_H_
#define ARCH_ARM 1
-#define ARCH_MIPS 0
#define ARCH_PPC 0
#define ARCH_X86 0
#define ARCH_X86_64 0
@@ -35,8 +34,6 @@
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_EXCLUDE_SIMD_MISMATCH 0
#define CONFIG_FPMT_TEST 0
-#define CONFIG_FRAME_PARALLEL_ENCODE 0
-#define CONFIG_FRAME_PARALLEL_ENCODE_2 0
#define CONFIG_GCC 1
#define CONFIG_GCOV 0
#define CONFIG_GPROF 0
@@ -52,7 +49,7 @@
#define CONFIG_OPTICAL_FLOW_API 0
#define CONFIG_OS_SUPPORT 1
#define CONFIG_PARTITION_SEARCH_ORDER 0
-#define CONFIG_PIC 0
+#define CONFIG_PIC 1
#define CONFIG_RATECTRL_LOG 0
#define CONFIG_RD_COMMAND 0
#define CONFIG_RD_DEBUG 0
@@ -71,14 +68,11 @@
#define DECODE_HEIGHT_LIMIT 16384
#define DECODE_WIDTH_LIMIT 16384
#define FORCE_HIGHBITDEPTH_DECODING 0
+#define HAVE_ARM_CRC32 0
#define HAVE_AVX 0
#define HAVE_AVX2 0
-#define HAVE_DSPR2 0
#define HAVE_FEXCEPT 1
-#define HAVE_MIPS32 0
-#define HAVE_MIPS64 0
#define HAVE_MMX 0
-#define HAVE_MSA 0
#define HAVE_NEON 1
#define HAVE_PTHREAD_H 1
#define HAVE_SSE 0
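
Same flag churn as the .asm file above: the MIPS switches are dropped, HAVE_ARM_CRC32 is introduced, and CONFIG_PIC flips to 1. Downstream translation units key off these macros at compile time; a sketch of the usage (check_simd is a hypothetical helper, not libaom API):

    #include "config/aom_config.h"  /* this generated header */

    static const char *check_simd(void) {
    #if HAVE_NEON
      return "neon";
    #elif HAVE_SSE
      return "sse";
    #else
      return "c-only";
    #endif
    }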
diff --git a/chromium/third_party/libaom/source/config/linux/arm64/config/aom_dsp_rtcd.h b/chromium/third_party/libaom/source/config/linux/arm64/config/aom_dsp_rtcd.h
index 155c11857c0..c2d2ac85aaa 100644
--- a/chromium/third_party/libaom/source/config/linux/arm64/config/aom_dsp_rtcd.h
+++ b/chromium/third_party/libaom/source/config/linux/arm64/config/aom_dsp_rtcd.h
@@ -34,7 +34,12 @@ void aom_avg_8x8_quad_c(const uint8_t* s,
int x16_idx,
int y16_idx,
int* avg);
-#define aom_avg_8x8_quad aom_avg_8x8_quad_c
+void aom_avg_8x8_quad_neon(const uint8_t* s,
+ int p,
+ int x16_idx,
+ int y16_idx,
+ int* avg);
+#define aom_avg_8x8_quad aom_avg_8x8_quad_neon
void aom_blend_a64_hmask_c(uint8_t* dst,
uint32_t dst_stride,
@@ -176,18 +181,24 @@ void aom_dc_128_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_c
+void aom_dc_128_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x4 aom_dc_128_predictor_16x4_c
+
+void aom_dc_128_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x64 aom_dc_128_predictor_16x64_c
+
void aom_dc_128_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_c
-void aom_dc_128_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c
-
void aom_dc_128_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -210,6 +221,18 @@ void aom_dc_128_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_32x64 aom_dc_128_predictor_32x64_c
+void aom_dc_128_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_32x8 aom_dc_128_predictor_32x8_c
+
+void aom_dc_128_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_4x16 aom_dc_128_predictor_4x16_c
+
void aom_dc_128_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -226,6 +249,12 @@ void aom_dc_128_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_c
+void aom_dc_128_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_64x16 aom_dc_128_predictor_64x16_c
+
void aom_dc_128_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -244,6 +273,12 @@ void aom_dc_128_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_c
+void aom_dc_128_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_8x32 aom_dc_128_predictor_8x32_c
+
void aom_dc_128_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -276,18 +311,24 @@ void aom_dc_left_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_c
+void aom_dc_left_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x4 aom_dc_left_predictor_16x4_c
+
+void aom_dc_left_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x64 aom_dc_left_predictor_16x64_c
+
void aom_dc_left_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_c
-void aom_dc_left_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c
-
void aom_dc_left_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -310,6 +351,18 @@ void aom_dc_left_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_32x64 aom_dc_left_predictor_32x64_c
+void aom_dc_left_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_32x8 aom_dc_left_predictor_32x8_c
+
+void aom_dc_left_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_4x16 aom_dc_left_predictor_4x16_c
+
void aom_dc_left_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -326,6 +379,12 @@ void aom_dc_left_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_c
+void aom_dc_left_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_64x16 aom_dc_left_predictor_64x16_c
+
void aom_dc_left_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -344,6 +403,12 @@ void aom_dc_left_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_c
+void aom_dc_left_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_8x32 aom_dc_left_predictor_8x32_c
+
void aom_dc_left_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -376,18 +441,24 @@ void aom_dc_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_c
+void aom_dc_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x4 aom_dc_predictor_16x4_c
+
+void aom_dc_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x64 aom_dc_predictor_16x64_c
+
void aom_dc_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_c
-void aom_dc_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c
-
void aom_dc_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -410,6 +481,18 @@ void aom_dc_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_32x64 aom_dc_predictor_32x64_c
+void aom_dc_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_32x8 aom_dc_predictor_32x8_c
+
+void aom_dc_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_4x16 aom_dc_predictor_4x16_c
+
void aom_dc_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -426,6 +509,12 @@ void aom_dc_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_c
+void aom_dc_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_64x16 aom_dc_predictor_64x16_c
+
void aom_dc_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -444,6 +533,12 @@ void aom_dc_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_c
+void aom_dc_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_8x32 aom_dc_predictor_8x32_c
+
void aom_dc_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -476,18 +571,24 @@ void aom_dc_top_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_c
+void aom_dc_top_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x4 aom_dc_top_predictor_16x4_c
+
+void aom_dc_top_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x64 aom_dc_top_predictor_16x64_c
+
void aom_dc_top_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_c
-void aom_dc_top_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c
-
void aom_dc_top_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -510,6 +611,18 @@ void aom_dc_top_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_32x64 aom_dc_top_predictor_32x64_c
+void aom_dc_top_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_32x8 aom_dc_top_predictor_32x8_c
+
+void aom_dc_top_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_4x16 aom_dc_top_predictor_4x16_c
+
void aom_dc_top_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -526,6 +639,12 @@ void aom_dc_top_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_c
+void aom_dc_top_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_64x16 aom_dc_top_predictor_64x16_c
+
void aom_dc_top_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -544,6 +663,12 @@ void aom_dc_top_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_c
+void aom_dc_top_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_8x32 aom_dc_top_predictor_8x32_c
+
void aom_dc_top_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1014,18 +1139,24 @@ void aom_h_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_16x32 aom_h_predictor_16x32_c
+void aom_h_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x4 aom_h_predictor_16x4_c
+
+void aom_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x64 aom_h_predictor_16x64_c
+
void aom_h_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_h_predictor_16x8 aom_h_predictor_16x8_c
-void aom_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_h_predictor_2x2 aom_h_predictor_2x2_c
-
void aom_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1048,6 +1179,18 @@ void aom_h_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_32x64 aom_h_predictor_32x64_c
+void aom_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_32x8 aom_h_predictor_32x8_c
+
+void aom_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_4x16 aom_h_predictor_4x16_c
+
void aom_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1064,6 +1207,12 @@ void aom_h_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_4x8 aom_h_predictor_4x8_c
+void aom_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_64x16 aom_h_predictor_64x16_c
+
void aom_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1082,6 +1231,12 @@ void aom_h_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_8x16 aom_h_predictor_8x16_c
+void aom_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_8x32 aom_h_predictor_8x32_c
+
void aom_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1124,11 +1279,6 @@ void aom_hadamard_8x8_neon(const int16_t* src_diff,
tran_low_t* coeff);
#define aom_hadamard_8x8 aom_hadamard_8x8_neon
-void aom_hadamard_8x8_dual_c(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-#define aom_hadamard_8x8_dual aom_hadamard_8x8_dual_c
-
void aom_hadamard_lp_16x16_c(const int16_t* src_diff,
ptrdiff_t src_stride,
int16_t* coeff);
@@ -1145,6 +1295,14 @@ void aom_hadamard_lp_8x8_neon(const int16_t* src_diff,
int16_t* coeff);
#define aom_hadamard_lp_8x8 aom_hadamard_lp_8x8_neon
+void aom_hadamard_lp_8x8_dual_c(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+void aom_hadamard_lp_8x8_dual_neon(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+#define aom_hadamard_lp_8x8_dual aom_hadamard_lp_8x8_dual_neon
+
void aom_ifft16x16_float_c(const float* input, float* temp, float* output);
#define aom_ifft16x16_float aom_ifft16x16_float_c
@@ -1160,18 +1318,32 @@ void aom_ifft4x4_float_c(const float* input, float* temp, float* output);
void aom_ifft8x8_float_c(const float* input, float* temp, float* output);
#define aom_ifft8x8_float aom_ifft8x8_float_c
-int16_t aom_int_pro_col_c(const uint8_t* ref, const int width);
-int16_t aom_int_pro_col_neon(const uint8_t* ref, const int width);
+void aom_int_pro_col_c(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_col_neon(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
#define aom_int_pro_col aom_int_pro_col_neon
-void aom_int_pro_row_c(int16_t hbuf[16],
+void aom_int_pro_row_c(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
-void aom_int_pro_row_neon(int16_t hbuf[16],
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_row_neon(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
+ const int width,
+ const int height,
+ int norm_factor);
#define aom_int_pro_row aom_int_pro_row_neon
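
aom_int_pro_row/col lose the fixed 16-entry buffer and gain explicit width, height and norm_factor parameters. A hedged sketch of the row-projection fallback implied by the new signature — the buffer orientation and shift-based normalization are read off the parameter names, not verified against the libaom sources:

    #include <stdint.h>

    /* hbuf[x] accumulates column x of ref over `height` rows, then is
     * scaled down by norm_factor (assumed semantics). */
    static void int_pro_row_sketch(int16_t *hbuf, const uint8_t *ref,
                                   int ref_stride, int width, int height,
                                   int norm_factor) {
      for (int x = 0; x < width; x++) {
        int sum = 0;
        for (int y = 0; y < height; y++) sum += ref[y * ref_stride + x];
        hbuf[x] = (int16_t)(sum >> norm_factor);
      }
    }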
void aom_lowbd_blend_a64_d16_mask_c(uint8_t* dst,
@@ -1237,7 +1409,12 @@ void aom_lpf_horizontal_14_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_14_quad aom_lpf_horizontal_14_quad_c
+void aom_lpf_horizontal_14_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_horizontal_14_quad aom_lpf_horizontal_14_quad_neon
void aom_lpf_horizontal_4_c(uint8_t* s,
int pitch,
@@ -1274,7 +1451,12 @@ void aom_lpf_horizontal_4_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_4_quad aom_lpf_horizontal_4_quad_c
+void aom_lpf_horizontal_4_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_horizontal_4_quad aom_lpf_horizontal_4_quad_neon
void aom_lpf_horizontal_6_c(uint8_t* s,
int pitch,
@@ -1311,7 +1493,12 @@ void aom_lpf_horizontal_6_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_6_quad aom_lpf_horizontal_6_quad_c
+void aom_lpf_horizontal_6_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_horizontal_6_quad aom_lpf_horizontal_6_quad_neon
void aom_lpf_horizontal_8_c(uint8_t* s,
int pitch,
@@ -1348,7 +1535,12 @@ void aom_lpf_horizontal_8_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_8_quad aom_lpf_horizontal_8_quad_c
+void aom_lpf_horizontal_8_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_horizontal_8_quad aom_lpf_horizontal_8_quad_neon
void aom_lpf_vertical_14_c(uint8_t* s,
int pitch,
@@ -1385,7 +1577,12 @@ void aom_lpf_vertical_14_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_14_quad aom_lpf_vertical_14_quad_c
+void aom_lpf_vertical_14_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_vertical_14_quad aom_lpf_vertical_14_quad_neon
void aom_lpf_vertical_4_c(uint8_t* s,
int pitch,
@@ -1422,7 +1619,12 @@ void aom_lpf_vertical_4_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_4_quad aom_lpf_vertical_4_quad_c
+void aom_lpf_vertical_4_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_vertical_4_quad aom_lpf_vertical_4_quad_neon
void aom_lpf_vertical_6_c(uint8_t* s,
int pitch,
@@ -1459,7 +1661,12 @@ void aom_lpf_vertical_6_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_6_quad aom_lpf_vertical_6_quad_c
+void aom_lpf_vertical_6_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_vertical_6_quad aom_lpf_vertical_6_quad_neon
void aom_lpf_vertical_8_c(uint8_t* s,
int pitch,
@@ -1496,7 +1703,12 @@ void aom_lpf_vertical_8_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_8_quad aom_lpf_vertical_8_quad_c
+void aom_lpf_vertical_8_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_vertical_8_quad aom_lpf_vertical_8_quad_neon
unsigned int aom_masked_sad128x128_c(const uint8_t* src,
int src_stride,
@@ -2070,21 +2282,43 @@ unsigned int aom_mse16x8_c(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
-#define aom_mse16x8 aom_mse16x8_c
+unsigned int aom_mse16x8_neon(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+#define aom_mse16x8 aom_mse16x8_neon
unsigned int aom_mse8x16_c(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
-#define aom_mse8x16 aom_mse8x16_c
+unsigned int aom_mse8x16_neon(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+#define aom_mse8x16 aom_mse8x16_neon
unsigned int aom_mse8x8_c(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
-#define aom_mse8x8 aom_mse8x8_c
+unsigned int aom_mse8x8_neon(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+#define aom_mse8x8 aom_mse8x8_neon
+
+uint64_t aom_mse_16xh_16bit_c(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+#define aom_mse_16xh_16bit aom_mse_16xh_16bit_c
uint64_t aom_mse_wxh_16bit_c(uint8_t* dst,
int dstride,
@@ -2098,85 +2332,191 @@ void aom_paeth_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_16x16 aom_paeth_predictor_16x16_c
+void aom_paeth_predictor_16x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x16 aom_paeth_predictor_16x16_neon
void aom_paeth_predictor_16x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_16x32 aom_paeth_predictor_16x32_c
+void aom_paeth_predictor_16x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x32 aom_paeth_predictor_16x32_neon
-void aom_paeth_predictor_16x8_c(uint8_t* dst,
+void aom_paeth_predictor_16x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_16x8 aom_paeth_predictor_16x8_c
+void aom_paeth_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x4 aom_paeth_predictor_16x4_neon
-void aom_paeth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c
+void aom_paeth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x64 aom_paeth_predictor_16x64_neon
+
+void aom_paeth_predictor_16x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x8 aom_paeth_predictor_16x8_neon
void aom_paeth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_32x16 aom_paeth_predictor_32x16_c
+void aom_paeth_predictor_32x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x16 aom_paeth_predictor_32x16_neon
void aom_paeth_predictor_32x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_32x32 aom_paeth_predictor_32x32_c
+void aom_paeth_predictor_32x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x32 aom_paeth_predictor_32x32_neon
void aom_paeth_predictor_32x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_32x64 aom_paeth_predictor_32x64_c
+void aom_paeth_predictor_32x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x64 aom_paeth_predictor_32x64_neon
+
+void aom_paeth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x8 aom_paeth_predictor_32x8_neon
+
+void aom_paeth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_4x16 aom_paeth_predictor_4x16_neon
void aom_paeth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_4x4 aom_paeth_predictor_4x4_c
+void aom_paeth_predictor_4x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_4x4 aom_paeth_predictor_4x4_neon
void aom_paeth_predictor_4x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_4x8 aom_paeth_predictor_4x8_c
+void aom_paeth_predictor_4x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_4x8 aom_paeth_predictor_4x8_neon
+
+void aom_paeth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_64x16 aom_paeth_predictor_64x16_neon
void aom_paeth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_64x32 aom_paeth_predictor_64x32_c
+void aom_paeth_predictor_64x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_64x32 aom_paeth_predictor_64x32_neon
void aom_paeth_predictor_64x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_64x64 aom_paeth_predictor_64x64_c
+void aom_paeth_predictor_64x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_64x64 aom_paeth_predictor_64x64_neon
void aom_paeth_predictor_8x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_8x16 aom_paeth_predictor_8x16_c
+void aom_paeth_predictor_8x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x16 aom_paeth_predictor_8x16_neon
+
+void aom_paeth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x32 aom_paeth_predictor_8x32_neon
void aom_paeth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_8x4 aom_paeth_predictor_8x4_c
+void aom_paeth_predictor_8x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x4 aom_paeth_predictor_8x4_neon
void aom_paeth_predictor_8x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_8x8 aom_paeth_predictor_8x8_c
+void aom_paeth_predictor_8x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x8 aom_paeth_predictor_8x8_neon
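
Every Paeth block size now dispatches to NEON. The underlying per-pixel rule is the classic Paeth selector: predict from left, above or above-left, whichever lies closest to left + above - above_left. A per-pixel sketch, not libaom's block-level kernel:

    #include <stdint.h>
    #include <stdlib.h>

    static uint8_t paeth_pixel(uint8_t left, uint8_t above,
                               uint8_t above_left) {
      const int base = (int)left + above - above_left;
      const int p_left = abs(base - left);
      const int p_above = abs(base - above);
      const int p_above_left = abs(base - above_left);
      /* Return whichever neighbor is nearest to base. */
      if (p_left <= p_above && p_left <= p_above_left) return left;
      if (p_above <= p_above_left) return above;
      return above_left;
    }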
void aom_pixel_scale_c(const int16_t* src_diff,
ptrdiff_t src_stride,
@@ -2238,20 +2578,6 @@ void aom_quantize_b_32x32_neon(const tran_low_t* coeff_ptr,
const int16_t* iscan);
#define aom_quantize_b_32x32 aom_quantize_b_32x32_neon
-void aom_quantize_b_32x32_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_32x32_adaptive aom_quantize_b_32x32_adaptive_c
-
void aom_quantize_b_64x64_c(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -2278,34 +2604,6 @@ void aom_quantize_b_64x64_neon(const tran_low_t* coeff_ptr,
const int16_t* iscan);
#define aom_quantize_b_64x64 aom_quantize_b_64x64_neon
-void aom_quantize_b_64x64_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_64x64_adaptive aom_quantize_b_64x64_adaptive_c
-
-void aom_quantize_b_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_adaptive aom_quantize_b_adaptive_c
-
unsigned int aom_sad128x128_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
@@ -2328,7 +2626,12 @@ void aom_sad128x128x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad128x128x4d aom_sad128x128x4d_c
+void aom_sad128x128x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad128x128x4d aom_sad128x128x4d_neon
void aom_sad128x128x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2342,7 +2645,11 @@ unsigned int aom_sad128x64_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad128x64 aom_sad128x64_c
+unsigned int aom_sad128x64_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad128x64 aom_sad128x64_neon
unsigned int aom_sad128x64_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2356,7 +2663,12 @@ void aom_sad128x64x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad128x64x4d aom_sad128x64x4d_c
+void aom_sad128x64x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad128x64x4d aom_sad128x64x4d_neon
void aom_sad128x64x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2415,7 +2727,11 @@ unsigned int aom_sad16x32_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad16x32 aom_sad16x32_c
+unsigned int aom_sad16x32_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad16x32 aom_sad16x32_neon
unsigned int aom_sad16x32_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2429,7 +2745,12 @@ void aom_sad16x32x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad16x32x4d aom_sad16x32x4d_c
+void aom_sad16x32x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad16x32x4d aom_sad16x32x4d_neon
void aom_sad16x32x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2461,7 +2782,12 @@ void aom_sad16x8x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad16x8x4d aom_sad16x8x4d_c
+void aom_sad16x8x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad16x8x4d aom_sad16x8x4d_neon
void aom_sad16x8x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2483,7 +2809,11 @@ unsigned int aom_sad32x16_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad32x16 aom_sad32x16_c
+unsigned int aom_sad32x16_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad32x16 aom_sad32x16_neon
unsigned int aom_sad32x16_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2497,7 +2827,12 @@ void aom_sad32x16x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad32x16x4d aom_sad32x16x4d_c
+void aom_sad32x16x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad32x16x4d aom_sad32x16x4d_neon
void aom_sad32x16x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2548,7 +2883,11 @@ unsigned int aom_sad32x64_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad32x64 aom_sad32x64_c
+unsigned int aom_sad32x64_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad32x64 aom_sad32x64_neon
unsigned int aom_sad32x64_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2562,7 +2901,12 @@ void aom_sad32x64x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad32x64x4d aom_sad32x64x4d_c
+void aom_sad32x64x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad32x64x4d aom_sad32x64x4d_neon
void aom_sad32x64x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2602,7 +2946,12 @@ void aom_sad4x4x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad4x4x4d aom_sad4x4x4d_c
+void aom_sad4x4x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad4x4x4d aom_sad4x4x4d_neon
void aom_sad4x4x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2616,7 +2965,11 @@ unsigned int aom_sad4x8_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad4x8 aom_sad4x8_c
+unsigned int aom_sad4x8_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad4x8 aom_sad4x8_neon
unsigned int aom_sad4x8_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2630,7 +2983,12 @@ void aom_sad4x8x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad4x8x4d aom_sad4x8x4d_c
+void aom_sad4x8x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad4x8x4d aom_sad4x8x4d_neon
void aom_sad4x8x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2652,7 +3010,11 @@ unsigned int aom_sad64x128_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad64x128 aom_sad64x128_c
+unsigned int aom_sad64x128_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad64x128 aom_sad64x128_neon
unsigned int aom_sad64x128_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2666,7 +3028,12 @@ void aom_sad64x128x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad64x128x4d aom_sad64x128x4d_c
+void aom_sad64x128x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad64x128x4d aom_sad64x128x4d_neon
void aom_sad64x128x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2680,7 +3047,11 @@ unsigned int aom_sad64x32_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad64x32 aom_sad64x32_c
+unsigned int aom_sad64x32_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad64x32 aom_sad64x32_neon
unsigned int aom_sad64x32_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2694,7 +3065,12 @@ void aom_sad64x32x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad64x32x4d aom_sad64x32x4d_c
+void aom_sad64x32x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad64x32x4d aom_sad64x32x4d_neon
void aom_sad64x32x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2771,7 +3147,12 @@ void aom_sad8x16x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad8x16x4d aom_sad8x16x4d_c
+void aom_sad8x16x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad8x16x4d aom_sad8x16x4d_neon
void aom_sad8x16x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2785,7 +3166,11 @@ unsigned int aom_sad8x4_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad8x4 aom_sad8x4_c
+unsigned int aom_sad8x4_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad8x4 aom_sad8x4_neon
unsigned int aom_sad8x4_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2799,7 +3184,12 @@ void aom_sad8x4x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad8x4x4d aom_sad8x4x4d_c
+void aom_sad8x4x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad8x4x4d aom_sad8x4x4d_neon
void aom_sad8x4x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2831,7 +3221,12 @@ void aom_sad8x8x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad8x8x4d aom_sad8x8x4d_c
+void aom_sad8x8x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad8x8x4d aom_sad8x8x4d_neon
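
The *_x4d prototypes gaining NEON dispatch all follow one contract: a single call scores four candidate reference blocks against the same source block, which is what makes them worth vectorizing. A size-generic sketch (W and H stand in for the dimensions baked into each specialization):

    #include <stdint.h>
    #include <stdlib.h>

    static void sad_x4d_sketch(const uint8_t *src, int src_stride,
                               const uint8_t *const ref[4], int ref_stride,
                               uint32_t sad[4], int W, int H) {
      for (int k = 0; k < 4; k++) {        /* four candidates per call */
        uint32_t acc = 0;
        for (int y = 0; y < H; y++)
          for (int x = 0; x < W; x++)
            acc += abs(src[y * src_stride + x] - ref[k][y * ref_stride + x]);
        sad[k] = acc;
      }
    }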
void aom_sad8x8x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -3219,85 +3614,191 @@ void aom_smooth_h_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_16x16 aom_smooth_h_predictor_16x16_c
+void aom_smooth_h_predictor_16x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x16 aom_smooth_h_predictor_16x16_neon
void aom_smooth_h_predictor_16x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_16x32 aom_smooth_h_predictor_16x32_c
+void aom_smooth_h_predictor_16x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x32 aom_smooth_h_predictor_16x32_neon
-void aom_smooth_h_predictor_16x8_c(uint8_t* dst,
+void aom_smooth_h_predictor_16x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_16x8 aom_smooth_h_predictor_16x8_c
+void aom_smooth_h_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x4 aom_smooth_h_predictor_16x4_neon
-void aom_smooth_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_h_predictor_2x2 aom_smooth_h_predictor_2x2_c
+void aom_smooth_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x64 aom_smooth_h_predictor_16x64_neon
+
+void aom_smooth_h_predictor_16x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_16x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x8 aom_smooth_h_predictor_16x8_neon
void aom_smooth_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_32x16 aom_smooth_h_predictor_32x16_c
+void aom_smooth_h_predictor_32x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x16 aom_smooth_h_predictor_32x16_neon
void aom_smooth_h_predictor_32x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_32x32 aom_smooth_h_predictor_32x32_c
+void aom_smooth_h_predictor_32x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x32 aom_smooth_h_predictor_32x32_neon
void aom_smooth_h_predictor_32x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_32x64 aom_smooth_h_predictor_32x64_c
+void aom_smooth_h_predictor_32x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x64 aom_smooth_h_predictor_32x64_neon
+
+void aom_smooth_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x8 aom_smooth_h_predictor_32x8_neon
+
+void aom_smooth_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_4x16 aom_smooth_h_predictor_4x16_neon
void aom_smooth_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_4x4 aom_smooth_h_predictor_4x4_c
+void aom_smooth_h_predictor_4x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_4x4 aom_smooth_h_predictor_4x4_neon
void aom_smooth_h_predictor_4x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_4x8 aom_smooth_h_predictor_4x8_c
+void aom_smooth_h_predictor_4x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_4x8 aom_smooth_h_predictor_4x8_neon
+
+void aom_smooth_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_64x16 aom_smooth_h_predictor_64x16_neon
void aom_smooth_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_64x32 aom_smooth_h_predictor_64x32_c
+void aom_smooth_h_predictor_64x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_64x32 aom_smooth_h_predictor_64x32_neon
void aom_smooth_h_predictor_64x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_64x64 aom_smooth_h_predictor_64x64_c
+void aom_smooth_h_predictor_64x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_64x64 aom_smooth_h_predictor_64x64_neon
void aom_smooth_h_predictor_8x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_8x16 aom_smooth_h_predictor_8x16_c
+void aom_smooth_h_predictor_8x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x16 aom_smooth_h_predictor_8x16_neon
+
+void aom_smooth_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x32 aom_smooth_h_predictor_8x32_neon
void aom_smooth_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_8x4 aom_smooth_h_predictor_8x4_c
+void aom_smooth_h_predictor_8x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x4 aom_smooth_h_predictor_8x4_neon
void aom_smooth_h_predictor_8x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_8x8 aom_smooth_h_predictor_8x8_c
+void aom_smooth_h_predictor_8x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x8 aom_smooth_h_predictor_8x8_neon
void aom_smooth_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
@@ -3319,6 +3820,26 @@ void aom_smooth_predictor_16x32_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_16x32 aom_smooth_predictor_16x32_neon
+void aom_smooth_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_16x4 aom_smooth_predictor_16x4_neon
+
+void aom_smooth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_16x64 aom_smooth_predictor_16x64_neon
+
void aom_smooth_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3329,12 +3850,6 @@ void aom_smooth_predictor_16x8_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_16x8 aom_smooth_predictor_16x8_neon
-void aom_smooth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c
-
void aom_smooth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3365,6 +3880,26 @@ void aom_smooth_predictor_32x64_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_32x64 aom_smooth_predictor_32x64_neon
+void aom_smooth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_32x8 aom_smooth_predictor_32x8_neon
+
+void aom_smooth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_4x16 aom_smooth_predictor_4x16_neon
+
void aom_smooth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3385,6 +3920,16 @@ void aom_smooth_predictor_4x8_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_4x8 aom_smooth_predictor_4x8_neon
+void aom_smooth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_64x16 aom_smooth_predictor_64x16_neon
+
void aom_smooth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3415,6 +3960,16 @@ void aom_smooth_predictor_8x16_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_8x16 aom_smooth_predictor_8x16_neon
+void aom_smooth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_8x32 aom_smooth_predictor_8x32_neon
+
void aom_smooth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3439,85 +3994,191 @@ void aom_smooth_v_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_16x16 aom_smooth_v_predictor_16x16_c
+void aom_smooth_v_predictor_16x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x16 aom_smooth_v_predictor_16x16_neon
void aom_smooth_v_predictor_16x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_16x32 aom_smooth_v_predictor_16x32_c
+void aom_smooth_v_predictor_16x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x32 aom_smooth_v_predictor_16x32_neon
-void aom_smooth_v_predictor_16x8_c(uint8_t* dst,
+void aom_smooth_v_predictor_16x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_16x8 aom_smooth_v_predictor_16x8_c
+void aom_smooth_v_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x4 aom_smooth_v_predictor_16x4_neon
-void aom_smooth_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_v_predictor_2x2 aom_smooth_v_predictor_2x2_c
+void aom_smooth_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x64 aom_smooth_v_predictor_16x64_neon
+
+void aom_smooth_v_predictor_16x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_16x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x8 aom_smooth_v_predictor_16x8_neon
void aom_smooth_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_32x16 aom_smooth_v_predictor_32x16_c
+void aom_smooth_v_predictor_32x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x16 aom_smooth_v_predictor_32x16_neon
void aom_smooth_v_predictor_32x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_32x32 aom_smooth_v_predictor_32x32_c
+void aom_smooth_v_predictor_32x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x32 aom_smooth_v_predictor_32x32_neon
void aom_smooth_v_predictor_32x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_32x64 aom_smooth_v_predictor_32x64_c
+void aom_smooth_v_predictor_32x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x64 aom_smooth_v_predictor_32x64_neon
+
+void aom_smooth_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x8 aom_smooth_v_predictor_32x8_neon
+
+void aom_smooth_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_4x16 aom_smooth_v_predictor_4x16_neon
void aom_smooth_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_4x4 aom_smooth_v_predictor_4x4_c
+void aom_smooth_v_predictor_4x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_4x4 aom_smooth_v_predictor_4x4_neon
void aom_smooth_v_predictor_4x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_4x8 aom_smooth_v_predictor_4x8_c
+void aom_smooth_v_predictor_4x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_4x8 aom_smooth_v_predictor_4x8_neon
+
+void aom_smooth_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_64x16 aom_smooth_v_predictor_64x16_neon
void aom_smooth_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_64x32 aom_smooth_v_predictor_64x32_c
+void aom_smooth_v_predictor_64x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_64x32 aom_smooth_v_predictor_64x32_neon
void aom_smooth_v_predictor_64x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_64x64 aom_smooth_v_predictor_64x64_c
+void aom_smooth_v_predictor_64x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_64x64 aom_smooth_v_predictor_64x64_neon
void aom_smooth_v_predictor_8x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_8x16 aom_smooth_v_predictor_8x16_c
+void aom_smooth_v_predictor_8x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x16 aom_smooth_v_predictor_8x16_neon
+
+void aom_smooth_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x32 aom_smooth_v_predictor_8x32_neon
void aom_smooth_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_8x4 aom_smooth_v_predictor_8x4_c
+void aom_smooth_v_predictor_8x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x4 aom_smooth_v_predictor_8x4_neon
void aom_smooth_v_predictor_8x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_8x8 aom_smooth_v_predictor_8x8_c
+void aom_smooth_v_predictor_8x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x8 aom_smooth_v_predictor_8x8_neon
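With the block above, every SMOOTH_V block size on arm64 now routes to a NEON kernel. The `_c` functions these bindings replace all compute the same per-pixel blend; a minimal scalar sketch of that algorithm (assuming AV1's 256-scale smooth weights, with `sm_weights` standing in for libaom's per-block-height weight table) is:

    #include <stddef.h>
    #include <stdint.h>

    /* Scalar SMOOTH_V sketch: each row blends the `above` row with the
       bottom-left sample using a per-row weight out of 256. sm_weights[]
       is a stand-in for libaom's block-height weight table. */
    static void smooth_v_predictor_sketch(uint8_t *dst, ptrdiff_t stride,
                                          int bw, int bh,
                                          const uint8_t *above,
                                          const uint8_t *left,
                                          const uint8_t *sm_weights) {
      const uint32_t below = left[bh - 1]; /* bottom-left anchor sample */
      for (int r = 0; r < bh; ++r) {
        for (int c = 0; c < bw; ++c) {
          const uint32_t p =
              sm_weights[r] * above[c] + (256 - sm_weights[r]) * below;
          dst[c] = (uint8_t)((p + 128) >> 8); /* round back to 8 bits */
        }
        dst += stride;
      }
    }

The NEON versions vectorize exactly this blend across a row, which is why only the dispatch macro changes in the header.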
int64_t aom_sse_c(const uint8_t* a,
int a_stride,
@@ -3996,7 +4657,12 @@ uint64_t aom_sum_sse_2d_i16_c(const int16_t* src,
int width,
int height,
int* sum);
-#define aom_sum_sse_2d_i16 aom_sum_sse_2d_i16_c
+uint64_t aom_sum_sse_2d_i16_neon(const int16_t* src,
+ int src_stride,
+ int width,
+ int height,
+ int* sum);
+#define aom_sum_sse_2d_i16 aom_sum_sse_2d_i16_neon
void aom_v_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
@@ -4014,18 +4680,24 @@ void aom_v_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_16x32 aom_v_predictor_16x32_c
+void aom_v_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x4 aom_v_predictor_16x4_c
+
+void aom_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x64 aom_v_predictor_16x64_c
+
void aom_v_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_v_predictor_16x8 aom_v_predictor_16x8_c
-void aom_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_v_predictor_2x2 aom_v_predictor_2x2_c
-
void aom_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4048,6 +4720,18 @@ void aom_v_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_32x64 aom_v_predictor_32x64_c
+void aom_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_32x8 aom_v_predictor_32x8_c
+
+void aom_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_4x16 aom_v_predictor_4x16_c
+
void aom_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4064,6 +4748,12 @@ void aom_v_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_4x8 aom_v_predictor_4x8_c
+void aom_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_64x16 aom_v_predictor_64x16_c
+
void aom_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4082,6 +4772,12 @@ void aom_v_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_8x16 aom_v_predictor_8x16_c
+void aom_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_8x32 aom_v_predictor_8x32_c
+
void aom_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4317,8 +5013,8 @@ unsigned int aom_variance8x8_neon(const uint8_t* src_ptr,
unsigned int* sse);
#define aom_variance8x8 aom_variance8x8_neon
-int aom_vector_var_c(const int16_t* ref, const int16_t* src, const int bwl);
-int aom_vector_var_neon(const int16_t* ref, const int16_t* src, const int bwl);
+int aom_vector_var_c(const int16_t* ref, const int16_t* src, int bwl);
+int aom_vector_var_neon(const int16_t* ref, const int16_t* src, int bwl);
#define aom_vector_var aom_vector_var_neon
void aom_dsp_rtcd(void);
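As throughout these generated headers, each kernel is declared once per implementation and the generic name is bound to the single best variant the target is known to support; `aom_dsp_rtcd()` is the hook that finalizes any remaining runtime-dispatched entries. Condensed, with `aom_foo` as a placeholder name, the pattern is:

    /* The generated binding pattern, condensed (aom_foo is hypothetical).
       When a platform has exactly one usable implementation, the binding
       is a plain macro resolved at compile time: */
    void aom_foo_c(uint8_t *dst);    /* portable reference */
    void aom_foo_neon(uint8_t *dst); /* SIMD kernel        */
    #define aom_foo aom_foo_neon     /* fixed at build time */

This is why the bulk of the update is mechanical: each newly vectorized function gains a `_neon` declaration and its `#define` flips from `_c` to `_neon`.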
diff --git a/chromium/third_party/libaom/source/config/linux/arm64/config/aom_scale_rtcd.h b/chromium/third_party/libaom/source/config/linux/arm64/config/aom_scale_rtcd.h
index e27b558dd38..7db7b9d0f84 100644
--- a/chromium/third_party/libaom/source/config/linux/arm64/config/aom_scale_rtcd.h
+++ b/chromium/third_party/libaom/source/config/linux/arm64/config/aom_scale_rtcd.h
@@ -18,6 +18,12 @@ void aom_extend_frame_borders_c(struct yv12_buffer_config* ybf,
const int num_planes);
#define aom_extend_frame_borders aom_extend_frame_borders_c
+void aom_extend_frame_borders_plane_row_c(const struct yv12_buffer_config* ybf,
+ int plane,
+ int v_start,
+ int v_end);
+#define aom_extend_frame_borders_plane_row aom_extend_frame_borders_plane_row_c
+
void aom_extend_frame_borders_y_c(struct yv12_buffer_config* ybf);
#define aom_extend_frame_borders_y aom_extend_frame_borders_y_c
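The new `aom_extend_frame_borders_plane_row` entry point pads one plane's borders over a row range [v_start, v_end) instead of the whole frame, which suits row-banded processing. A hedged usage sketch (the `extend_in_bands` wrapper and its parameters are illustrative, not from the tree; the header path follows the libaom layout):

    #include "aom_scale/yv12config.h" /* struct yv12_buffer_config */

    /* Hypothetical caller: extend one plane's borders in horizontal
       bands, e.g. so each worker pads only the rows it just produced. */
    static void extend_in_bands(const struct yv12_buffer_config *frame,
                                int plane, int plane_height, int band) {
      for (int v_start = 0; v_start < plane_height; v_start += band) {
        const int v_end = (v_start + band < plane_height)
                              ? v_start + band
                              : plane_height;
        aom_extend_frame_borders_plane_row(frame, plane, v_start, v_end);
      }
    }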
diff --git a/chromium/third_party/libaom/source/config/linux/arm64/config/av1_rtcd.h b/chromium/third_party/libaom/source/config/linux/arm64/config/av1_rtcd.h
index 23e831cc591..fd9ac835ad7 100644
--- a/chromium/third_party/libaom/source/config/linux/arm64/config/av1_rtcd.h
+++ b/chromium/third_party/libaom/source/config/linux/arm64/config/av1_rtcd.h
@@ -183,6 +183,30 @@ void aom_upsampled_pred_c(MACROBLOCKD* xd,
int subpel_search);
#define aom_upsampled_pred aom_upsampled_pred_c
+void av1_apply_selfguided_restoration_c(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+void av1_apply_selfguided_restoration_neon(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+#define av1_apply_selfguided_restoration av1_apply_selfguided_restoration_neon
+
int64_t av1_block_error_c(const tran_low_t* coeff,
const tran_low_t* dqcoeff,
intptr_t block_size,
@@ -234,6 +258,14 @@ void av1_build_compound_diffwtd_mask_d16_neon(uint8_t* mask,
#define av1_build_compound_diffwtd_mask_d16 \
av1_build_compound_diffwtd_mask_d16_neon
+int64_t av1_calc_frame_error_c(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+#define av1_calc_frame_error av1_calc_frame_error_c
+
void av1_calc_indices_dim1_c(const int* data,
const int* centroids,
uint8_t* indices,
@@ -1314,6 +1346,28 @@ void av1_round_shift_array_c(int32_t* arr, int size, int bit);
void av1_round_shift_array_neon(int32_t* arr, int size, int bit);
#define av1_round_shift_array av1_round_shift_array_neon
+int av1_selfguided_restoration_c(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+int av1_selfguided_restoration_neon(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+#define av1_selfguided_restoration av1_selfguided_restoration_neon
+
void av1_txb_init_levels_c(const tran_low_t* const coeff,
const int width,
const int height,
@@ -1330,6 +1384,44 @@ void av1_upsample_intra_edge_c(uint8_t* p, int sz);
void av1_upsample_intra_edge_high_c(uint16_t* p, int sz, int bd);
#define av1_upsample_intra_edge_high av1_upsample_intra_edge_high_c
+void av1_warp_affine_c(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+void av1_warp_affine_neon(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+#define av1_warp_affine av1_warp_affine_neon
+
void av1_wedge_compute_delta_squares_c(int16_t* d,
const int16_t* a,
const int16_t* b,
@@ -1346,7 +1438,11 @@ uint64_t av1_wedge_sse_from_residuals_c(const int16_t* r1,
const int16_t* d,
const uint8_t* m,
int N);
-#define av1_wedge_sse_from_residuals av1_wedge_sse_from_residuals_c
+uint64_t av1_wedge_sse_from_residuals_neon(const int16_t* r1,
+ const int16_t* d,
+ const uint8_t* m,
+ int N);
+#define av1_wedge_sse_from_residuals av1_wedge_sse_from_residuals_neon
void av1_wiener_convolve_add_src_c(const uint8_t* src,
ptrdiff_t src_stride,
@@ -1610,7 +1706,15 @@ void cdef_find_dir_dual_c(const uint16_t* img1,
int coeff_shift,
int* out1,
int* out2);
-#define cdef_find_dir_dual cdef_find_dir_dual_c
+void cdef_find_dir_dual_neon(const uint16_t* img1,
+ const uint16_t* img2,
+ int stride,
+ int32_t* var1,
+ int32_t* var2,
+ int coeff_shift,
+ int* out1,
+ int* out2);
+#define cdef_find_dir_dual cdef_find_dir_dual_neon
cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_c(TX_SIZE tx_size);
cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_neon(TX_SIZE tx_size);
diff --git a/chromium/third_party/libaom/source/config/linux/generic/config/aom_config.asm b/chromium/third_party/libaom/source/config/linux/generic/config/aom_config.asm
index 5e741322b8d..72d2a4e1773 100644
--- a/chromium/third_party/libaom/source/config/linux/generic/config/aom_config.asm
+++ b/chromium/third_party/libaom/source/config/linux/generic/config/aom_config.asm
@@ -9,7 +9,6 @@
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;
ARCH_ARM equ 0
-ARCH_MIPS equ 0
ARCH_PPC equ 0
ARCH_X86 equ 0
ARCH_X86_64 equ 0
@@ -33,8 +32,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
CONFIG_ENTROPY_STATS equ 0
CONFIG_EXCLUDE_SIMD_MISMATCH equ 0
CONFIG_FPMT_TEST equ 0
-CONFIG_FRAME_PARALLEL_ENCODE equ 0
-CONFIG_FRAME_PARALLEL_ENCODE_2 equ 0
CONFIG_GCC equ 1
CONFIG_GCOV equ 0
CONFIG_GPROF equ 0
@@ -50,7 +47,7 @@ CONFIG_NORMAL_TILE_MODE equ 1
CONFIG_OPTICAL_FLOW_API equ 0
CONFIG_OS_SUPPORT equ 1
CONFIG_PARTITION_SEARCH_ORDER equ 0
-CONFIG_PIC equ 0
+CONFIG_PIC equ 1
CONFIG_RATECTRL_LOG equ 0
CONFIG_RD_COMMAND equ 0
CONFIG_RD_DEBUG equ 0
@@ -69,14 +66,11 @@ CONFIG_WEBM_IO equ 1
DECODE_HEIGHT_LIMIT equ 16384
DECODE_WIDTH_LIMIT equ 16384
FORCE_HIGHBITDEPTH_DECODING equ 0
+HAVE_ARM_CRC32 equ 0
HAVE_AVX equ 0
HAVE_AVX2 equ 0
-HAVE_DSPR2 equ 0
HAVE_FEXCEPT equ 1
-HAVE_MIPS32 equ 0
-HAVE_MIPS64 equ 0
HAVE_MMX equ 0
-HAVE_MSA equ 0
HAVE_NEON equ 0
HAVE_PTHREAD_H equ 1
HAVE_SSE equ 0
diff --git a/chromium/third_party/libaom/source/config/linux/generic/config/aom_config.c b/chromium/third_party/libaom/source/config/linux/generic/config/aom_config.c
index d4d7e87a64e..d7a0494ec2c 100644
--- a/chromium/third_party/libaom/source/config/linux/generic/config/aom_config.c
+++ b/chromium/third_party/libaom/source/config/linux/generic/config/aom_config.c
@@ -9,5 +9,5 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom/aom_codec.h"
-static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DAOM_TARGET_CPU=generic -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
+static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DAOM_TARGET_CPU=generic -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
const char *aom_codec_build_config(void) {return cfg;}
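The change to `cfg` only reorders the flags (`-DCONFIG_REALTIME_ONLY=1` moves later in the command line); the string itself is what `aom_codec_build_config()` reports at runtime, e.g.:

    #include <stdio.h>
    #include "aom/aom_codec.h"

    int main(void) {
      /* Prints the exact cmake invocation baked into this libaom build. */
      printf("%s\n", aom_codec_build_config());
      return 0;
    }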
diff --git a/chromium/third_party/libaom/source/config/linux/generic/config/aom_config.h b/chromium/third_party/libaom/source/config/linux/generic/config/aom_config.h
index 39b030dd504..c14a67e478b 100644
--- a/chromium/third_party/libaom/source/config/linux/generic/config/aom_config.h
+++ b/chromium/third_party/libaom/source/config/linux/generic/config/aom_config.h
@@ -11,7 +11,6 @@
#ifndef AOM_CONFIG_H_
#define AOM_CONFIG_H_
#define ARCH_ARM 0
-#define ARCH_MIPS 0
#define ARCH_PPC 0
#define ARCH_X86 0
#define ARCH_X86_64 0
@@ -35,8 +34,6 @@
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_EXCLUDE_SIMD_MISMATCH 0
#define CONFIG_FPMT_TEST 0
-#define CONFIG_FRAME_PARALLEL_ENCODE 0
-#define CONFIG_FRAME_PARALLEL_ENCODE_2 0
#define CONFIG_GCC 1
#define CONFIG_GCOV 0
#define CONFIG_GPROF 0
@@ -52,7 +49,7 @@
#define CONFIG_OPTICAL_FLOW_API 0
#define CONFIG_OS_SUPPORT 1
#define CONFIG_PARTITION_SEARCH_ORDER 0
-#define CONFIG_PIC 0
+#define CONFIG_PIC 1
#define CONFIG_RATECTRL_LOG 0
#define CONFIG_RD_COMMAND 0
#define CONFIG_RD_DEBUG 0
@@ -71,14 +68,11 @@
#define DECODE_HEIGHT_LIMIT 16384
#define DECODE_WIDTH_LIMIT 16384
#define FORCE_HIGHBITDEPTH_DECODING 0
+#define HAVE_ARM_CRC32 0
#define HAVE_AVX 0
#define HAVE_AVX2 0
-#define HAVE_DSPR2 0
#define HAVE_FEXCEPT 1
-#define HAVE_MIPS32 0
-#define HAVE_MIPS64 0
#define HAVE_MMX 0
-#define HAVE_MSA 0
#define HAVE_NEON 0
#define HAVE_PTHREAD_H 1
#define HAVE_SSE 0
diff --git a/chromium/third_party/libaom/source/config/linux/generic/config/aom_dsp_rtcd.h b/chromium/third_party/libaom/source/config/linux/generic/config/aom_dsp_rtcd.h
index 5fbcf9c3384..c6d2debf5ed 100644
--- a/chromium/third_party/libaom/source/config/linux/generic/config/aom_dsp_rtcd.h
+++ b/chromium/third_party/libaom/source/config/linux/generic/config/aom_dsp_rtcd.h
@@ -146,18 +146,24 @@ void aom_dc_128_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_c
+void aom_dc_128_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x4 aom_dc_128_predictor_16x4_c
+
+void aom_dc_128_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x64 aom_dc_128_predictor_16x64_c
+
void aom_dc_128_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_c
-void aom_dc_128_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c
-
void aom_dc_128_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -176,6 +182,18 @@ void aom_dc_128_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_32x64 aom_dc_128_predictor_32x64_c
+void aom_dc_128_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_32x8 aom_dc_128_predictor_32x8_c
+
+void aom_dc_128_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_4x16 aom_dc_128_predictor_4x16_c
+
void aom_dc_128_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -188,6 +206,12 @@ void aom_dc_128_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_c
+void aom_dc_128_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_64x16 aom_dc_128_predictor_64x16_c
+
void aom_dc_128_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -206,6 +230,12 @@ void aom_dc_128_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_c
+void aom_dc_128_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_8x32 aom_dc_128_predictor_8x32_c
+
void aom_dc_128_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -230,18 +260,24 @@ void aom_dc_left_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_c
+void aom_dc_left_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x4 aom_dc_left_predictor_16x4_c
+
+void aom_dc_left_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x64 aom_dc_left_predictor_16x64_c
+
void aom_dc_left_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_c
-void aom_dc_left_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c
-
void aom_dc_left_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -260,6 +296,18 @@ void aom_dc_left_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_32x64 aom_dc_left_predictor_32x64_c
+void aom_dc_left_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_32x8 aom_dc_left_predictor_32x8_c
+
+void aom_dc_left_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_4x16 aom_dc_left_predictor_4x16_c
+
void aom_dc_left_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -272,6 +320,12 @@ void aom_dc_left_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_c
+void aom_dc_left_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_64x16 aom_dc_left_predictor_64x16_c
+
void aom_dc_left_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -290,6 +344,12 @@ void aom_dc_left_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_c
+void aom_dc_left_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_8x32 aom_dc_left_predictor_8x32_c
+
void aom_dc_left_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -314,18 +374,24 @@ void aom_dc_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_c
+void aom_dc_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x4 aom_dc_predictor_16x4_c
+
+void aom_dc_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x64 aom_dc_predictor_16x64_c
+
void aom_dc_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_c
-void aom_dc_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c
-
void aom_dc_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -344,6 +410,18 @@ void aom_dc_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_32x64 aom_dc_predictor_32x64_c
+void aom_dc_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_32x8 aom_dc_predictor_32x8_c
+
+void aom_dc_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_4x16 aom_dc_predictor_4x16_c
+
void aom_dc_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -356,6 +434,12 @@ void aom_dc_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_c
+void aom_dc_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_64x16 aom_dc_predictor_64x16_c
+
void aom_dc_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -374,6 +458,12 @@ void aom_dc_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_c
+void aom_dc_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_8x32 aom_dc_predictor_8x32_c
+
void aom_dc_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -398,18 +488,24 @@ void aom_dc_top_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_c
+void aom_dc_top_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x4 aom_dc_top_predictor_16x4_c
+
+void aom_dc_top_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x64 aom_dc_top_predictor_16x64_c
+
void aom_dc_top_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_c
-void aom_dc_top_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c
-
void aom_dc_top_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -428,6 +524,18 @@ void aom_dc_top_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_32x64 aom_dc_top_predictor_32x64_c
+void aom_dc_top_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_32x8 aom_dc_top_predictor_32x8_c
+
+void aom_dc_top_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_4x16 aom_dc_top_predictor_4x16_c
+
void aom_dc_top_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -440,6 +548,12 @@ void aom_dc_top_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_c
+void aom_dc_top_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_64x16 aom_dc_top_predictor_64x16_c
+
void aom_dc_top_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -458,6 +572,12 @@ void aom_dc_top_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_c
+void aom_dc_top_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_8x32 aom_dc_top_predictor_8x32_c
+
void aom_dc_top_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -895,18 +1015,24 @@ void aom_h_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_16x32 aom_h_predictor_16x32_c
+void aom_h_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x4 aom_h_predictor_16x4_c
+
+void aom_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x64 aom_h_predictor_16x64_c
+
void aom_h_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_h_predictor_16x8 aom_h_predictor_16x8_c
-void aom_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_h_predictor_2x2 aom_h_predictor_2x2_c
-
void aom_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -925,6 +1051,18 @@ void aom_h_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_32x64 aom_h_predictor_32x64_c
+void aom_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_32x8 aom_h_predictor_32x8_c
+
+void aom_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_4x16 aom_h_predictor_4x16_c
+
void aom_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -937,6 +1075,12 @@ void aom_h_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_4x8 aom_h_predictor_4x8_c
+void aom_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_64x16 aom_h_predictor_64x16_c
+
void aom_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -955,6 +1099,12 @@ void aom_h_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_8x16 aom_h_predictor_8x16_c
+void aom_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_8x32 aom_h_predictor_8x32_c
+
void aom_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -987,11 +1137,6 @@ void aom_hadamard_8x8_c(const int16_t* src_diff,
tran_low_t* coeff);
#define aom_hadamard_8x8 aom_hadamard_8x8_c
-void aom_hadamard_8x8_dual_c(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-#define aom_hadamard_8x8_dual aom_hadamard_8x8_dual_c
-
void aom_hadamard_lp_16x16_c(const int16_t* src_diff,
ptrdiff_t src_stride,
int16_t* coeff);
@@ -1002,6 +1147,11 @@ void aom_hadamard_lp_8x8_c(const int16_t* src_diff,
int16_t* coeff);
#define aom_hadamard_lp_8x8 aom_hadamard_lp_8x8_c
+void aom_hadamard_lp_8x8_dual_c(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+#define aom_hadamard_lp_8x8_dual aom_hadamard_lp_8x8_dual_c
+
void aom_ifft16x16_float_c(const float* input, float* temp, float* output);
#define aom_ifft16x16_float aom_ifft16x16_float_c
@@ -1017,13 +1167,20 @@ void aom_ifft4x4_float_c(const float* input, float* temp, float* output);
void aom_ifft8x8_float_c(const float* input, float* temp, float* output);
#define aom_ifft8x8_float aom_ifft8x8_float_c
-int16_t aom_int_pro_col_c(const uint8_t* ref, const int width);
+void aom_int_pro_col_c(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
#define aom_int_pro_col aom_int_pro_col_c
-void aom_int_pro_row_c(int16_t hbuf[16],
+void aom_int_pro_row_c(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
+ const int width,
+ const int height,
+ int norm_factor);
#define aom_int_pro_row aom_int_pro_row_c
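Both integral-projection helpers change shape here: `aom_int_pro_col` now fills a per-row sum vector instead of returning a single value, and `aom_int_pro_row` takes an explicit width, height, and normalization factor rather than a fixed 16-column buffer. A sketch consistent with the new row signature (an assumption about the semantics; libaom's reference may normalize with a shift rather than a division):

    #include <stdint.h>

    /* Sketch: sum each column of the ref block over `height` rows,
       normalize, and store one int16 per column. Plain division is
       assumed here; the actual rounding in libaom may differ. */
    static void int_pro_row_sketch(int16_t *hbuf, const uint8_t *ref,
                                   int ref_stride, int width, int height,
                                   int norm_factor) {
      for (int c = 0; c < width; ++c) {
        int sum = 0;
        for (int r = 0; r < height; ++r) sum += ref[r * ref_stride + c];
        hbuf[c] = (int16_t)(sum / norm_factor);
      }
    }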
void aom_lowbd_blend_a64_d16_mask_c(uint8_t* dst,
@@ -1816,6 +1973,13 @@ unsigned int aom_mse8x8_c(const uint8_t* src_ptr,
unsigned int* sse);
#define aom_mse8x8 aom_mse8x8_c
+uint64_t aom_mse_16xh_16bit_c(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+#define aom_mse_16xh_16bit aom_mse_16xh_16bit_c
+
uint64_t aom_mse_wxh_16bit_c(uint8_t* dst,
int dstride,
uint16_t* src,
@@ -1836,18 +2000,24 @@ void aom_paeth_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_paeth_predictor_16x32 aom_paeth_predictor_16x32_c
+void aom_paeth_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x4 aom_paeth_predictor_16x4_c
+
+void aom_paeth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x64 aom_paeth_predictor_16x64_c
+
void aom_paeth_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_paeth_predictor_16x8 aom_paeth_predictor_16x8_c
-void aom_paeth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c
-
void aom_paeth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1866,6 +2036,18 @@ void aom_paeth_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_paeth_predictor_32x64 aom_paeth_predictor_32x64_c
+void aom_paeth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x8 aom_paeth_predictor_32x8_c
+
+void aom_paeth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_4x16 aom_paeth_predictor_4x16_c
+
void aom_paeth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1878,6 +2060,12 @@ void aom_paeth_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_paeth_predictor_4x8 aom_paeth_predictor_4x8_c
+void aom_paeth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_64x16 aom_paeth_predictor_64x16_c
+
void aom_paeth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1896,6 +2084,12 @@ void aom_paeth_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_paeth_predictor_8x16 aom_paeth_predictor_8x16_c
+void aom_paeth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x32 aom_paeth_predictor_8x32_c
+
void aom_paeth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1944,20 +2138,6 @@ void aom_quantize_b_32x32_c(const tran_low_t* coeff_ptr,
const int16_t* iscan);
#define aom_quantize_b_32x32 aom_quantize_b_32x32_c
-void aom_quantize_b_32x32_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_32x32_adaptive aom_quantize_b_32x32_adaptive_c
-
void aom_quantize_b_64x64_c(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -1972,34 +2152,6 @@ void aom_quantize_b_64x64_c(const tran_low_t* coeff_ptr,
const int16_t* iscan);
#define aom_quantize_b_64x64 aom_quantize_b_64x64_c
-void aom_quantize_b_64x64_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_64x64_adaptive aom_quantize_b_64x64_adaptive_c
-
-void aom_quantize_b_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_adaptive aom_quantize_b_adaptive_c
-
unsigned int aom_sad128x128_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
@@ -2735,18 +2887,24 @@ void aom_smooth_h_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_h_predictor_16x32 aom_smooth_h_predictor_16x32_c
+void aom_smooth_h_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x4 aom_smooth_h_predictor_16x4_c
+
+void aom_smooth_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x64 aom_smooth_h_predictor_16x64_c
+
void aom_smooth_h_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_smooth_h_predictor_16x8 aom_smooth_h_predictor_16x8_c
-void aom_smooth_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_h_predictor_2x2 aom_smooth_h_predictor_2x2_c
-
void aom_smooth_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2765,6 +2923,18 @@ void aom_smooth_h_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_h_predictor_32x64 aom_smooth_h_predictor_32x64_c
+void aom_smooth_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x8 aom_smooth_h_predictor_32x8_c
+
+void aom_smooth_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_4x16 aom_smooth_h_predictor_4x16_c
+
void aom_smooth_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2777,6 +2947,12 @@ void aom_smooth_h_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_h_predictor_4x8 aom_smooth_h_predictor_4x8_c
+void aom_smooth_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_64x16 aom_smooth_h_predictor_64x16_c
+
void aom_smooth_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2795,6 +2971,12 @@ void aom_smooth_h_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_h_predictor_8x16 aom_smooth_h_predictor_8x16_c
+void aom_smooth_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x32 aom_smooth_h_predictor_8x32_c
+
void aom_smooth_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2819,18 +3001,24 @@ void aom_smooth_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_16x32 aom_smooth_predictor_16x32_c
+void aom_smooth_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_16x4 aom_smooth_predictor_16x4_c
+
+void aom_smooth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_16x64 aom_smooth_predictor_16x64_c
+
void aom_smooth_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_smooth_predictor_16x8 aom_smooth_predictor_16x8_c
-void aom_smooth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c
-
void aom_smooth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2849,6 +3037,18 @@ void aom_smooth_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_32x64 aom_smooth_predictor_32x64_c
+void aom_smooth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_32x8 aom_smooth_predictor_32x8_c
+
+void aom_smooth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_4x16 aom_smooth_predictor_4x16_c
+
void aom_smooth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2861,6 +3061,12 @@ void aom_smooth_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_4x8 aom_smooth_predictor_4x8_c
+void aom_smooth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_64x16 aom_smooth_predictor_64x16_c
+
void aom_smooth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2879,6 +3085,12 @@ void aom_smooth_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_8x16 aom_smooth_predictor_8x16_c
+void aom_smooth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_8x32 aom_smooth_predictor_8x32_c
+
void aom_smooth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2903,18 +3115,24 @@ void aom_smooth_v_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_v_predictor_16x32 aom_smooth_v_predictor_16x32_c
+void aom_smooth_v_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x4 aom_smooth_v_predictor_16x4_c
+
+void aom_smooth_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x64 aom_smooth_v_predictor_16x64_c
+
void aom_smooth_v_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_smooth_v_predictor_16x8 aom_smooth_v_predictor_16x8_c
-void aom_smooth_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_v_predictor_2x2 aom_smooth_v_predictor_2x2_c
-
void aom_smooth_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2933,6 +3151,18 @@ void aom_smooth_v_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_v_predictor_32x64 aom_smooth_v_predictor_32x64_c
+void aom_smooth_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x8 aom_smooth_v_predictor_32x8_c
+
+void aom_smooth_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_4x16 aom_smooth_v_predictor_4x16_c
+
void aom_smooth_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2945,6 +3175,12 @@ void aom_smooth_v_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_v_predictor_4x8 aom_smooth_v_predictor_4x8_c
+void aom_smooth_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_64x16 aom_smooth_v_predictor_64x16_c
+
void aom_smooth_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2963,6 +3199,12 @@ void aom_smooth_v_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_v_predictor_8x16 aom_smooth_v_predictor_8x16_c
+void aom_smooth_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x32 aom_smooth_v_predictor_8x32_c
+
void aom_smooth_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3336,18 +3578,24 @@ void aom_v_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_16x32 aom_v_predictor_16x32_c
+void aom_v_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x4 aom_v_predictor_16x4_c
+
+void aom_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x64 aom_v_predictor_16x64_c
+
void aom_v_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_v_predictor_16x8 aom_v_predictor_16x8_c
-void aom_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_v_predictor_2x2 aom_v_predictor_2x2_c
-
void aom_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3366,6 +3614,18 @@ void aom_v_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_32x64 aom_v_predictor_32x64_c
+void aom_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_32x8 aom_v_predictor_32x8_c
+
+void aom_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_4x16 aom_v_predictor_4x16_c
+
void aom_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3378,6 +3638,12 @@ void aom_v_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_4x8 aom_v_predictor_4x8_c
+void aom_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_64x16 aom_v_predictor_64x16_c
+
void aom_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3396,6 +3662,12 @@ void aom_v_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_8x16 aom_v_predictor_8x16_c
+void aom_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_8x32 aom_v_predictor_8x32_c
+
void aom_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3547,7 +3819,7 @@ unsigned int aom_variance8x8_c(const uint8_t* src_ptr,
unsigned int* sse);
#define aom_variance8x8 aom_variance8x8_c
-int aom_vector_var_c(const int16_t* ref, const int16_t* src, const int bwl);
+int aom_vector_var_c(const int16_t* ref, const int16_t* src, int bwl);
#define aom_vector_var aom_vector_var_c
void aom_dsp_rtcd(void);
diff --git a/chromium/third_party/libaom/source/config/linux/generic/config/aom_scale_rtcd.h b/chromium/third_party/libaom/source/config/linux/generic/config/aom_scale_rtcd.h
index 42fb72d91b3..bc2e218d618 100644
--- a/chromium/third_party/libaom/source/config/linux/generic/config/aom_scale_rtcd.h
+++ b/chromium/third_party/libaom/source/config/linux/generic/config/aom_scale_rtcd.h
@@ -18,6 +18,12 @@ void aom_extend_frame_borders_c(struct yv12_buffer_config* ybf,
const int num_planes);
#define aom_extend_frame_borders aom_extend_frame_borders_c
+void aom_extend_frame_borders_plane_row_c(const struct yv12_buffer_config* ybf,
+ int plane,
+ int v_start,
+ int v_end);
+#define aom_extend_frame_borders_plane_row aom_extend_frame_borders_plane_row_c
+
void aom_extend_frame_borders_y_c(struct yv12_buffer_config* ybf);
#define aom_extend_frame_borders_y aom_extend_frame_borders_y_c
diff --git a/chromium/third_party/libaom/source/config/linux/generic/config/av1_rtcd.h b/chromium/third_party/libaom/source/config/linux/generic/config/av1_rtcd.h
index eb649a65e9c..c0ed827d1fd 100644
--- a/chromium/third_party/libaom/source/config/linux/generic/config/av1_rtcd.h
+++ b/chromium/third_party/libaom/source/config/linux/generic/config/av1_rtcd.h
@@ -168,6 +168,19 @@ void aom_upsampled_pred_c(MACROBLOCKD* xd,
int subpel_search);
#define aom_upsampled_pred aom_upsampled_pred_c
+void av1_apply_selfguided_restoration_c(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+#define av1_apply_selfguided_restoration av1_apply_selfguided_restoration_c
+
int64_t av1_block_error_c(const tran_low_t* coeff,
const tran_low_t* dqcoeff,
intptr_t block_size,
@@ -202,6 +215,14 @@ void av1_build_compound_diffwtd_mask_d16_c(uint8_t* mask,
#define av1_build_compound_diffwtd_mask_d16 \
av1_build_compound_diffwtd_mask_d16_c
+int64_t av1_calc_frame_error_c(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+#define av1_calc_frame_error av1_calc_frame_error_c
+
void av1_calc_indices_dim1_c(const int* data,
const int* centroids,
uint8_t* indices,
@@ -875,6 +896,18 @@ void av1_resize_and_extend_frame_c(const YV12_BUFFER_CONFIG* src,
void av1_round_shift_array_c(int32_t* arr, int size, int bit);
#define av1_round_shift_array av1_round_shift_array_c
+int av1_selfguided_restoration_c(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+#define av1_selfguided_restoration av1_selfguided_restoration_c
+
void av1_txb_init_levels_c(const tran_low_t* const coeff,
const int width,
const int height,
@@ -887,6 +920,26 @@ void av1_upsample_intra_edge_c(uint8_t* p, int sz);
void av1_upsample_intra_edge_high_c(uint16_t* p, int sz, int bd);
#define av1_upsample_intra_edge_high av1_upsample_intra_edge_high_c
+void av1_warp_affine_c(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+#define av1_warp_affine av1_warp_affine_c
+
void av1_wedge_compute_delta_squares_c(int16_t* d,
const int16_t* a,
const int16_t* b,
diff --git a/chromium/third_party/libaom/source/config/linux/ia32/config/aom_config.asm b/chromium/third_party/libaom/source/config/linux/ia32/config/aom_config.asm
index 9fe46ace251..a9a8b07cc83 100644
--- a/chromium/third_party/libaom/source/config/linux/ia32/config/aom_config.asm
+++ b/chromium/third_party/libaom/source/config/linux/ia32/config/aom_config.asm
@@ -1,5 +1,4 @@
%define ARCH_ARM 0
-%define ARCH_MIPS 0
%define ARCH_PPC 0
%define ARCH_X86 1
%define ARCH_X86_64 0
@@ -23,8 +22,6 @@
%define CONFIG_ENTROPY_STATS 0
%define CONFIG_EXCLUDE_SIMD_MISMATCH 0
%define CONFIG_FPMT_TEST 0
-%define CONFIG_FRAME_PARALLEL_ENCODE 0
-%define CONFIG_FRAME_PARALLEL_ENCODE_2 0
%define CONFIG_GCC 1
%define CONFIG_GCOV 0
%define CONFIG_GPROF 0
@@ -59,14 +56,11 @@
%define DECODE_HEIGHT_LIMIT 16384
%define DECODE_WIDTH_LIMIT 16384
%define FORCE_HIGHBITDEPTH_DECODING 0
+%define HAVE_ARM_CRC32 0
%define HAVE_AVX 1
%define HAVE_AVX2 1
-%define HAVE_DSPR2 0
%define HAVE_FEXCEPT 1
-%define HAVE_MIPS32 0
-%define HAVE_MIPS64 0
%define HAVE_MMX 1
-%define HAVE_MSA 0
%define HAVE_NEON 0
%define HAVE_PTHREAD_H 1
%define HAVE_SSE 1
diff --git a/chromium/third_party/libaom/source/config/linux/ia32/config/aom_config.c b/chromium/third_party/libaom/source/config/linux/ia32/config/aom_config.c
index aaa8142b8b8..dadb41a0d60 100644
--- a/chromium/third_party/libaom/source/config/linux/ia32/config/aom_config.c
+++ b/chromium/third_party/libaom/source/config/linux/ia32/config/aom_config.c
@@ -9,5 +9,5 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom/aom_codec.h"
-static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DCMAKE_TOOLCHAIN_FILE=\"../source/libaom/build/cmake/toolchains/x86-linux.cmake\" -DAOM_RTCD_FLAGS=--require-mmx;--require-sse;--require-sse2 -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_PIC=1 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
+static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DCMAKE_TOOLCHAIN_FILE=\"../source/libaom/build/cmake/toolchains/x86-linux.cmake\" -DAOM_RTCD_FLAGS=--require-mmx;--require-sse;--require-sse2 -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_PIC=1 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
const char *aom_codec_build_config(void) {return cfg;}
diff --git a/chromium/third_party/libaom/source/config/linux/ia32/config/aom_config.h b/chromium/third_party/libaom/source/config/linux/ia32/config/aom_config.h
index 6c9ef357eef..83afb10a538 100644
--- a/chromium/third_party/libaom/source/config/linux/ia32/config/aom_config.h
+++ b/chromium/third_party/libaom/source/config/linux/ia32/config/aom_config.h
@@ -11,7 +11,6 @@
#ifndef AOM_CONFIG_H_
#define AOM_CONFIG_H_
#define ARCH_ARM 0
-#define ARCH_MIPS 0
#define ARCH_PPC 0
#define ARCH_X86 1
#define ARCH_X86_64 0
@@ -35,8 +34,6 @@
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_EXCLUDE_SIMD_MISMATCH 0
#define CONFIG_FPMT_TEST 0
-#define CONFIG_FRAME_PARALLEL_ENCODE 0
-#define CONFIG_FRAME_PARALLEL_ENCODE_2 0
#define CONFIG_GCC 1
#define CONFIG_GCOV 0
#define CONFIG_GPROF 0
@@ -71,14 +68,11 @@
#define DECODE_HEIGHT_LIMIT 16384
#define DECODE_WIDTH_LIMIT 16384
#define FORCE_HIGHBITDEPTH_DECODING 0
+#define HAVE_ARM_CRC32 0
#define HAVE_AVX 1
#define HAVE_AVX2 1
-#define HAVE_DSPR2 0
#define HAVE_FEXCEPT 1
-#define HAVE_MIPS32 0
-#define HAVE_MIPS64 0
#define HAVE_MMX 1
-#define HAVE_MSA 0
#define HAVE_NEON 0
#define HAVE_PTHREAD_H 1
#define HAVE_SSE 1
diff --git a/chromium/third_party/libaom/source/config/linux/ia32/config/aom_dsp_rtcd.h b/chromium/third_party/libaom/source/config/linux/ia32/config/aom_dsp_rtcd.h
index aa189c45f4a..d217a7b9fba 100644
--- a/chromium/third_party/libaom/source/config/linux/ia32/config/aom_dsp_rtcd.h
+++ b/chromium/third_party/libaom/source/config/linux/ia32/config/aom_dsp_rtcd.h
@@ -360,6 +360,26 @@ void aom_dc_128_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_sse2
+void aom_dc_128_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x4 aom_dc_128_predictor_16x4_sse2
+
+void aom_dc_128_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x64 aom_dc_128_predictor_16x64_sse2
+
void aom_dc_128_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -370,12 +390,6 @@ void aom_dc_128_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_sse2
-void aom_dc_128_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c
-
void aom_dc_128_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -427,6 +441,26 @@ RTCD_EXTERN void (*aom_dc_128_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_dc_128_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_32x8 aom_dc_128_predictor_32x8_sse2
+
+void aom_dc_128_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_4x16 aom_dc_128_predictor_4x16_sse2
+
void aom_dc_128_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -447,6 +481,23 @@ void aom_dc_128_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_sse2
+void aom_dc_128_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_dc_128_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_dc_128_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -491,6 +542,16 @@ void aom_dc_128_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_sse2
+void aom_dc_128_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_8x32 aom_dc_128_predictor_8x32_sse2
+
void aom_dc_128_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -531,6 +592,26 @@ void aom_dc_left_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_sse2
+void aom_dc_left_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x4 aom_dc_left_predictor_16x4_sse2
+
+void aom_dc_left_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x64 aom_dc_left_predictor_16x64_sse2
+
void aom_dc_left_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -541,12 +622,6 @@ void aom_dc_left_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_sse2
-void aom_dc_left_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c
-
void aom_dc_left_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -598,6 +673,26 @@ RTCD_EXTERN void (*aom_dc_left_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_dc_left_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_32x8 aom_dc_left_predictor_32x8_sse2
+
+void aom_dc_left_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_4x16 aom_dc_left_predictor_4x16_sse2
+
void aom_dc_left_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -618,6 +713,23 @@ void aom_dc_left_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_sse2
+void aom_dc_left_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_dc_left_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_dc_left_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -662,6 +774,16 @@ void aom_dc_left_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_sse2
+void aom_dc_left_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_8x32 aom_dc_left_predictor_8x32_sse2
+
void aom_dc_left_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -702,6 +824,26 @@ void aom_dc_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_sse2
+void aom_dc_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x4 aom_dc_predictor_16x4_sse2
+
+void aom_dc_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x64 aom_dc_predictor_16x64_sse2
+
void aom_dc_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -712,12 +854,6 @@ void aom_dc_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_sse2
-void aom_dc_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c
-
void aom_dc_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -769,6 +905,26 @@ RTCD_EXTERN void (*aom_dc_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_dc_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_32x8 aom_dc_predictor_32x8_sse2
+
+void aom_dc_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_4x16 aom_dc_predictor_4x16_sse2
+
void aom_dc_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -789,6 +945,23 @@ void aom_dc_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_sse2
+void aom_dc_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_dc_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_dc_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -833,6 +1006,16 @@ void aom_dc_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_sse2
+void aom_dc_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_8x32 aom_dc_predictor_8x32_sse2
+
void aom_dc_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -873,6 +1056,26 @@ void aom_dc_top_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_sse2
+void aom_dc_top_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x4 aom_dc_top_predictor_16x4_sse2
+
+void aom_dc_top_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x64 aom_dc_top_predictor_16x64_sse2
+
void aom_dc_top_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -883,12 +1086,6 @@ void aom_dc_top_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_sse2
-void aom_dc_top_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c
-
void aom_dc_top_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -940,6 +1137,26 @@ RTCD_EXTERN void (*aom_dc_top_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_dc_top_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_32x8 aom_dc_top_predictor_32x8_sse2
+
+void aom_dc_top_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_4x16 aom_dc_top_predictor_4x16_sse2
+
void aom_dc_top_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -960,6 +1177,23 @@ void aom_dc_top_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_sse2
+void aom_dc_top_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_dc_top_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_dc_top_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1004,6 +1238,16 @@ void aom_dc_top_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_sse2
+void aom_dc_top_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_8x32 aom_dc_top_predictor_8x32_sse2
+
void aom_dc_top_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2024,6 +2268,26 @@ void aom_h_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_16x32 aom_h_predictor_16x32_sse2
+void aom_h_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x4 aom_h_predictor_16x4_sse2
+
+void aom_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x64 aom_h_predictor_16x64_sse2
+
void aom_h_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2034,12 +2298,6 @@ void aom_h_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_16x8 aom_h_predictor_16x8_sse2
-void aom_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_h_predictor_2x2 aom_h_predictor_2x2_c
-
void aom_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2077,6 +2335,26 @@ void aom_h_predictor_32x64_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_32x64 aom_h_predictor_32x64_sse2
+void aom_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_32x8 aom_h_predictor_32x8_sse2
+
+void aom_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_4x16 aom_h_predictor_4x16_sse2
+
void aom_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2097,6 +2375,16 @@ void aom_h_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_4x8 aom_h_predictor_4x8_sse2
+void aom_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_64x16 aom_h_predictor_64x16_sse2
+
void aom_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2127,6 +2415,16 @@ void aom_h_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_8x16 aom_h_predictor_8x16_sse2
+void aom_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_8x32 aom_h_predictor_8x32_sse2
+
void aom_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2189,19 +2487,6 @@ void aom_hadamard_8x8_sse2(const int16_t* src_diff,
tran_low_t* coeff);
#define aom_hadamard_8x8 aom_hadamard_8x8_sse2
-void aom_hadamard_8x8_dual_c(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-void aom_hadamard_8x8_dual_sse2(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-void aom_hadamard_8x8_dual_avx2(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-RTCD_EXTERN void (*aom_hadamard_8x8_dual)(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-
void aom_hadamard_lp_16x16_c(const int16_t* src_diff,
ptrdiff_t src_stride,
int16_t* coeff);
@@ -2223,6 +2508,19 @@ void aom_hadamard_lp_8x8_sse2(const int16_t* src_diff,
int16_t* coeff);
#define aom_hadamard_lp_8x8 aom_hadamard_lp_8x8_sse2
+void aom_hadamard_lp_8x8_dual_c(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+void aom_hadamard_lp_8x8_dual_sse2(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+void aom_hadamard_lp_8x8_dual_avx2(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+RTCD_EXTERN void (*aom_hadamard_lp_8x8_dual)(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+
void aom_ifft16x16_float_c(const float* input, float* temp, float* output);
void aom_ifft16x16_float_sse2(const float* input, float* temp, float* output);
void aom_ifft16x16_float_avx2(const float* input, float* temp, float* output);
@@ -2251,19 +2549,55 @@ RTCD_EXTERN void (*aom_ifft8x8_float)(const float* input,
float* temp,
float* output);
-int16_t aom_int_pro_col_c(const uint8_t* ref, const int width);
-int16_t aom_int_pro_col_sse2(const uint8_t* ref, const int width);
-#define aom_int_pro_col aom_int_pro_col_sse2
+void aom_int_pro_col_c(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_col_sse2(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_col_avx2(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+RTCD_EXTERN void (*aom_int_pro_col)(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
-void aom_int_pro_row_c(int16_t hbuf[16],
+void aom_int_pro_row_c(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
-void aom_int_pro_row_sse2(int16_t hbuf[16],
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_row_sse2(int16_t* hbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_row_avx2(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
-#define aom_int_pro_row aom_int_pro_row_sse2
+ const int width,
+ const int height,
+ int norm_factor);
+RTCD_EXTERN void (*aom_int_pro_row)(int16_t* hbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
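This hunk is an API change rather than regeneration noise: the old aom_int_pro_col returned a single int16_t for one fixed-width row, and aom_int_pro_row wrote into a fixed int16_t hbuf[16]; the new interfaces take explicit width, height, and a norm_factor shift and fill whole projection vectors, so the SSE2 and newly added AVX2 kernels can share one dispatch signature. A hedged sketch of what the row-projection reference plausibly computes — only the prototype comes from this hunk; the loop body and the plain right shift are assumptions:

#include <stdint.h>

/* Assumed reference behavior: project a width x height block onto column
 * sums, scaled down by norm_factor. Exact rounding is an assumption. */
static void int_pro_row_sketch(int16_t *hbuf, const uint8_t *ref,
                               int ref_stride, int width, int height,
                               int norm_factor) {
  for (int c = 0; c < width; ++c) {
    int32_t sum = 0;
    for (int r = 0; r < height; ++r) sum += ref[r * ref_stride + c];
    hbuf[c] = (int16_t)(sum >> norm_factor);  /* one entry per column */
  }
}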
void aom_lowbd_blend_a64_d16_mask_c(uint8_t* dst,
uint32_t dst_stride,
@@ -4298,6 +4632,27 @@ unsigned int aom_mse8x8_sse2(const uint8_t* src_ptr,
unsigned int* sse);
#define aom_mse8x8 aom_mse8x8_sse2
+uint64_t aom_mse_16xh_16bit_c(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+uint64_t aom_mse_16xh_16bit_sse2(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+uint64_t aom_mse_16xh_16bit_avx2(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+RTCD_EXTERN uint64_t (*aom_mse_16xh_16bit)(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+
uint64_t aom_mse_wxh_16bit_c(uint8_t* dst,
int dstride,
uint16_t* src,
@@ -4357,6 +4712,36 @@ RTCD_EXTERN void (*aom_paeth_predictor_16x32)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_paeth_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x4_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_16x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_paeth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x64_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x64_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_16x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_paeth_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4374,12 +4759,6 @@ RTCD_EXTERN void (*aom_paeth_predictor_16x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
-void aom_paeth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c
-
void aom_paeth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4431,6 +4810,32 @@ RTCD_EXTERN void (*aom_paeth_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_paeth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_32x8_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_32x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_paeth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_4x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_4x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_paeth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4457,6 +4862,23 @@ RTCD_EXTERN void (*aom_paeth_predictor_4x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_paeth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_64x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_paeth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4504,6 +4926,19 @@ RTCD_EXTERN void (*aom_paeth_predictor_8x16)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_paeth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_8x32_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_8x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_paeth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4580,6 +5015,18 @@ void aom_quantize_b_avx(const tran_low_t* coeff_ptr,
uint16_t* eob_ptr,
const int16_t* scan,
const int16_t* iscan);
+void aom_quantize_b_avx2(const tran_low_t* coeff_ptr,
+ intptr_t n_coeffs,
+ const int16_t* zbin_ptr,
+ const int16_t* round_ptr,
+ const int16_t* quant_ptr,
+ const int16_t* quant_shift_ptr,
+ tran_low_t* qcoeff_ptr,
+ tran_low_t* dqcoeff_ptr,
+ const int16_t* dequant_ptr,
+ uint16_t* eob_ptr,
+ const int16_t* scan,
+ const int16_t* iscan);
RTCD_EXTERN void (*aom_quantize_b)(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -4617,6 +5064,18 @@ void aom_quantize_b_32x32_avx(const tran_low_t* coeff_ptr,
uint16_t* eob_ptr,
const int16_t* scan,
const int16_t* iscan);
+void aom_quantize_b_32x32_avx2(const tran_low_t* coeff_ptr,
+ intptr_t n_coeffs,
+ const int16_t* zbin_ptr,
+ const int16_t* round_ptr,
+ const int16_t* quant_ptr,
+ const int16_t* quant_shift_ptr,
+ tran_low_t* qcoeff_ptr,
+ tran_low_t* dqcoeff_ptr,
+ const int16_t* dequant_ptr,
+ uint16_t* eob_ptr,
+ const int16_t* scan,
+ const int16_t* iscan);
RTCD_EXTERN void (*aom_quantize_b_32x32)(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -4630,32 +5089,6 @@ RTCD_EXTERN void (*aom_quantize_b_32x32)(const tran_low_t* coeff_ptr,
const int16_t* scan,
const int16_t* iscan);
-void aom_quantize_b_32x32_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-void aom_quantize_b_32x32_adaptive_sse2(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_32x32_adaptive aom_quantize_b_32x32_adaptive_sse2
-
void aom_quantize_b_64x64_c(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -4680,6 +5113,18 @@ void aom_quantize_b_64x64_ssse3(const tran_low_t* coeff_ptr,
uint16_t* eob_ptr,
const int16_t* scan,
const int16_t* iscan);
+void aom_quantize_b_64x64_avx2(const tran_low_t* coeff_ptr,
+ intptr_t n_coeffs,
+ const int16_t* zbin_ptr,
+ const int16_t* round_ptr,
+ const int16_t* quant_ptr,
+ const int16_t* quant_shift_ptr,
+ tran_low_t* qcoeff_ptr,
+ tran_low_t* dqcoeff_ptr,
+ const int16_t* dequant_ptr,
+ uint16_t* eob_ptr,
+ const int16_t* scan,
+ const int16_t* iscan);
RTCD_EXTERN void (*aom_quantize_b_64x64)(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -4693,81 +5138,6 @@ RTCD_EXTERN void (*aom_quantize_b_64x64)(const tran_low_t* coeff_ptr,
const int16_t* scan,
const int16_t* iscan);
-void aom_quantize_b_64x64_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-void aom_quantize_b_64x64_adaptive_sse2(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_64x64_adaptive aom_quantize_b_64x64_adaptive_sse2
-
-void aom_quantize_b_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-void aom_quantize_b_adaptive_sse2(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-void aom_quantize_b_adaptive_avx2(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-RTCD_EXTERN void (*aom_quantize_b_adaptive)(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-
unsigned int aom_sad128x128_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
@@ -6256,6 +6626,32 @@ RTCD_EXTERN void (*aom_smooth_h_predictor_16x32)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_h_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_16x4_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_16x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_16x64_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_16x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_h_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6269,12 +6665,6 @@ RTCD_EXTERN void (*aom_smooth_h_predictor_16x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
-void aom_smooth_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_h_predictor_2x2 aom_smooth_h_predictor_2x2_c
-
void aom_smooth_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6314,6 +6704,32 @@ RTCD_EXTERN void (*aom_smooth_h_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_32x8_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_32x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_4x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_4x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6340,6 +6756,19 @@ RTCD_EXTERN void (*aom_smooth_h_predictor_4x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_64x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6379,6 +6808,19 @@ RTCD_EXTERN void (*aom_smooth_h_predictor_8x16)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_8x32_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_8x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6431,6 +6873,32 @@ RTCD_EXTERN void (*aom_smooth_predictor_16x32)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_16x4_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_16x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_16x64_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_16x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6444,12 +6912,6 @@ RTCD_EXTERN void (*aom_smooth_predictor_16x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
-void aom_smooth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c
-
void aom_smooth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6489,6 +6951,32 @@ RTCD_EXTERN void (*aom_smooth_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_32x8_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_32x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_4x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_4x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6515,6 +7003,19 @@ RTCD_EXTERN void (*aom_smooth_predictor_4x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_64x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6554,6 +7055,19 @@ RTCD_EXTERN void (*aom_smooth_predictor_8x16)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_8x32_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_8x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6606,6 +7120,32 @@ RTCD_EXTERN void (*aom_smooth_v_predictor_16x32)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_v_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_16x4_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_16x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_16x64_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_16x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_v_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6619,12 +7159,6 @@ RTCD_EXTERN void (*aom_smooth_v_predictor_16x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
-void aom_smooth_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_v_predictor_2x2 aom_smooth_v_predictor_2x2_c
-
void aom_smooth_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6664,6 +7198,32 @@ RTCD_EXTERN void (*aom_smooth_v_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_32x8_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_32x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_4x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_4x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6690,6 +7250,19 @@ RTCD_EXTERN void (*aom_smooth_v_predictor_4x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_64x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6729,6 +7302,19 @@ RTCD_EXTERN void (*aom_smooth_v_predictor_8x16)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_8x32_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_8x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8035,6 +8621,26 @@ void aom_v_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_16x32 aom_v_predictor_16x32_sse2
+void aom_v_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x4 aom_v_predictor_16x4_sse2
+
+void aom_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x64 aom_v_predictor_16x64_sse2
+
void aom_v_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8045,12 +8651,6 @@ void aom_v_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_16x8 aom_v_predictor_16x8_sse2
-void aom_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_v_predictor_2x2 aom_v_predictor_2x2_c
-
void aom_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8102,6 +8702,26 @@ RTCD_EXTERN void (*aom_v_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_32x8 aom_v_predictor_32x8_sse2
+
+void aom_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_4x16 aom_v_predictor_4x16_sse2
+
void aom_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8122,6 +8742,23 @@ void aom_v_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_4x8 aom_v_predictor_4x8_sse2
+void aom_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_v_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8166,6 +8803,16 @@ void aom_v_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_8x16 aom_v_predictor_8x16_sse2
+void aom_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_8x32 aom_v_predictor_8x32_sse2
+
void aom_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8526,8 +9173,12 @@ unsigned int aom_variance8x8_sse2(const uint8_t* src_ptr,
unsigned int* sse);
#define aom_variance8x8 aom_variance8x8_sse2
-int aom_vector_var_c(const int16_t* ref, const int16_t* src, const int bwl);
-#define aom_vector_var aom_vector_var_c
+int aom_vector_var_c(const int16_t* ref, const int16_t* src, int bwl);
+int aom_vector_var_sse4_1(const int16_t* ref, const int16_t* src, int bwl);
+int aom_vector_var_avx2(const int16_t* ref, const int16_t* src, int bwl);
+RTCD_EXTERN int (*aom_vector_var)(const int16_t* ref,
+ const int16_t* src,
+ int bwl);
void aom_dsp_rtcd(void);
@@ -8579,6 +9230,9 @@ static void setup_rtcd_internal(void) {
aom_dc_128_predictor_32x64 = aom_dc_128_predictor_32x64_sse2;
if (flags & HAS_AVX2)
aom_dc_128_predictor_32x64 = aom_dc_128_predictor_32x64_avx2;
+ aom_dc_128_predictor_64x16 = aom_dc_128_predictor_64x16_sse2;
+ if (flags & HAS_AVX2)
+ aom_dc_128_predictor_64x16 = aom_dc_128_predictor_64x16_avx2;
aom_dc_128_predictor_64x32 = aom_dc_128_predictor_64x32_sse2;
if (flags & HAS_AVX2)
aom_dc_128_predictor_64x32 = aom_dc_128_predictor_64x32_avx2;
@@ -8594,6 +9248,9 @@ static void setup_rtcd_internal(void) {
aom_dc_left_predictor_32x64 = aom_dc_left_predictor_32x64_sse2;
if (flags & HAS_AVX2)
aom_dc_left_predictor_32x64 = aom_dc_left_predictor_32x64_avx2;
+ aom_dc_left_predictor_64x16 = aom_dc_left_predictor_64x16_sse2;
+ if (flags & HAS_AVX2)
+ aom_dc_left_predictor_64x16 = aom_dc_left_predictor_64x16_avx2;
aom_dc_left_predictor_64x32 = aom_dc_left_predictor_64x32_sse2;
if (flags & HAS_AVX2)
aom_dc_left_predictor_64x32 = aom_dc_left_predictor_64x32_avx2;
@@ -8609,6 +9266,9 @@ static void setup_rtcd_internal(void) {
aom_dc_predictor_32x64 = aom_dc_predictor_32x64_sse2;
if (flags & HAS_AVX2)
aom_dc_predictor_32x64 = aom_dc_predictor_32x64_avx2;
+ aom_dc_predictor_64x16 = aom_dc_predictor_64x16_sse2;
+ if (flags & HAS_AVX2)
+ aom_dc_predictor_64x16 = aom_dc_predictor_64x16_avx2;
aom_dc_predictor_64x32 = aom_dc_predictor_64x32_sse2;
if (flags & HAS_AVX2)
aom_dc_predictor_64x32 = aom_dc_predictor_64x32_avx2;
@@ -8624,6 +9284,9 @@ static void setup_rtcd_internal(void) {
aom_dc_top_predictor_32x64 = aom_dc_top_predictor_32x64_sse2;
if (flags & HAS_AVX2)
aom_dc_top_predictor_32x64 = aom_dc_top_predictor_32x64_avx2;
+ aom_dc_top_predictor_64x16 = aom_dc_top_predictor_64x16_sse2;
+ if (flags & HAS_AVX2)
+ aom_dc_top_predictor_64x16 = aom_dc_top_predictor_64x16_avx2;
aom_dc_top_predictor_64x32 = aom_dc_top_predictor_64x32_sse2;
if (flags & HAS_AVX2)
aom_dc_top_predictor_64x32 = aom_dc_top_predictor_64x32_avx2;
@@ -8785,12 +9448,12 @@ static void setup_rtcd_internal(void) {
aom_hadamard_32x32 = aom_hadamard_32x32_sse2;
if (flags & HAS_AVX2)
aom_hadamard_32x32 = aom_hadamard_32x32_avx2;
- aom_hadamard_8x8_dual = aom_hadamard_8x8_dual_sse2;
- if (flags & HAS_AVX2)
- aom_hadamard_8x8_dual = aom_hadamard_8x8_dual_avx2;
aom_hadamard_lp_16x16 = aom_hadamard_lp_16x16_sse2;
if (flags & HAS_AVX2)
aom_hadamard_lp_16x16 = aom_hadamard_lp_16x16_avx2;
+ aom_hadamard_lp_8x8_dual = aom_hadamard_lp_8x8_dual_sse2;
+ if (flags & HAS_AVX2)
+ aom_hadamard_lp_8x8_dual = aom_hadamard_lp_8x8_dual_avx2;
aom_ifft16x16_float = aom_ifft16x16_float_sse2;
if (flags & HAS_AVX2)
aom_ifft16x16_float = aom_ifft16x16_float_avx2;
@@ -8800,6 +9463,12 @@ static void setup_rtcd_internal(void) {
aom_ifft8x8_float = aom_ifft8x8_float_sse2;
if (flags & HAS_AVX2)
aom_ifft8x8_float = aom_ifft8x8_float_avx2;
+ aom_int_pro_col = aom_int_pro_col_sse2;
+ if (flags & HAS_AVX2)
+ aom_int_pro_col = aom_int_pro_col_avx2;
+ aom_int_pro_row = aom_int_pro_row_sse2;
+ if (flags & HAS_AVX2)
+ aom_int_pro_row = aom_int_pro_row_avx2;
aom_lowbd_blend_a64_d16_mask = aom_lowbd_blend_a64_d16_mask_c;
if (flags & HAS_SSE4_1)
aom_lowbd_blend_a64_d16_mask = aom_lowbd_blend_a64_d16_mask_sse4_1;
@@ -9003,6 +9672,9 @@ static void setup_rtcd_internal(void) {
aom_mse16x16 = aom_mse16x16_sse2;
if (flags & HAS_AVX2)
aom_mse16x16 = aom_mse16x16_avx2;
+ aom_mse_16xh_16bit = aom_mse_16xh_16bit_sse2;
+ if (flags & HAS_AVX2)
+ aom_mse_16xh_16bit = aom_mse_16xh_16bit_avx2;
aom_mse_wxh_16bit = aom_mse_wxh_16bit_sse2;
if (flags & HAS_AVX2)
aom_mse_wxh_16bit = aom_mse_wxh_16bit_avx2;
@@ -9016,6 +9688,14 @@ static void setup_rtcd_internal(void) {
aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_ssse3;
if (flags & HAS_AVX2)
aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_avx2;
+ aom_paeth_predictor_16x4 = aom_paeth_predictor_16x4_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_16x4 = aom_paeth_predictor_16x4_ssse3;
+ aom_paeth_predictor_16x64 = aom_paeth_predictor_16x64_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_16x64 = aom_paeth_predictor_16x64_ssse3;
+ if (flags & HAS_AVX2)
+ aom_paeth_predictor_16x64 = aom_paeth_predictor_16x64_avx2;
aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_ssse3;
@@ -9036,12 +9716,23 @@ static void setup_rtcd_internal(void) {
aom_paeth_predictor_32x64 = aom_paeth_predictor_32x64_ssse3;
if (flags & HAS_AVX2)
aom_paeth_predictor_32x64 = aom_paeth_predictor_32x64_avx2;
+ aom_paeth_predictor_32x8 = aom_paeth_predictor_32x8_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_32x8 = aom_paeth_predictor_32x8_ssse3;
+ aom_paeth_predictor_4x16 = aom_paeth_predictor_4x16_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_4x16 = aom_paeth_predictor_4x16_ssse3;
aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_ssse3;
aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_ssse3;
+ aom_paeth_predictor_64x16 = aom_paeth_predictor_64x16_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_64x16 = aom_paeth_predictor_64x16_ssse3;
+ if (flags & HAS_AVX2)
+ aom_paeth_predictor_64x16 = aom_paeth_predictor_64x16_avx2;
aom_paeth_predictor_64x32 = aom_paeth_predictor_64x32_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_64x32 = aom_paeth_predictor_64x32_ssse3;
@@ -9055,6 +9746,9 @@ static void setup_rtcd_internal(void) {
aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_ssse3;
+ aom_paeth_predictor_8x32 = aom_paeth_predictor_8x32_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_8x32 = aom_paeth_predictor_8x32_ssse3;
aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_ssse3;
@@ -9064,15 +9758,18 @@ static void setup_rtcd_internal(void) {
aom_quantize_b = aom_quantize_b_sse2;
if (flags & HAS_AVX)
aom_quantize_b = aom_quantize_b_avx;
+ if (flags & HAS_AVX2)
+ aom_quantize_b = aom_quantize_b_avx2;
aom_quantize_b_32x32 = aom_quantize_b_32x32_c;
if (flags & HAS_AVX)
aom_quantize_b_32x32 = aom_quantize_b_32x32_avx;
+ if (flags & HAS_AVX2)
+ aom_quantize_b_32x32 = aom_quantize_b_32x32_avx2;
aom_quantize_b_64x64 = aom_quantize_b_64x64_c;
if (flags & HAS_SSSE3)
aom_quantize_b_64x64 = aom_quantize_b_64x64_ssse3;
- aom_quantize_b_adaptive = aom_quantize_b_adaptive_sse2;
if (flags & HAS_AVX2)
- aom_quantize_b_adaptive = aom_quantize_b_adaptive_avx2;
+ aom_quantize_b_64x64 = aom_quantize_b_64x64_avx2;
aom_sad128x128 = aom_sad128x128_sse2;
if (flags & HAS_AVX2)
aom_sad128x128 = aom_sad128x128_avx2;
@@ -9208,6 +9905,12 @@ static void setup_rtcd_internal(void) {
aom_smooth_h_predictor_16x32 = aom_smooth_h_predictor_16x32_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_16x32 = aom_smooth_h_predictor_16x32_ssse3;
+ aom_smooth_h_predictor_16x4 = aom_smooth_h_predictor_16x4_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_16x4 = aom_smooth_h_predictor_16x4_ssse3;
+ aom_smooth_h_predictor_16x64 = aom_smooth_h_predictor_16x64_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_16x64 = aom_smooth_h_predictor_16x64_ssse3;
aom_smooth_h_predictor_16x8 = aom_smooth_h_predictor_16x8_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_16x8 = aom_smooth_h_predictor_16x8_ssse3;
@@ -9220,12 +9923,21 @@ static void setup_rtcd_internal(void) {
aom_smooth_h_predictor_32x64 = aom_smooth_h_predictor_32x64_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_32x64 = aom_smooth_h_predictor_32x64_ssse3;
+ aom_smooth_h_predictor_32x8 = aom_smooth_h_predictor_32x8_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_32x8 = aom_smooth_h_predictor_32x8_ssse3;
+ aom_smooth_h_predictor_4x16 = aom_smooth_h_predictor_4x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_4x16 = aom_smooth_h_predictor_4x16_ssse3;
aom_smooth_h_predictor_4x4 = aom_smooth_h_predictor_4x4_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_4x4 = aom_smooth_h_predictor_4x4_ssse3;
aom_smooth_h_predictor_4x8 = aom_smooth_h_predictor_4x8_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_4x8 = aom_smooth_h_predictor_4x8_ssse3;
+ aom_smooth_h_predictor_64x16 = aom_smooth_h_predictor_64x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_64x16 = aom_smooth_h_predictor_64x16_ssse3;
aom_smooth_h_predictor_64x32 = aom_smooth_h_predictor_64x32_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_64x32 = aom_smooth_h_predictor_64x32_ssse3;
@@ -9235,6 +9947,9 @@ static void setup_rtcd_internal(void) {
aom_smooth_h_predictor_8x16 = aom_smooth_h_predictor_8x16_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_8x16 = aom_smooth_h_predictor_8x16_ssse3;
+ aom_smooth_h_predictor_8x32 = aom_smooth_h_predictor_8x32_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_8x32 = aom_smooth_h_predictor_8x32_ssse3;
aom_smooth_h_predictor_8x4 = aom_smooth_h_predictor_8x4_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_8x4 = aom_smooth_h_predictor_8x4_ssse3;
@@ -9247,6 +9962,12 @@ static void setup_rtcd_internal(void) {
aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_ssse3;
+ aom_smooth_predictor_16x4 = aom_smooth_predictor_16x4_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_16x4 = aom_smooth_predictor_16x4_ssse3;
+ aom_smooth_predictor_16x64 = aom_smooth_predictor_16x64_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_16x64 = aom_smooth_predictor_16x64_ssse3;
aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_ssse3;
@@ -9259,12 +9980,21 @@ static void setup_rtcd_internal(void) {
aom_smooth_predictor_32x64 = aom_smooth_predictor_32x64_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_32x64 = aom_smooth_predictor_32x64_ssse3;
+ aom_smooth_predictor_32x8 = aom_smooth_predictor_32x8_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_32x8 = aom_smooth_predictor_32x8_ssse3;
+ aom_smooth_predictor_4x16 = aom_smooth_predictor_4x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_4x16 = aom_smooth_predictor_4x16_ssse3;
aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_ssse3;
aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_ssse3;
+ aom_smooth_predictor_64x16 = aom_smooth_predictor_64x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_64x16 = aom_smooth_predictor_64x16_ssse3;
aom_smooth_predictor_64x32 = aom_smooth_predictor_64x32_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_64x32 = aom_smooth_predictor_64x32_ssse3;
@@ -9274,6 +10004,9 @@ static void setup_rtcd_internal(void) {
aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_ssse3;
+ aom_smooth_predictor_8x32 = aom_smooth_predictor_8x32_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_8x32 = aom_smooth_predictor_8x32_ssse3;
aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_ssse3;
@@ -9286,6 +10019,12 @@ static void setup_rtcd_internal(void) {
aom_smooth_v_predictor_16x32 = aom_smooth_v_predictor_16x32_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_16x32 = aom_smooth_v_predictor_16x32_ssse3;
+ aom_smooth_v_predictor_16x4 = aom_smooth_v_predictor_16x4_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_16x4 = aom_smooth_v_predictor_16x4_ssse3;
+ aom_smooth_v_predictor_16x64 = aom_smooth_v_predictor_16x64_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_16x64 = aom_smooth_v_predictor_16x64_ssse3;
aom_smooth_v_predictor_16x8 = aom_smooth_v_predictor_16x8_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_16x8 = aom_smooth_v_predictor_16x8_ssse3;
@@ -9298,12 +10037,21 @@ static void setup_rtcd_internal(void) {
aom_smooth_v_predictor_32x64 = aom_smooth_v_predictor_32x64_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_32x64 = aom_smooth_v_predictor_32x64_ssse3;
+ aom_smooth_v_predictor_32x8 = aom_smooth_v_predictor_32x8_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_32x8 = aom_smooth_v_predictor_32x8_ssse3;
+ aom_smooth_v_predictor_4x16 = aom_smooth_v_predictor_4x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_4x16 = aom_smooth_v_predictor_4x16_ssse3;
aom_smooth_v_predictor_4x4 = aom_smooth_v_predictor_4x4_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_4x4 = aom_smooth_v_predictor_4x4_ssse3;
aom_smooth_v_predictor_4x8 = aom_smooth_v_predictor_4x8_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_4x8 = aom_smooth_v_predictor_4x8_ssse3;
+ aom_smooth_v_predictor_64x16 = aom_smooth_v_predictor_64x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_64x16 = aom_smooth_v_predictor_64x16_ssse3;
aom_smooth_v_predictor_64x32 = aom_smooth_v_predictor_64x32_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_64x32 = aom_smooth_v_predictor_64x32_ssse3;
@@ -9313,6 +10061,9 @@ static void setup_rtcd_internal(void) {
aom_smooth_v_predictor_8x16 = aom_smooth_v_predictor_8x16_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_8x16 = aom_smooth_v_predictor_8x16_ssse3;
+ aom_smooth_v_predictor_8x32 = aom_smooth_v_predictor_8x32_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_8x32 = aom_smooth_v_predictor_8x32_ssse3;
aom_smooth_v_predictor_8x4 = aom_smooth_v_predictor_8x4_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_8x4 = aom_smooth_v_predictor_8x4_ssse3;
@@ -9476,6 +10227,9 @@ static void setup_rtcd_internal(void) {
aom_v_predictor_32x64 = aom_v_predictor_32x64_sse2;
if (flags & HAS_AVX2)
aom_v_predictor_32x64 = aom_v_predictor_32x64_avx2;
+ aom_v_predictor_64x16 = aom_v_predictor_64x16_sse2;
+ if (flags & HAS_AVX2)
+ aom_v_predictor_64x16 = aom_v_predictor_64x16_avx2;
aom_v_predictor_64x32 = aom_v_predictor_64x32_sse2;
if (flags & HAS_AVX2)
aom_v_predictor_64x32 = aom_v_predictor_64x32_avx2;
@@ -9521,6 +10275,11 @@ static void setup_rtcd_internal(void) {
aom_variance64x64 = aom_variance64x64_sse2;
if (flags & HAS_AVX2)
aom_variance64x64 = aom_variance64x64_avx2;
+ aom_vector_var = aom_vector_var_c;
+ if (flags & HAS_SSE4_1)
+ aom_vector_var = aom_vector_var_sse4_1;
+ if (flags & HAS_AVX2)
+ aom_vector_var = aom_vector_var_avx2;
}
#endif
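
Note on the hunks above: every new predictor size follows the same RTCD (run-time CPU detection) pattern — `setup_rtcd_internal()` first binds the function pointer to the portable `_c` (or baseline SSE2) version, then each `if (flags & HAS_...)` check can only raise the ISA level. A minimal sketch of that pattern; `my_op`, `get_cpu_flags()` and the flag values are stand-ins, not libaom symbols:

#include <stdint.h>

#define HAS_SSSE3 (1 << 0)
#define HAS_AVX2 (1 << 1)

static void my_op_c(uint8_t *dst) { dst[0] = 0; }     /* portable fallback */
static void my_op_ssse3(uint8_t *dst) { dst[0] = 1; } /* SIMD stand-ins    */
static void my_op_avx2(uint8_t *dst) { dst[0] = 2; }

static void (*my_op)(uint8_t *dst); /* RTCD_EXTERN-style pointer */

static int get_cpu_flags(void) { return HAS_SSSE3; } /* pretend CPUID probe */

static void setup(void) {
  int flags = get_cpu_flags();
  my_op = my_op_c;                            /* safe baseline first    */
  if (flags & HAS_SSSE3) my_op = my_op_ssse3; /* then only ever upgrade */
  if (flags & HAS_AVX2) my_op = my_op_avx2;
}

int main(void) {
  uint8_t b[1];
  setup();
  my_op(b);
  return b[0]; /* 1: the SSSE3 path was selected */
}
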
diff --git a/chromium/third_party/libaom/source/config/linux/ia32/config/aom_scale_rtcd.h b/chromium/third_party/libaom/source/config/linux/ia32/config/aom_scale_rtcd.h
index b6059e1426a..5e6c03317d6 100644
--- a/chromium/third_party/libaom/source/config/linux/ia32/config/aom_scale_rtcd.h
+++ b/chromium/third_party/libaom/source/config/linux/ia32/config/aom_scale_rtcd.h
@@ -18,6 +18,12 @@ void aom_extend_frame_borders_c(struct yv12_buffer_config* ybf,
const int num_planes);
#define aom_extend_frame_borders aom_extend_frame_borders_c
+void aom_extend_frame_borders_plane_row_c(const struct yv12_buffer_config* ybf,
+ int plane,
+ int v_start,
+ int v_end);
+#define aom_extend_frame_borders_plane_row aom_extend_frame_borders_plane_row_c
+
void aom_extend_frame_borders_y_c(struct yv12_buffer_config* ybf);
#define aom_extend_frame_borders_y aom_extend_frame_borders_y_c
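
The only change to `aom_scale_rtcd.h` is the new `aom_extend_frame_borders_plane_row`, which narrows border extension from whole-frame to one plane's row range `[v_start, v_end)` — the granularity row-based threading needs. A hedged sketch of a caller; only the prototype comes from the hunk above, the worker split and names are hypothetical:

struct yv12_buffer_config; /* opaque here; defined in aom_scale headers */
void aom_extend_frame_borders_plane_row(const struct yv12_buffer_config *ybf,
                                        int plane, int v_start, int v_end);

/* Hypothetical worker body: each of num_workers threads extends its own
 * row slice of every plane's borders. */
static void extend_borders_slice(const struct yv12_buffer_config *frame,
                                 int num_planes, int plane_height,
                                 int num_workers, int worker_id) {
  const int rows = (plane_height + num_workers - 1) / num_workers;
  const int v_start = worker_id * rows;
  const int v_end =
      (v_start + rows < plane_height) ? v_start + rows : plane_height;
  for (int plane = 0; plane < num_planes; ++plane)
    aom_extend_frame_borders_plane_row(frame, plane, v_start, v_end);
}
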
diff --git a/chromium/third_party/libaom/source/config/linux/ia32/config/av1_rtcd.h b/chromium/third_party/libaom/source/config/linux/ia32/config/av1_rtcd.h
index 8d516d90bde..6d8a64d1270 100644
--- a/chromium/third_party/libaom/source/config/linux/ia32/config/av1_rtcd.h
+++ b/chromium/third_party/libaom/source/config/linux/ia32/config/av1_rtcd.h
@@ -242,6 +242,51 @@ void aom_upsampled_pred_sse2(MACROBLOCKD* xd,
int subpel_search);
#define aom_upsampled_pred aom_upsampled_pred_sse2
+void av1_apply_selfguided_restoration_c(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+void av1_apply_selfguided_restoration_sse4_1(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+void av1_apply_selfguided_restoration_avx2(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+RTCD_EXTERN void (*av1_apply_selfguided_restoration)(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+
int64_t av1_block_error_c(const tran_low_t* coeff,
const tran_low_t* dqcoeff,
intptr_t block_size,
@@ -347,6 +392,31 @@ RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_d16)(
ConvolveParams* conv_params,
int bd);
+int64_t av1_calc_frame_error_c(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+int64_t av1_calc_frame_error_sse2(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+int64_t av1_calc_frame_error_avx2(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+RTCD_EXTERN int64_t (*av1_calc_frame_error)(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+
void av1_calc_indices_dim1_c(const int* data,
const int* centroids,
uint8_t* indices,
@@ -1839,6 +1909,47 @@ void av1_round_shift_array_c(int32_t* arr, int size, int bit);
void av1_round_shift_array_sse4_1(int32_t* arr, int size, int bit);
RTCD_EXTERN void (*av1_round_shift_array)(int32_t* arr, int size, int bit);
+int av1_selfguided_restoration_c(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+int av1_selfguided_restoration_sse4_1(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+int av1_selfguided_restoration_avx2(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+RTCD_EXTERN int (*av1_selfguided_restoration)(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+
void av1_txb_init_levels_c(const tran_low_t* const coeff,
const int width,
const int height,
@@ -1864,6 +1975,79 @@ void av1_upsample_intra_edge_high_c(uint16_t* p, int sz, int bd);
void av1_upsample_intra_edge_high_sse4_1(uint16_t* p, int sz, int bd);
RTCD_EXTERN void (*av1_upsample_intra_edge_high)(uint16_t* p, int sz, int bd);
+void av1_warp_affine_c(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+void av1_warp_affine_sse4_1(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+void av1_warp_affine_avx2(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+RTCD_EXTERN void (*av1_warp_affine)(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+
void av1_wedge_compute_delta_squares_c(int16_t* d,
const int16_t* a,
const int16_t* b,
@@ -2687,6 +2871,11 @@ static void setup_rtcd_internal(void) {
if (flags & HAS_SSSE3)
aom_dist_wtd_comp_avg_upsampled_pred =
aom_dist_wtd_comp_avg_upsampled_pred_ssse3;
+ av1_apply_selfguided_restoration = av1_apply_selfguided_restoration_c;
+ if (flags & HAS_SSE4_1)
+ av1_apply_selfguided_restoration = av1_apply_selfguided_restoration_sse4_1;
+ if (flags & HAS_AVX2)
+ av1_apply_selfguided_restoration = av1_apply_selfguided_restoration_avx2;
av1_block_error = av1_block_error_sse2;
if (flags & HAS_AVX2)
av1_block_error = av1_block_error_avx2;
@@ -2705,6 +2894,9 @@ static void setup_rtcd_internal(void) {
if (flags & HAS_AVX2)
av1_build_compound_diffwtd_mask_d16 =
av1_build_compound_diffwtd_mask_d16_avx2;
+ av1_calc_frame_error = av1_calc_frame_error_sse2;
+ if (flags & HAS_AVX2)
+ av1_calc_frame_error = av1_calc_frame_error_avx2;
av1_calc_indices_dim1 = av1_calc_indices_dim1_sse2;
if (flags & HAS_AVX2)
av1_calc_indices_dim1 = av1_calc_indices_dim1_avx2;
@@ -2903,6 +3095,11 @@ static void setup_rtcd_internal(void) {
av1_round_shift_array = av1_round_shift_array_c;
if (flags & HAS_SSE4_1)
av1_round_shift_array = av1_round_shift_array_sse4_1;
+ av1_selfguided_restoration = av1_selfguided_restoration_c;
+ if (flags & HAS_SSE4_1)
+ av1_selfguided_restoration = av1_selfguided_restoration_sse4_1;
+ if (flags & HAS_AVX2)
+ av1_selfguided_restoration = av1_selfguided_restoration_avx2;
av1_txb_init_levels = av1_txb_init_levels_c;
if (flags & HAS_SSE4_1)
av1_txb_init_levels = av1_txb_init_levels_sse4_1;
@@ -2914,6 +3111,11 @@ static void setup_rtcd_internal(void) {
av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_c;
if (flags & HAS_SSE4_1)
av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_sse4_1;
+ av1_warp_affine = av1_warp_affine_c;
+ if (flags & HAS_SSE4_1)
+ av1_warp_affine = av1_warp_affine_sse4_1;
+ if (flags & HAS_AVX2)
+ av1_warp_affine = av1_warp_affine_avx2;
av1_wedge_compute_delta_squares = av1_wedge_compute_delta_squares_sse2;
if (flags & HAS_AVX2)
av1_wedge_compute_delta_squares = av1_wedge_compute_delta_squares_avx2;
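
Two binding styles alternate throughout these generated headers. When the baseline ISA already provides the best implementation, the generator emits a plain `#define` alias, resolved at compile time with no indirection; when variants above the baseline exist — as for `av1_warp_affine` and `av1_selfguided_restoration` here — it emits an `RTCD_EXTERN` function pointer that `setup_rtcd_internal()` fills in. A compressed, self-contained sketch of both forms (the `scale` names are illustrative only):

static int scale_c(int x) { return x; }
static int scale_sse2(int x) { return x * 2; } /* stand-ins, not real SIMD */
static int scale_avx2(int x) { return x * 4; }

/* Style 1: baseline is already best, so the generator emits a zero-cost
 * compile-time alias. */
#define scale_fast scale_sse2

/* Style 2: better-than-baseline variants exist, so the generator emits an
 * RTCD_EXTERN pointer (extern in most TUs, plain in the defining one). */
static int (*scale)(int x) = scale_c;

int main(void) {
  int cpu_has_avx2 = 0;            /* pretend CPUID result */
  if (cpu_has_avx2) scale = scale_avx2;
  return scale_fast(1) + scale(1); /* 2 + 1 = 3 */
}
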
diff --git a/chromium/third_party/libaom/source/config/linux/x64/config/aom_config.asm b/chromium/third_party/libaom/source/config/linux/x64/config/aom_config.asm
index 3641e11fa87..ae5f0f9af44 100644
--- a/chromium/third_party/libaom/source/config/linux/x64/config/aom_config.asm
+++ b/chromium/third_party/libaom/source/config/linux/x64/config/aom_config.asm
@@ -1,5 +1,4 @@
%define ARCH_ARM 0
-%define ARCH_MIPS 0
%define ARCH_PPC 0
%define ARCH_X86 0
%define ARCH_X86_64 1
@@ -23,8 +22,6 @@
%define CONFIG_ENTROPY_STATS 0
%define CONFIG_EXCLUDE_SIMD_MISMATCH 0
%define CONFIG_FPMT_TEST 0
-%define CONFIG_FRAME_PARALLEL_ENCODE 0
-%define CONFIG_FRAME_PARALLEL_ENCODE_2 0
%define CONFIG_GCC 1
%define CONFIG_GCOV 0
%define CONFIG_GPROF 0
@@ -40,7 +37,7 @@
%define CONFIG_OPTICAL_FLOW_API 0
%define CONFIG_OS_SUPPORT 1
%define CONFIG_PARTITION_SEARCH_ORDER 0
-%define CONFIG_PIC 0
+%define CONFIG_PIC 1
%define CONFIG_RATECTRL_LOG 0
%define CONFIG_RD_COMMAND 0
%define CONFIG_RD_DEBUG 0
@@ -59,14 +56,11 @@
%define DECODE_HEIGHT_LIMIT 16384
%define DECODE_WIDTH_LIMIT 16384
%define FORCE_HIGHBITDEPTH_DECODING 0
+%define HAVE_ARM_CRC32 0
%define HAVE_AVX 1
%define HAVE_AVX2 1
-%define HAVE_DSPR2 0
%define HAVE_FEXCEPT 1
-%define HAVE_MIPS32 0
-%define HAVE_MIPS64 0
%define HAVE_MMX 1
-%define HAVE_MSA 0
%define HAVE_NEON 0
%define HAVE_PTHREAD_H 1
%define HAVE_SSE 1
diff --git a/chromium/third_party/libaom/source/config/linux/x64/config/aom_config.c b/chromium/third_party/libaom/source/config/linux/x64/config/aom_config.c
index 8786cc6957d..4c037ce451b 100644
--- a/chromium/third_party/libaom/source/config/linux/x64/config/aom_config.c
+++ b/chromium/third_party/libaom/source/config/linux/x64/config/aom_config.c
@@ -9,5 +9,5 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom/aom_codec.h"
-static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DAOM_TARGET_CPU=x86_64 -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
+static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DAOM_TARGET_CPU=x86_64 -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
const char *aom_codec_build_config(void) {return cfg;}
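
The `aom_config.c` change only reorders the embedded cmake invocation string (`-DCONFIG_REALTIME_ONLY=1` moves after the denoising flag); the build itself is unchanged. Applications can read the string back through `aom_codec_build_config()` from `aom/aom_codec.h` — a small sketch, assuming the program is linked against libaom:

#include <stdio.h>
#include "aom/aom_codec.h" /* declares aom_codec_build_config() */

int main(void) {
  /* Prints the cmake line baked in above; useful to verify which
   * CONFIG_* switches a deployed libaom binary was built with. */
  printf("%s\n", aom_codec_build_config());
  return 0;
}
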
diff --git a/chromium/third_party/libaom/source/config/linux/x64/config/aom_config.h b/chromium/third_party/libaom/source/config/linux/x64/config/aom_config.h
index 902371b5c29..1d553faa608 100644
--- a/chromium/third_party/libaom/source/config/linux/x64/config/aom_config.h
+++ b/chromium/third_party/libaom/source/config/linux/x64/config/aom_config.h
@@ -11,7 +11,6 @@
#ifndef AOM_CONFIG_H_
#define AOM_CONFIG_H_
#define ARCH_ARM 0
-#define ARCH_MIPS 0
#define ARCH_PPC 0
#define ARCH_X86 0
#define ARCH_X86_64 1
@@ -35,8 +34,6 @@
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_EXCLUDE_SIMD_MISMATCH 0
#define CONFIG_FPMT_TEST 0
-#define CONFIG_FRAME_PARALLEL_ENCODE 0
-#define CONFIG_FRAME_PARALLEL_ENCODE_2 0
#define CONFIG_GCC 1
#define CONFIG_GCOV 0
#define CONFIG_GPROF 0
@@ -52,7 +49,7 @@
#define CONFIG_OPTICAL_FLOW_API 0
#define CONFIG_OS_SUPPORT 1
#define CONFIG_PARTITION_SEARCH_ORDER 0
-#define CONFIG_PIC 0
+#define CONFIG_PIC 1
#define CONFIG_RATECTRL_LOG 0
#define CONFIG_RD_COMMAND 0
#define CONFIG_RD_DEBUG 0
@@ -71,14 +68,11 @@
#define DECODE_HEIGHT_LIMIT 16384
#define DECODE_WIDTH_LIMIT 16384
#define FORCE_HIGHBITDEPTH_DECODING 0
+#define HAVE_ARM_CRC32 0
#define HAVE_AVX 1
#define HAVE_AVX2 1
-#define HAVE_DSPR2 0
#define HAVE_FEXCEPT 1
-#define HAVE_MIPS32 0
-#define HAVE_MIPS64 0
#define HAVE_MMX 1
-#define HAVE_MSA 0
#define HAVE_NEON 0
#define HAVE_PTHREAD_H 1
#define HAVE_SSE 1
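
The x64 C config mirrors the asm file: the MIPS switches (`ARCH_MIPS`, `HAVE_DSPR2`, `HAVE_MIPS32/64`, `HAVE_MSA`) and the frame-parallel-encode flags are dropped upstream, `HAVE_ARM_CRC32` is added, and `CONFIG_PIC` flips to 1 so the x86 assembly is built position-independent. These macros are compile-time gates; the runtime `HAS_*` flags in the rtcd headers can only narrow what was compiled in. A minimal sketch of how sources typically consume them (the function name is hypothetical):

#include "config/aom_config.h"

#if HAVE_AVX2
/* Compiled in only because the config above sets HAVE_AVX2 to 1; whether
 * it actually runs is still decided at runtime by the HAS_AVX2 flag in
 * setup_rtcd_internal(). */
void my_kernel_avx2(void) { /* hypothetical AVX2 path */ }
#endif
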
diff --git a/chromium/third_party/libaom/source/config/linux/x64/config/aom_dsp_rtcd.h b/chromium/third_party/libaom/source/config/linux/x64/config/aom_dsp_rtcd.h
index b04c0cfea23..e1a2841f1fe 100644
--- a/chromium/third_party/libaom/source/config/linux/x64/config/aom_dsp_rtcd.h
+++ b/chromium/third_party/libaom/source/config/linux/x64/config/aom_dsp_rtcd.h
@@ -360,6 +360,26 @@ void aom_dc_128_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_sse2
+void aom_dc_128_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x4 aom_dc_128_predictor_16x4_sse2
+
+void aom_dc_128_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x64 aom_dc_128_predictor_16x64_sse2
+
void aom_dc_128_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -370,12 +390,6 @@ void aom_dc_128_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_sse2
-void aom_dc_128_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c
-
void aom_dc_128_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -427,6 +441,26 @@ RTCD_EXTERN void (*aom_dc_128_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_dc_128_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_32x8 aom_dc_128_predictor_32x8_sse2
+
+void aom_dc_128_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_4x16 aom_dc_128_predictor_4x16_sse2
+
void aom_dc_128_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -447,6 +481,23 @@ void aom_dc_128_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_sse2
+void aom_dc_128_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_dc_128_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_dc_128_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -491,6 +542,16 @@ void aom_dc_128_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_sse2
+void aom_dc_128_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_8x32 aom_dc_128_predictor_8x32_sse2
+
void aom_dc_128_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -531,6 +592,26 @@ void aom_dc_left_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_sse2
+void aom_dc_left_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x4 aom_dc_left_predictor_16x4_sse2
+
+void aom_dc_left_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x64 aom_dc_left_predictor_16x64_sse2
+
void aom_dc_left_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -541,12 +622,6 @@ void aom_dc_left_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_sse2
-void aom_dc_left_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c
-
void aom_dc_left_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -598,6 +673,26 @@ RTCD_EXTERN void (*aom_dc_left_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_dc_left_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_32x8 aom_dc_left_predictor_32x8_sse2
+
+void aom_dc_left_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_4x16 aom_dc_left_predictor_4x16_sse2
+
void aom_dc_left_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -618,6 +713,23 @@ void aom_dc_left_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_sse2
+void aom_dc_left_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_dc_left_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_dc_left_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -662,6 +774,16 @@ void aom_dc_left_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_sse2
+void aom_dc_left_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_8x32 aom_dc_left_predictor_8x32_sse2
+
void aom_dc_left_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -702,6 +824,26 @@ void aom_dc_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_sse2
+void aom_dc_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x4 aom_dc_predictor_16x4_sse2
+
+void aom_dc_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x64 aom_dc_predictor_16x64_sse2
+
void aom_dc_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -712,12 +854,6 @@ void aom_dc_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_sse2
-void aom_dc_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c
-
void aom_dc_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -769,6 +905,26 @@ RTCD_EXTERN void (*aom_dc_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_dc_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_32x8 aom_dc_predictor_32x8_sse2
+
+void aom_dc_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_4x16 aom_dc_predictor_4x16_sse2
+
void aom_dc_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -789,6 +945,23 @@ void aom_dc_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_sse2
+void aom_dc_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_dc_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_dc_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -833,6 +1006,16 @@ void aom_dc_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_sse2
+void aom_dc_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_8x32 aom_dc_predictor_8x32_sse2
+
void aom_dc_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -873,6 +1056,26 @@ void aom_dc_top_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_sse2
+void aom_dc_top_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x4 aom_dc_top_predictor_16x4_sse2
+
+void aom_dc_top_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x64 aom_dc_top_predictor_16x64_sse2
+
void aom_dc_top_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -883,12 +1086,6 @@ void aom_dc_top_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_sse2
-void aom_dc_top_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c
-
void aom_dc_top_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -940,6 +1137,26 @@ RTCD_EXTERN void (*aom_dc_top_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_dc_top_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_32x8 aom_dc_top_predictor_32x8_sse2
+
+void aom_dc_top_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_4x16 aom_dc_top_predictor_4x16_sse2
+
void aom_dc_top_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -960,6 +1177,23 @@ void aom_dc_top_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_sse2
+void aom_dc_top_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_dc_top_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_dc_top_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1004,6 +1238,16 @@ void aom_dc_top_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_sse2
+void aom_dc_top_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_8x32 aom_dc_top_predictor_8x32_sse2
+
void aom_dc_top_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2027,6 +2271,26 @@ void aom_h_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_16x32 aom_h_predictor_16x32_sse2
+void aom_h_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x4 aom_h_predictor_16x4_sse2
+
+void aom_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x64 aom_h_predictor_16x64_sse2
+
void aom_h_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2037,12 +2301,6 @@ void aom_h_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_16x8 aom_h_predictor_16x8_sse2
-void aom_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_h_predictor_2x2 aom_h_predictor_2x2_c
-
void aom_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2080,6 +2338,26 @@ void aom_h_predictor_32x64_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_32x64 aom_h_predictor_32x64_sse2
+void aom_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_32x8 aom_h_predictor_32x8_sse2
+
+void aom_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_4x16 aom_h_predictor_4x16_sse2
+
void aom_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2100,6 +2378,16 @@ void aom_h_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_4x8 aom_h_predictor_4x8_sse2
+void aom_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_64x16 aom_h_predictor_64x16_sse2
+
void aom_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2130,6 +2418,16 @@ void aom_h_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_8x16 aom_h_predictor_8x16_sse2
+void aom_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_8x32 aom_h_predictor_8x32_sse2
+
void aom_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2192,19 +2490,6 @@ void aom_hadamard_8x8_sse2(const int16_t* src_diff,
tran_low_t* coeff);
#define aom_hadamard_8x8 aom_hadamard_8x8_sse2
-void aom_hadamard_8x8_dual_c(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-void aom_hadamard_8x8_dual_sse2(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-void aom_hadamard_8x8_dual_avx2(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-RTCD_EXTERN void (*aom_hadamard_8x8_dual)(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-
void aom_hadamard_lp_16x16_c(const int16_t* src_diff,
ptrdiff_t src_stride,
int16_t* coeff);
@@ -2226,6 +2511,19 @@ void aom_hadamard_lp_8x8_sse2(const int16_t* src_diff,
int16_t* coeff);
#define aom_hadamard_lp_8x8 aom_hadamard_lp_8x8_sse2
+void aom_hadamard_lp_8x8_dual_c(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+void aom_hadamard_lp_8x8_dual_sse2(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+void aom_hadamard_lp_8x8_dual_avx2(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+RTCD_EXTERN void (*aom_hadamard_lp_8x8_dual)(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+
void aom_ifft16x16_float_c(const float* input, float* temp, float* output);
void aom_ifft16x16_float_sse2(const float* input, float* temp, float* output);
void aom_ifft16x16_float_avx2(const float* input, float* temp, float* output);
@@ -2254,19 +2552,55 @@ RTCD_EXTERN void (*aom_ifft8x8_float)(const float* input,
float* temp,
float* output);
-int16_t aom_int_pro_col_c(const uint8_t* ref, const int width);
-int16_t aom_int_pro_col_sse2(const uint8_t* ref, const int width);
-#define aom_int_pro_col aom_int_pro_col_sse2
+void aom_int_pro_col_c(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_col_sse2(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_col_avx2(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+RTCD_EXTERN void (*aom_int_pro_col)(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
-void aom_int_pro_row_c(int16_t hbuf[16],
+void aom_int_pro_row_c(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
-void aom_int_pro_row_sse2(int16_t hbuf[16],
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_row_sse2(int16_t* hbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_row_avx2(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
-#define aom_int_pro_row aom_int_pro_row_sse2
+ const int width,
+ const int height,
+ int norm_factor);
+RTCD_EXTERN void (*aom_int_pro_row)(int16_t* hbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
void aom_lowbd_blend_a64_d16_mask_c(uint8_t* dst,
uint32_t dst_stride,
@@ -4301,6 +4635,27 @@ unsigned int aom_mse8x8_sse2(const uint8_t* src_ptr,
unsigned int* sse);
#define aom_mse8x8 aom_mse8x8_sse2
+uint64_t aom_mse_16xh_16bit_c(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+uint64_t aom_mse_16xh_16bit_sse2(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+uint64_t aom_mse_16xh_16bit_avx2(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+RTCD_EXTERN uint64_t (*aom_mse_16xh_16bit)(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+
uint64_t aom_mse_wxh_16bit_c(uint8_t* dst,
int dstride,
uint16_t* src,
@@ -4360,6 +4715,36 @@ RTCD_EXTERN void (*aom_paeth_predictor_16x32)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_paeth_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x4_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_16x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_paeth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x64_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x64_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_16x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_paeth_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4377,12 +4762,6 @@ RTCD_EXTERN void (*aom_paeth_predictor_16x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
-void aom_paeth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c
-
void aom_paeth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4434,6 +4813,32 @@ RTCD_EXTERN void (*aom_paeth_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_paeth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_32x8_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_32x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_paeth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_4x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_4x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_paeth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4460,6 +4865,23 @@ RTCD_EXTERN void (*aom_paeth_predictor_4x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_paeth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_64x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_paeth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4507,6 +4929,19 @@ RTCD_EXTERN void (*aom_paeth_predictor_8x16)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_paeth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_8x32_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_8x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_paeth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4595,6 +5030,18 @@ void aom_quantize_b_avx(const tran_low_t* coeff_ptr,
uint16_t* eob_ptr,
const int16_t* scan,
const int16_t* iscan);
+void aom_quantize_b_avx2(const tran_low_t* coeff_ptr,
+ intptr_t n_coeffs,
+ const int16_t* zbin_ptr,
+ const int16_t* round_ptr,
+ const int16_t* quant_ptr,
+ const int16_t* quant_shift_ptr,
+ tran_low_t* qcoeff_ptr,
+ tran_low_t* dqcoeff_ptr,
+ const int16_t* dequant_ptr,
+ uint16_t* eob_ptr,
+ const int16_t* scan,
+ const int16_t* iscan);
RTCD_EXTERN void (*aom_quantize_b)(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -4644,6 +5091,18 @@ void aom_quantize_b_32x32_avx(const tran_low_t* coeff_ptr,
uint16_t* eob_ptr,
const int16_t* scan,
const int16_t* iscan);
+void aom_quantize_b_32x32_avx2(const tran_low_t* coeff_ptr,
+ intptr_t n_coeffs,
+ const int16_t* zbin_ptr,
+ const int16_t* round_ptr,
+ const int16_t* quant_ptr,
+ const int16_t* quant_shift_ptr,
+ tran_low_t* qcoeff_ptr,
+ tran_low_t* dqcoeff_ptr,
+ const int16_t* dequant_ptr,
+ uint16_t* eob_ptr,
+ const int16_t* scan,
+ const int16_t* iscan);
RTCD_EXTERN void (*aom_quantize_b_32x32)(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -4657,32 +5116,6 @@ RTCD_EXTERN void (*aom_quantize_b_32x32)(const tran_low_t* coeff_ptr,
const int16_t* scan,
const int16_t* iscan);
-void aom_quantize_b_32x32_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-void aom_quantize_b_32x32_adaptive_sse2(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_32x32_adaptive aom_quantize_b_32x32_adaptive_sse2
-
void aom_quantize_b_64x64_c(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -4707,6 +5140,18 @@ void aom_quantize_b_64x64_ssse3(const tran_low_t* coeff_ptr,
uint16_t* eob_ptr,
const int16_t* scan,
const int16_t* iscan);
+void aom_quantize_b_64x64_avx2(const tran_low_t* coeff_ptr,
+ intptr_t n_coeffs,
+ const int16_t* zbin_ptr,
+ const int16_t* round_ptr,
+ const int16_t* quant_ptr,
+ const int16_t* quant_shift_ptr,
+ tran_low_t* qcoeff_ptr,
+ tran_low_t* dqcoeff_ptr,
+ const int16_t* dequant_ptr,
+ uint16_t* eob_ptr,
+ const int16_t* scan,
+ const int16_t* iscan);
RTCD_EXTERN void (*aom_quantize_b_64x64)(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -4720,81 +5165,6 @@ RTCD_EXTERN void (*aom_quantize_b_64x64)(const tran_low_t* coeff_ptr,
const int16_t* scan,
const int16_t* iscan);
-void aom_quantize_b_64x64_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-void aom_quantize_b_64x64_adaptive_sse2(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_64x64_adaptive aom_quantize_b_64x64_adaptive_sse2
-
-void aom_quantize_b_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-void aom_quantize_b_adaptive_sse2(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-void aom_quantize_b_adaptive_avx2(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-RTCD_EXTERN void (*aom_quantize_b_adaptive)(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-
unsigned int aom_sad128x128_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
@@ -6283,6 +6653,32 @@ RTCD_EXTERN void (*aom_smooth_h_predictor_16x32)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_h_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_16x4_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_16x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_16x64_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_16x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_h_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6296,12 +6692,6 @@ RTCD_EXTERN void (*aom_smooth_h_predictor_16x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
-void aom_smooth_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_h_predictor_2x2 aom_smooth_h_predictor_2x2_c
-
void aom_smooth_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6341,6 +6731,32 @@ RTCD_EXTERN void (*aom_smooth_h_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_32x8_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_32x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_4x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_4x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6367,6 +6783,19 @@ RTCD_EXTERN void (*aom_smooth_h_predictor_4x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_64x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6406,6 +6835,19 @@ RTCD_EXTERN void (*aom_smooth_h_predictor_8x16)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_8x32_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_8x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6458,6 +6900,32 @@ RTCD_EXTERN void (*aom_smooth_predictor_16x32)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_16x4_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_16x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_16x64_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_16x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6471,12 +6939,6 @@ RTCD_EXTERN void (*aom_smooth_predictor_16x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
-void aom_smooth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c
-
void aom_smooth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6516,6 +6978,32 @@ RTCD_EXTERN void (*aom_smooth_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_32x8_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_32x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_4x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_4x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6542,6 +7030,19 @@ RTCD_EXTERN void (*aom_smooth_predictor_4x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_64x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6581,6 +7082,19 @@ RTCD_EXTERN void (*aom_smooth_predictor_8x16)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_8x32_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_8x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6633,6 +7147,32 @@ RTCD_EXTERN void (*aom_smooth_v_predictor_16x32)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_v_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_16x4_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_16x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_16x64_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_16x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_v_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6646,12 +7186,6 @@ RTCD_EXTERN void (*aom_smooth_v_predictor_16x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
-void aom_smooth_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_v_predictor_2x2 aom_smooth_v_predictor_2x2_c
-
void aom_smooth_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6691,6 +7225,32 @@ RTCD_EXTERN void (*aom_smooth_v_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_32x8_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_32x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_4x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_4x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6717,6 +7277,19 @@ RTCD_EXTERN void (*aom_smooth_v_predictor_4x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_64x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6756,6 +7329,19 @@ RTCD_EXTERN void (*aom_smooth_v_predictor_8x16)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_8x32_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_8x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8071,6 +8657,26 @@ void aom_v_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_16x32 aom_v_predictor_16x32_sse2
+void aom_v_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x4 aom_v_predictor_16x4_sse2
+
+void aom_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x64 aom_v_predictor_16x64_sse2
+
void aom_v_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8081,12 +8687,6 @@ void aom_v_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_16x8 aom_v_predictor_16x8_sse2
-void aom_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_v_predictor_2x2 aom_v_predictor_2x2_c
-
void aom_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8138,6 +8738,26 @@ RTCD_EXTERN void (*aom_v_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_32x8 aom_v_predictor_32x8_sse2
+
+void aom_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_4x16 aom_v_predictor_4x16_sse2
+
void aom_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8158,6 +8778,23 @@ void aom_v_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_4x8 aom_v_predictor_4x8_sse2
+void aom_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_v_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8202,6 +8839,16 @@ void aom_v_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_8x16 aom_v_predictor_8x16_sse2
+void aom_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_8x32 aom_v_predictor_8x32_sse2
+
void aom_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8562,8 +9209,12 @@ unsigned int aom_variance8x8_sse2(const uint8_t* src_ptr,
unsigned int* sse);
#define aom_variance8x8 aom_variance8x8_sse2
-int aom_vector_var_c(const int16_t* ref, const int16_t* src, const int bwl);
-#define aom_vector_var aom_vector_var_c
+int aom_vector_var_c(const int16_t* ref, const int16_t* src, int bwl);
+int aom_vector_var_sse4_1(const int16_t* ref, const int16_t* src, int bwl);
+int aom_vector_var_avx2(const int16_t* ref, const int16_t* src, int bwl);
+RTCD_EXTERN int (*aom_vector_var)(const int16_t* ref,
+ const int16_t* src,
+ int bwl);
void aom_dsp_rtcd(void);
@@ -8615,6 +9266,9 @@ static void setup_rtcd_internal(void) {
aom_dc_128_predictor_32x64 = aom_dc_128_predictor_32x64_sse2;
if (flags & HAS_AVX2)
aom_dc_128_predictor_32x64 = aom_dc_128_predictor_32x64_avx2;
+ aom_dc_128_predictor_64x16 = aom_dc_128_predictor_64x16_sse2;
+ if (flags & HAS_AVX2)
+ aom_dc_128_predictor_64x16 = aom_dc_128_predictor_64x16_avx2;
aom_dc_128_predictor_64x32 = aom_dc_128_predictor_64x32_sse2;
if (flags & HAS_AVX2)
aom_dc_128_predictor_64x32 = aom_dc_128_predictor_64x32_avx2;
@@ -8630,6 +9284,9 @@ static void setup_rtcd_internal(void) {
aom_dc_left_predictor_32x64 = aom_dc_left_predictor_32x64_sse2;
if (flags & HAS_AVX2)
aom_dc_left_predictor_32x64 = aom_dc_left_predictor_32x64_avx2;
+ aom_dc_left_predictor_64x16 = aom_dc_left_predictor_64x16_sse2;
+ if (flags & HAS_AVX2)
+ aom_dc_left_predictor_64x16 = aom_dc_left_predictor_64x16_avx2;
aom_dc_left_predictor_64x32 = aom_dc_left_predictor_64x32_sse2;
if (flags & HAS_AVX2)
aom_dc_left_predictor_64x32 = aom_dc_left_predictor_64x32_avx2;
@@ -8645,6 +9302,9 @@ static void setup_rtcd_internal(void) {
aom_dc_predictor_32x64 = aom_dc_predictor_32x64_sse2;
if (flags & HAS_AVX2)
aom_dc_predictor_32x64 = aom_dc_predictor_32x64_avx2;
+ aom_dc_predictor_64x16 = aom_dc_predictor_64x16_sse2;
+ if (flags & HAS_AVX2)
+ aom_dc_predictor_64x16 = aom_dc_predictor_64x16_avx2;
aom_dc_predictor_64x32 = aom_dc_predictor_64x32_sse2;
if (flags & HAS_AVX2)
aom_dc_predictor_64x32 = aom_dc_predictor_64x32_avx2;
@@ -8660,6 +9320,9 @@ static void setup_rtcd_internal(void) {
aom_dc_top_predictor_32x64 = aom_dc_top_predictor_32x64_sse2;
if (flags & HAS_AVX2)
aom_dc_top_predictor_32x64 = aom_dc_top_predictor_32x64_avx2;
+ aom_dc_top_predictor_64x16 = aom_dc_top_predictor_64x16_sse2;
+ if (flags & HAS_AVX2)
+ aom_dc_top_predictor_64x16 = aom_dc_top_predictor_64x16_avx2;
aom_dc_top_predictor_64x32 = aom_dc_top_predictor_64x32_sse2;
if (flags & HAS_AVX2)
aom_dc_top_predictor_64x32 = aom_dc_top_predictor_64x32_avx2;
@@ -8824,12 +9487,12 @@ static void setup_rtcd_internal(void) {
aom_hadamard_32x32 = aom_hadamard_32x32_sse2;
if (flags & HAS_AVX2)
aom_hadamard_32x32 = aom_hadamard_32x32_avx2;
- aom_hadamard_8x8_dual = aom_hadamard_8x8_dual_sse2;
- if (flags & HAS_AVX2)
- aom_hadamard_8x8_dual = aom_hadamard_8x8_dual_avx2;
aom_hadamard_lp_16x16 = aom_hadamard_lp_16x16_sse2;
if (flags & HAS_AVX2)
aom_hadamard_lp_16x16 = aom_hadamard_lp_16x16_avx2;
+ aom_hadamard_lp_8x8_dual = aom_hadamard_lp_8x8_dual_sse2;
+ if (flags & HAS_AVX2)
+ aom_hadamard_lp_8x8_dual = aom_hadamard_lp_8x8_dual_avx2;
aom_ifft16x16_float = aom_ifft16x16_float_sse2;
if (flags & HAS_AVX2)
aom_ifft16x16_float = aom_ifft16x16_float_avx2;
@@ -8839,6 +9502,12 @@ static void setup_rtcd_internal(void) {
aom_ifft8x8_float = aom_ifft8x8_float_sse2;
if (flags & HAS_AVX2)
aom_ifft8x8_float = aom_ifft8x8_float_avx2;
+ aom_int_pro_col = aom_int_pro_col_sse2;
+ if (flags & HAS_AVX2)
+ aom_int_pro_col = aom_int_pro_col_avx2;
+ aom_int_pro_row = aom_int_pro_row_sse2;
+ if (flags & HAS_AVX2)
+ aom_int_pro_row = aom_int_pro_row_avx2;
aom_lowbd_blend_a64_d16_mask = aom_lowbd_blend_a64_d16_mask_c;
if (flags & HAS_SSE4_1)
aom_lowbd_blend_a64_d16_mask = aom_lowbd_blend_a64_d16_mask_sse4_1;
@@ -9042,6 +9711,9 @@ static void setup_rtcd_internal(void) {
aom_mse16x16 = aom_mse16x16_sse2;
if (flags & HAS_AVX2)
aom_mse16x16 = aom_mse16x16_avx2;
+ aom_mse_16xh_16bit = aom_mse_16xh_16bit_sse2;
+ if (flags & HAS_AVX2)
+ aom_mse_16xh_16bit = aom_mse_16xh_16bit_avx2;
aom_mse_wxh_16bit = aom_mse_wxh_16bit_sse2;
if (flags & HAS_AVX2)
aom_mse_wxh_16bit = aom_mse_wxh_16bit_avx2;
@@ -9055,6 +9727,14 @@ static void setup_rtcd_internal(void) {
aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_ssse3;
if (flags & HAS_AVX2)
aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_avx2;
+ aom_paeth_predictor_16x4 = aom_paeth_predictor_16x4_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_16x4 = aom_paeth_predictor_16x4_ssse3;
+ aom_paeth_predictor_16x64 = aom_paeth_predictor_16x64_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_16x64 = aom_paeth_predictor_16x64_ssse3;
+ if (flags & HAS_AVX2)
+ aom_paeth_predictor_16x64 = aom_paeth_predictor_16x64_avx2;
aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_ssse3;
@@ -9075,12 +9755,23 @@ static void setup_rtcd_internal(void) {
aom_paeth_predictor_32x64 = aom_paeth_predictor_32x64_ssse3;
if (flags & HAS_AVX2)
aom_paeth_predictor_32x64 = aom_paeth_predictor_32x64_avx2;
+ aom_paeth_predictor_32x8 = aom_paeth_predictor_32x8_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_32x8 = aom_paeth_predictor_32x8_ssse3;
+ aom_paeth_predictor_4x16 = aom_paeth_predictor_4x16_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_4x16 = aom_paeth_predictor_4x16_ssse3;
aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_ssse3;
aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_ssse3;
+ aom_paeth_predictor_64x16 = aom_paeth_predictor_64x16_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_64x16 = aom_paeth_predictor_64x16_ssse3;
+ if (flags & HAS_AVX2)
+ aom_paeth_predictor_64x16 = aom_paeth_predictor_64x16_avx2;
aom_paeth_predictor_64x32 = aom_paeth_predictor_64x32_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_64x32 = aom_paeth_predictor_64x32_ssse3;
@@ -9094,6 +9785,9 @@ static void setup_rtcd_internal(void) {
aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_ssse3;
+ aom_paeth_predictor_8x32 = aom_paeth_predictor_8x32_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_8x32 = aom_paeth_predictor_8x32_ssse3;
aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_ssse3;
@@ -9105,17 +9799,20 @@ static void setup_rtcd_internal(void) {
aom_quantize_b = aom_quantize_b_ssse3;
if (flags & HAS_AVX)
aom_quantize_b = aom_quantize_b_avx;
+ if (flags & HAS_AVX2)
+ aom_quantize_b = aom_quantize_b_avx2;
aom_quantize_b_32x32 = aom_quantize_b_32x32_c;
if (flags & HAS_SSSE3)
aom_quantize_b_32x32 = aom_quantize_b_32x32_ssse3;
if (flags & HAS_AVX)
aom_quantize_b_32x32 = aom_quantize_b_32x32_avx;
+ if (flags & HAS_AVX2)
+ aom_quantize_b_32x32 = aom_quantize_b_32x32_avx2;
aom_quantize_b_64x64 = aom_quantize_b_64x64_c;
if (flags & HAS_SSSE3)
aom_quantize_b_64x64 = aom_quantize_b_64x64_ssse3;
- aom_quantize_b_adaptive = aom_quantize_b_adaptive_sse2;
if (flags & HAS_AVX2)
- aom_quantize_b_adaptive = aom_quantize_b_adaptive_avx2;
+ aom_quantize_b_64x64 = aom_quantize_b_64x64_avx2;
aom_sad128x128 = aom_sad128x128_sse2;
if (flags & HAS_AVX2)
aom_sad128x128 = aom_sad128x128_avx2;
@@ -9251,6 +9948,12 @@ static void setup_rtcd_internal(void) {
aom_smooth_h_predictor_16x32 = aom_smooth_h_predictor_16x32_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_16x32 = aom_smooth_h_predictor_16x32_ssse3;
+ aom_smooth_h_predictor_16x4 = aom_smooth_h_predictor_16x4_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_16x4 = aom_smooth_h_predictor_16x4_ssse3;
+ aom_smooth_h_predictor_16x64 = aom_smooth_h_predictor_16x64_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_16x64 = aom_smooth_h_predictor_16x64_ssse3;
aom_smooth_h_predictor_16x8 = aom_smooth_h_predictor_16x8_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_16x8 = aom_smooth_h_predictor_16x8_ssse3;
@@ -9263,12 +9966,21 @@ static void setup_rtcd_internal(void) {
aom_smooth_h_predictor_32x64 = aom_smooth_h_predictor_32x64_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_32x64 = aom_smooth_h_predictor_32x64_ssse3;
+ aom_smooth_h_predictor_32x8 = aom_smooth_h_predictor_32x8_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_32x8 = aom_smooth_h_predictor_32x8_ssse3;
+ aom_smooth_h_predictor_4x16 = aom_smooth_h_predictor_4x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_4x16 = aom_smooth_h_predictor_4x16_ssse3;
aom_smooth_h_predictor_4x4 = aom_smooth_h_predictor_4x4_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_4x4 = aom_smooth_h_predictor_4x4_ssse3;
aom_smooth_h_predictor_4x8 = aom_smooth_h_predictor_4x8_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_4x8 = aom_smooth_h_predictor_4x8_ssse3;
+ aom_smooth_h_predictor_64x16 = aom_smooth_h_predictor_64x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_64x16 = aom_smooth_h_predictor_64x16_ssse3;
aom_smooth_h_predictor_64x32 = aom_smooth_h_predictor_64x32_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_64x32 = aom_smooth_h_predictor_64x32_ssse3;
@@ -9278,6 +9990,9 @@ static void setup_rtcd_internal(void) {
aom_smooth_h_predictor_8x16 = aom_smooth_h_predictor_8x16_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_8x16 = aom_smooth_h_predictor_8x16_ssse3;
+ aom_smooth_h_predictor_8x32 = aom_smooth_h_predictor_8x32_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_8x32 = aom_smooth_h_predictor_8x32_ssse3;
aom_smooth_h_predictor_8x4 = aom_smooth_h_predictor_8x4_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_8x4 = aom_smooth_h_predictor_8x4_ssse3;
@@ -9290,6 +10005,12 @@ static void setup_rtcd_internal(void) {
aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_ssse3;
+ aom_smooth_predictor_16x4 = aom_smooth_predictor_16x4_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_16x4 = aom_smooth_predictor_16x4_ssse3;
+ aom_smooth_predictor_16x64 = aom_smooth_predictor_16x64_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_16x64 = aom_smooth_predictor_16x64_ssse3;
aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_ssse3;
@@ -9302,12 +10023,21 @@ static void setup_rtcd_internal(void) {
aom_smooth_predictor_32x64 = aom_smooth_predictor_32x64_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_32x64 = aom_smooth_predictor_32x64_ssse3;
+ aom_smooth_predictor_32x8 = aom_smooth_predictor_32x8_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_32x8 = aom_smooth_predictor_32x8_ssse3;
+ aom_smooth_predictor_4x16 = aom_smooth_predictor_4x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_4x16 = aom_smooth_predictor_4x16_ssse3;
aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_ssse3;
aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_ssse3;
+ aom_smooth_predictor_64x16 = aom_smooth_predictor_64x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_64x16 = aom_smooth_predictor_64x16_ssse3;
aom_smooth_predictor_64x32 = aom_smooth_predictor_64x32_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_64x32 = aom_smooth_predictor_64x32_ssse3;
@@ -9317,6 +10047,9 @@ static void setup_rtcd_internal(void) {
aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_ssse3;
+ aom_smooth_predictor_8x32 = aom_smooth_predictor_8x32_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_8x32 = aom_smooth_predictor_8x32_ssse3;
aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_ssse3;
@@ -9329,6 +10062,12 @@ static void setup_rtcd_internal(void) {
aom_smooth_v_predictor_16x32 = aom_smooth_v_predictor_16x32_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_16x32 = aom_smooth_v_predictor_16x32_ssse3;
+ aom_smooth_v_predictor_16x4 = aom_smooth_v_predictor_16x4_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_16x4 = aom_smooth_v_predictor_16x4_ssse3;
+ aom_smooth_v_predictor_16x64 = aom_smooth_v_predictor_16x64_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_16x64 = aom_smooth_v_predictor_16x64_ssse3;
aom_smooth_v_predictor_16x8 = aom_smooth_v_predictor_16x8_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_16x8 = aom_smooth_v_predictor_16x8_ssse3;
@@ -9341,12 +10080,21 @@ static void setup_rtcd_internal(void) {
aom_smooth_v_predictor_32x64 = aom_smooth_v_predictor_32x64_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_32x64 = aom_smooth_v_predictor_32x64_ssse3;
+ aom_smooth_v_predictor_32x8 = aom_smooth_v_predictor_32x8_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_32x8 = aom_smooth_v_predictor_32x8_ssse3;
+ aom_smooth_v_predictor_4x16 = aom_smooth_v_predictor_4x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_4x16 = aom_smooth_v_predictor_4x16_ssse3;
aom_smooth_v_predictor_4x4 = aom_smooth_v_predictor_4x4_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_4x4 = aom_smooth_v_predictor_4x4_ssse3;
aom_smooth_v_predictor_4x8 = aom_smooth_v_predictor_4x8_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_4x8 = aom_smooth_v_predictor_4x8_ssse3;
+ aom_smooth_v_predictor_64x16 = aom_smooth_v_predictor_64x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_64x16 = aom_smooth_v_predictor_64x16_ssse3;
aom_smooth_v_predictor_64x32 = aom_smooth_v_predictor_64x32_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_64x32 = aom_smooth_v_predictor_64x32_ssse3;
@@ -9356,6 +10104,9 @@ static void setup_rtcd_internal(void) {
aom_smooth_v_predictor_8x16 = aom_smooth_v_predictor_8x16_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_8x16 = aom_smooth_v_predictor_8x16_ssse3;
+ aom_smooth_v_predictor_8x32 = aom_smooth_v_predictor_8x32_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_8x32 = aom_smooth_v_predictor_8x32_ssse3;
aom_smooth_v_predictor_8x4 = aom_smooth_v_predictor_8x4_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_8x4 = aom_smooth_v_predictor_8x4_ssse3;
@@ -9519,6 +10270,9 @@ static void setup_rtcd_internal(void) {
aom_v_predictor_32x64 = aom_v_predictor_32x64_sse2;
if (flags & HAS_AVX2)
aom_v_predictor_32x64 = aom_v_predictor_32x64_avx2;
+ aom_v_predictor_64x16 = aom_v_predictor_64x16_sse2;
+ if (flags & HAS_AVX2)
+ aom_v_predictor_64x16 = aom_v_predictor_64x16_avx2;
aom_v_predictor_64x32 = aom_v_predictor_64x32_sse2;
if (flags & HAS_AVX2)
aom_v_predictor_64x32 = aom_v_predictor_64x32_avx2;
@@ -9564,6 +10318,11 @@ static void setup_rtcd_internal(void) {
aom_variance64x64 = aom_variance64x64_sse2;
if (flags & HAS_AVX2)
aom_variance64x64 = aom_variance64x64_avx2;
+ aom_vector_var = aom_vector_var_c;
+ if (flags & HAS_SSE4_1)
+ aom_vector_var = aom_vector_var_sse4_1;
+ if (flags & HAS_AVX2)
+ aom_vector_var = aom_vector_var_avx2;
}
#endif
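
[Editor's note] The x64 aom_dsp_rtcd.h hunks above all follow libaom's run-time CPU dispatch pattern: a kernel such as aom_vector_var loses its static #define binding, gains SSE4.1/AVX2 variants, becomes an RTCD_EXTERN function pointer, and setup_rtcd_internal() then binds the fastest implementation the CPU supports. A minimal sketch of that pattern, assuming hypothetical names (my_kernel_* and the flag bits stand in for the real aom_* symbols and HAS_* constants):

```c
/* Sketch of libaom's RTCD dispatch, not libaom code itself. */
#include <stdint.h>

#define HAS_SSE4_1 (1 << 0) /* illustrative feature-flag bits */
#define HAS_AVX2 (1 << 1)

static int my_kernel_c(const int16_t *ref, const int16_t *src, int bwl) {
  (void)ref;
  (void)src;
  (void)bwl;
  return 0; /* portable C fallback */
}

/* In libaom these bodies are real SIMD; stubs keep the sketch
 * self-contained and compilable. */
static int my_kernel_sse4_1(const int16_t *ref, const int16_t *src, int bwl) {
  return my_kernel_c(ref, src, bwl);
}
static int my_kernel_avx2(const int16_t *ref, const int16_t *src, int bwl) {
  return my_kernel_c(ref, src, bwl);
}

/* The dispatch pointer; the generated headers spell this RTCD_EXTERN. */
static int (*my_kernel)(const int16_t *ref, const int16_t *src, int bwl);

static void setup_rtcd_internal_sketch(int flags) {
  my_kernel = my_kernel_c; /* baseline first... */
  if (flags & HAS_SSE4_1)
    my_kernel = my_kernel_sse4_1;
  if (flags & HAS_AVX2)
    my_kernel = my_kernel_avx2; /* ...so the best supported ISA wins last */
}
```

The assignment order matters: each `if (flags & ...)` overwrites the previous choice, which is why the generated code in the hunks above always lists SSE2/SSSE3 before AVX2.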
diff --git a/chromium/third_party/libaom/source/config/linux/x64/config/aom_scale_rtcd.h b/chromium/third_party/libaom/source/config/linux/x64/config/aom_scale_rtcd.h
index b6059e1426a..5e6c03317d6 100644
--- a/chromium/third_party/libaom/source/config/linux/x64/config/aom_scale_rtcd.h
+++ b/chromium/third_party/libaom/source/config/linux/x64/config/aom_scale_rtcd.h
@@ -18,6 +18,12 @@ void aom_extend_frame_borders_c(struct yv12_buffer_config* ybf,
const int num_planes);
#define aom_extend_frame_borders aom_extend_frame_borders_c
+void aom_extend_frame_borders_plane_row_c(const struct yv12_buffer_config* ybf,
+ int plane,
+ int v_start,
+ int v_end);
+#define aom_extend_frame_borders_plane_row aom_extend_frame_borders_plane_row_c
+
void aom_extend_frame_borders_y_c(struct yv12_buffer_config* ybf);
#define aom_extend_frame_borders_y aom_extend_frame_borders_y_c
diff --git a/chromium/third_party/libaom/source/config/linux/x64/config/av1_rtcd.h b/chromium/third_party/libaom/source/config/linux/x64/config/av1_rtcd.h
index 8d516d90bde..6d8a64d1270 100644
--- a/chromium/third_party/libaom/source/config/linux/x64/config/av1_rtcd.h
+++ b/chromium/third_party/libaom/source/config/linux/x64/config/av1_rtcd.h
@@ -242,6 +242,51 @@ void aom_upsampled_pred_sse2(MACROBLOCKD* xd,
int subpel_search);
#define aom_upsampled_pred aom_upsampled_pred_sse2
+void av1_apply_selfguided_restoration_c(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+void av1_apply_selfguided_restoration_sse4_1(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+void av1_apply_selfguided_restoration_avx2(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+RTCD_EXTERN void (*av1_apply_selfguided_restoration)(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+
int64_t av1_block_error_c(const tran_low_t* coeff,
const tran_low_t* dqcoeff,
intptr_t block_size,
@@ -347,6 +392,31 @@ RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_d16)(
ConvolveParams* conv_params,
int bd);
+int64_t av1_calc_frame_error_c(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+int64_t av1_calc_frame_error_sse2(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+int64_t av1_calc_frame_error_avx2(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+RTCD_EXTERN int64_t (*av1_calc_frame_error)(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+
void av1_calc_indices_dim1_c(const int* data,
const int* centroids,
uint8_t* indices,
@@ -1839,6 +1909,47 @@ void av1_round_shift_array_c(int32_t* arr, int size, int bit);
void av1_round_shift_array_sse4_1(int32_t* arr, int size, int bit);
RTCD_EXTERN void (*av1_round_shift_array)(int32_t* arr, int size, int bit);
+int av1_selfguided_restoration_c(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+int av1_selfguided_restoration_sse4_1(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+int av1_selfguided_restoration_avx2(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+RTCD_EXTERN int (*av1_selfguided_restoration)(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+
void av1_txb_init_levels_c(const tran_low_t* const coeff,
const int width,
const int height,
@@ -1864,6 +1975,79 @@ void av1_upsample_intra_edge_high_c(uint16_t* p, int sz, int bd);
void av1_upsample_intra_edge_high_sse4_1(uint16_t* p, int sz, int bd);
RTCD_EXTERN void (*av1_upsample_intra_edge_high)(uint16_t* p, int sz, int bd);
+void av1_warp_affine_c(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+void av1_warp_affine_sse4_1(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+void av1_warp_affine_avx2(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+RTCD_EXTERN void (*av1_warp_affine)(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+
void av1_wedge_compute_delta_squares_c(int16_t* d,
const int16_t* a,
const int16_t* b,
@@ -2687,6 +2871,11 @@ static void setup_rtcd_internal(void) {
if (flags & HAS_SSSE3)
aom_dist_wtd_comp_avg_upsampled_pred =
aom_dist_wtd_comp_avg_upsampled_pred_ssse3;
+ av1_apply_selfguided_restoration = av1_apply_selfguided_restoration_c;
+ if (flags & HAS_SSE4_1)
+ av1_apply_selfguided_restoration = av1_apply_selfguided_restoration_sse4_1;
+ if (flags & HAS_AVX2)
+ av1_apply_selfguided_restoration = av1_apply_selfguided_restoration_avx2;
av1_block_error = av1_block_error_sse2;
if (flags & HAS_AVX2)
av1_block_error = av1_block_error_avx2;
@@ -2705,6 +2894,9 @@ static void setup_rtcd_internal(void) {
if (flags & HAS_AVX2)
av1_build_compound_diffwtd_mask_d16 =
av1_build_compound_diffwtd_mask_d16_avx2;
+ av1_calc_frame_error = av1_calc_frame_error_sse2;
+ if (flags & HAS_AVX2)
+ av1_calc_frame_error = av1_calc_frame_error_avx2;
av1_calc_indices_dim1 = av1_calc_indices_dim1_sse2;
if (flags & HAS_AVX2)
av1_calc_indices_dim1 = av1_calc_indices_dim1_avx2;
@@ -2903,6 +3095,11 @@ static void setup_rtcd_internal(void) {
av1_round_shift_array = av1_round_shift_array_c;
if (flags & HAS_SSE4_1)
av1_round_shift_array = av1_round_shift_array_sse4_1;
+ av1_selfguided_restoration = av1_selfguided_restoration_c;
+ if (flags & HAS_SSE4_1)
+ av1_selfguided_restoration = av1_selfguided_restoration_sse4_1;
+ if (flags & HAS_AVX2)
+ av1_selfguided_restoration = av1_selfguided_restoration_avx2;
av1_txb_init_levels = av1_txb_init_levels_c;
if (flags & HAS_SSE4_1)
av1_txb_init_levels = av1_txb_init_levels_sse4_1;
@@ -2914,6 +3111,11 @@ static void setup_rtcd_internal(void) {
av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_c;
if (flags & HAS_SSE4_1)
av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_sse4_1;
+ av1_warp_affine = av1_warp_affine_c;
+ if (flags & HAS_SSE4_1)
+ av1_warp_affine = av1_warp_affine_sse4_1;
+ if (flags & HAS_AVX2)
+ av1_warp_affine = av1_warp_affine_avx2;
av1_wedge_compute_delta_squares = av1_wedge_compute_delta_squares_sse2;
if (flags & HAS_AVX2)
av1_wedge_compute_delta_squares = av1_wedge_compute_delta_squares_avx2;
diff --git a/chromium/third_party/libaom/source/config/win/arm64/config/aom_config.asm b/chromium/third_party/libaom/source/config/win/arm64/config/aom_config.asm
index 15df49ee5ab..c5ebd6ab9ba 100644
--- a/chromium/third_party/libaom/source/config/win/arm64/config/aom_config.asm
+++ b/chromium/third_party/libaom/source/config/win/arm64/config/aom_config.asm
@@ -9,7 +9,6 @@
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;
ARCH_ARM equ 1
-ARCH_MIPS equ 0
ARCH_PPC equ 0
ARCH_X86 equ 0
ARCH_X86_64 equ 0
@@ -33,8 +32,6 @@ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 equ 1
CONFIG_ENTROPY_STATS equ 0
CONFIG_EXCLUDE_SIMD_MISMATCH equ 0
CONFIG_FPMT_TEST equ 0
-CONFIG_FRAME_PARALLEL_ENCODE equ 0
-CONFIG_FRAME_PARALLEL_ENCODE_2 equ 0
CONFIG_GCC equ 1
CONFIG_GCOV equ 0
CONFIG_GPROF equ 0
@@ -50,7 +47,7 @@ CONFIG_NORMAL_TILE_MODE equ 1
CONFIG_OPTICAL_FLOW_API equ 0
CONFIG_OS_SUPPORT equ 1
CONFIG_PARTITION_SEARCH_ORDER equ 0
-CONFIG_PIC equ 0
+CONFIG_PIC equ 1
CONFIG_RATECTRL_LOG equ 0
CONFIG_RD_COMMAND equ 0
CONFIG_RD_DEBUG equ 0
@@ -69,14 +66,11 @@ CONFIG_WEBM_IO equ 1
DECODE_HEIGHT_LIMIT equ 16384
DECODE_WIDTH_LIMIT equ 16384
FORCE_HIGHBITDEPTH_DECODING equ 0
+HAVE_ARM_CRC32 equ 0
HAVE_AVX equ 0
HAVE_AVX2 equ 0
-HAVE_DSPR2 equ 0
HAVE_FEXCEPT equ 1
-HAVE_MIPS32 equ 0
-HAVE_MIPS64 equ 0
HAVE_MMX equ 0
-HAVE_MSA equ 0
HAVE_NEON equ 1
HAVE_PTHREAD_H equ 1
HAVE_SSE equ 0
diff --git a/chromium/third_party/libaom/source/config/win/arm64/config/aom_config.c b/chromium/third_party/libaom/source/config/win/arm64/config/aom_config.c
index 7c8f15b9d65..ff9196735fc 100644
--- a/chromium/third_party/libaom/source/config/win/arm64/config/aom_config.c
+++ b/chromium/third_party/libaom/source/config/win/arm64/config/aom_config.c
@@ -9,5 +9,5 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom/aom_codec.h"
-static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DCMAKE_TOOLCHAIN_FILE=\"../source/libaom/build/cmake/toolchains/arm64-linux-gcc.cmake\" -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
+static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DCMAKE_TOOLCHAIN_FILE=\"../source/libaom/build/cmake/toolchains/arm64-linux-gcc.cmake\" -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
const char *aom_codec_build_config(void) {return cfg;}
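
[Editor's note] The aom_config.c hunk only re-serializes the cmake invocation recorded at generation time (the -DCONFIG_REALTIME_ONLY=1 flag moves earlier in the string; no flag is added or removed). That string is exposed through aom_codec_build_config(), a public libaom entry point, so it can be checked at run time:

```c
#include <stdio.h>
#include "aom/aom_codec.h"

int main(void) {
  /* Prints the cmake command line the library was configured with,
   * e.g. the reordered -DCONFIG_REALTIME_ONLY=1 flag in the hunk above. */
  printf("%s\n", aom_codec_build_config());
  return 0;
}
```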
diff --git a/chromium/third_party/libaom/source/config/win/arm64/config/aom_config.h b/chromium/third_party/libaom/source/config/win/arm64/config/aom_config.h
index d8e2ea667ac..fd91a552d61 100644
--- a/chromium/third_party/libaom/source/config/win/arm64/config/aom_config.h
+++ b/chromium/third_party/libaom/source/config/win/arm64/config/aom_config.h
@@ -11,7 +11,6 @@
#ifndef AOM_CONFIG_H_
#define AOM_CONFIG_H_
#define ARCH_ARM 1
-#define ARCH_MIPS 0
#define ARCH_PPC 0
#define ARCH_X86 0
#define ARCH_X86_64 0
@@ -35,8 +34,6 @@
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_EXCLUDE_SIMD_MISMATCH 0
#define CONFIG_FPMT_TEST 0
-#define CONFIG_FRAME_PARALLEL_ENCODE 0
-#define CONFIG_FRAME_PARALLEL_ENCODE_2 0
#define CONFIG_GCC 0
#define CONFIG_GCOV 0
#define CONFIG_GPROF 0
@@ -52,7 +49,7 @@
#define CONFIG_OPTICAL_FLOW_API 0
#define CONFIG_OS_SUPPORT 1
#define CONFIG_PARTITION_SEARCH_ORDER 0
-#define CONFIG_PIC 0
+#define CONFIG_PIC 1
#define CONFIG_RATECTRL_LOG 0
#define CONFIG_RD_COMMAND 0
#define CONFIG_RD_DEBUG 0
@@ -71,14 +68,11 @@
#define DECODE_HEIGHT_LIMIT 16384
#define DECODE_WIDTH_LIMIT 16384
#define FORCE_HIGHBITDEPTH_DECODING 0
+#define HAVE_ARM_CRC32 0
#define HAVE_AVX 0
#define HAVE_AVX2 0
-#define HAVE_DSPR2 0
#define HAVE_FEXCEPT 1
-#define HAVE_MIPS32 0
-#define HAVE_MIPS64 0
#define HAVE_MMX 0
-#define HAVE_MSA 0
#define HAVE_NEON 1
#define HAVE_PTHREAD_H 0
#define HAVE_SSE 0
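
[Editor's note] With HAVE_NEON fixed to 1 and every x86 flag fixed to 0, the win/arm64 headers that follow need no run-time dispatch at all: each symbol is bound with a #define, so calls compile to direct calls. A hedged sketch of that compile-time binding, again with hypothetical names (my_predictor_* stands in for the aom_*_neon pairs below):

```c
/* Sketch of compile-time binding on single-ISA targets. */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void my_predictor_neon(uint8_t *dst, ptrdiff_t stride,
                              const uint8_t *above, const uint8_t *left) {
  (void)stride;
  (void)above;
  (void)left;
  memset(dst, 128, 4); /* stand-in body; the real code uses NEON intrinsics */
}

/* No RTCD_EXTERN pointer and no flags check: the name is a macro alias
 * chosen when the header was generated. */
#define my_predictor my_predictor_neon
```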
diff --git a/chromium/third_party/libaom/source/config/win/arm64/config/aom_dsp_rtcd.h b/chromium/third_party/libaom/source/config/win/arm64/config/aom_dsp_rtcd.h
index 155c11857c0..c2d2ac85aaa 100644
--- a/chromium/third_party/libaom/source/config/win/arm64/config/aom_dsp_rtcd.h
+++ b/chromium/third_party/libaom/source/config/win/arm64/config/aom_dsp_rtcd.h
@@ -34,7 +34,12 @@ void aom_avg_8x8_quad_c(const uint8_t* s,
int x16_idx,
int y16_idx,
int* avg);
-#define aom_avg_8x8_quad aom_avg_8x8_quad_c
+void aom_avg_8x8_quad_neon(const uint8_t* s,
+ int p,
+ int x16_idx,
+ int y16_idx,
+ int* avg);
+#define aom_avg_8x8_quad aom_avg_8x8_quad_neon
void aom_blend_a64_hmask_c(uint8_t* dst,
uint32_t dst_stride,
@@ -176,18 +181,24 @@ void aom_dc_128_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_c
+void aom_dc_128_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x4 aom_dc_128_predictor_16x4_c
+
+void aom_dc_128_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x64 aom_dc_128_predictor_16x64_c
+
void aom_dc_128_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_c
-void aom_dc_128_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c
-
void aom_dc_128_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -210,6 +221,18 @@ void aom_dc_128_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_32x64 aom_dc_128_predictor_32x64_c
+void aom_dc_128_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_32x8 aom_dc_128_predictor_32x8_c
+
+void aom_dc_128_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_4x16 aom_dc_128_predictor_4x16_c
+
void aom_dc_128_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -226,6 +249,12 @@ void aom_dc_128_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_c
+void aom_dc_128_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_64x16 aom_dc_128_predictor_64x16_c
+
void aom_dc_128_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -244,6 +273,12 @@ void aom_dc_128_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_c
+void aom_dc_128_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_8x32 aom_dc_128_predictor_8x32_c
+
void aom_dc_128_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -276,18 +311,24 @@ void aom_dc_left_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_c
+void aom_dc_left_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x4 aom_dc_left_predictor_16x4_c
+
+void aom_dc_left_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x64 aom_dc_left_predictor_16x64_c
+
void aom_dc_left_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_c
-void aom_dc_left_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c
-
void aom_dc_left_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -310,6 +351,18 @@ void aom_dc_left_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_32x64 aom_dc_left_predictor_32x64_c
+void aom_dc_left_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_32x8 aom_dc_left_predictor_32x8_c
+
+void aom_dc_left_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_4x16 aom_dc_left_predictor_4x16_c
+
void aom_dc_left_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -326,6 +379,12 @@ void aom_dc_left_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_c
+void aom_dc_left_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_64x16 aom_dc_left_predictor_64x16_c
+
void aom_dc_left_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -344,6 +403,12 @@ void aom_dc_left_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_c
+void aom_dc_left_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_8x32 aom_dc_left_predictor_8x32_c
+
void aom_dc_left_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -376,18 +441,24 @@ void aom_dc_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_c
+void aom_dc_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x4 aom_dc_predictor_16x4_c
+
+void aom_dc_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x64 aom_dc_predictor_16x64_c
+
void aom_dc_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_c
-void aom_dc_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c
-
void aom_dc_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -410,6 +481,18 @@ void aom_dc_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_32x64 aom_dc_predictor_32x64_c
+void aom_dc_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_32x8 aom_dc_predictor_32x8_c
+
+void aom_dc_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_4x16 aom_dc_predictor_4x16_c
+
void aom_dc_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -426,6 +509,12 @@ void aom_dc_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_c
+void aom_dc_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_64x16 aom_dc_predictor_64x16_c
+
void aom_dc_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -444,6 +533,12 @@ void aom_dc_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_c
+void aom_dc_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_8x32 aom_dc_predictor_8x32_c
+
void aom_dc_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -476,18 +571,24 @@ void aom_dc_top_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_c
+void aom_dc_top_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x4 aom_dc_top_predictor_16x4_c
+
+void aom_dc_top_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x64 aom_dc_top_predictor_16x64_c
+
void aom_dc_top_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_c
-void aom_dc_top_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c
-
void aom_dc_top_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -510,6 +611,18 @@ void aom_dc_top_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_32x64 aom_dc_top_predictor_32x64_c
+void aom_dc_top_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_32x8 aom_dc_top_predictor_32x8_c
+
+void aom_dc_top_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_4x16 aom_dc_top_predictor_4x16_c
+
void aom_dc_top_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -526,6 +639,12 @@ void aom_dc_top_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_c
+void aom_dc_top_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_64x16 aom_dc_top_predictor_64x16_c
+
void aom_dc_top_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -544,6 +663,12 @@ void aom_dc_top_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_c
+void aom_dc_top_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_8x32 aom_dc_top_predictor_8x32_c
+
void aom_dc_top_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1014,18 +1139,24 @@ void aom_h_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_16x32 aom_h_predictor_16x32_c
+void aom_h_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x4 aom_h_predictor_16x4_c
+
+void aom_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x64 aom_h_predictor_16x64_c
+
void aom_h_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_h_predictor_16x8 aom_h_predictor_16x8_c
-void aom_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_h_predictor_2x2 aom_h_predictor_2x2_c
-
void aom_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1048,6 +1179,18 @@ void aom_h_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_32x64 aom_h_predictor_32x64_c
+void aom_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_32x8 aom_h_predictor_32x8_c
+
+void aom_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_4x16 aom_h_predictor_4x16_c
+
void aom_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1064,6 +1207,12 @@ void aom_h_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_4x8 aom_h_predictor_4x8_c
+void aom_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_64x16 aom_h_predictor_64x16_c
+
void aom_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1082,6 +1231,12 @@ void aom_h_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_8x16 aom_h_predictor_8x16_c
+void aom_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_8x32 aom_h_predictor_8x32_c
+
void aom_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1124,11 +1279,6 @@ void aom_hadamard_8x8_neon(const int16_t* src_diff,
tran_low_t* coeff);
#define aom_hadamard_8x8 aom_hadamard_8x8_neon
-void aom_hadamard_8x8_dual_c(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-#define aom_hadamard_8x8_dual aom_hadamard_8x8_dual_c
-
void aom_hadamard_lp_16x16_c(const int16_t* src_diff,
ptrdiff_t src_stride,
int16_t* coeff);
@@ -1145,6 +1295,14 @@ void aom_hadamard_lp_8x8_neon(const int16_t* src_diff,
int16_t* coeff);
#define aom_hadamard_lp_8x8 aom_hadamard_lp_8x8_neon
+void aom_hadamard_lp_8x8_dual_c(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+void aom_hadamard_lp_8x8_dual_neon(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+#define aom_hadamard_lp_8x8_dual aom_hadamard_lp_8x8_dual_neon
+
void aom_ifft16x16_float_c(const float* input, float* temp, float* output);
#define aom_ifft16x16_float aom_ifft16x16_float_c
@@ -1160,18 +1318,32 @@ void aom_ifft4x4_float_c(const float* input, float* temp, float* output);
void aom_ifft8x8_float_c(const float* input, float* temp, float* output);
#define aom_ifft8x8_float aom_ifft8x8_float_c
-int16_t aom_int_pro_col_c(const uint8_t* ref, const int width);
-int16_t aom_int_pro_col_neon(const uint8_t* ref, const int width);
+void aom_int_pro_col_c(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_col_neon(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
#define aom_int_pro_col aom_int_pro_col_neon
-void aom_int_pro_row_c(int16_t hbuf[16],
+void aom_int_pro_row_c(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
-void aom_int_pro_row_neon(int16_t hbuf[16],
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_row_neon(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
+ const int width,
+ const int height,
+ int norm_factor);
#define aom_int_pro_row aom_int_pro_row_neon
void aom_lowbd_blend_a64_d16_mask_c(uint8_t* dst,
@@ -1237,7 +1409,12 @@ void aom_lpf_horizontal_14_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_14_quad aom_lpf_horizontal_14_quad_c
+void aom_lpf_horizontal_14_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_horizontal_14_quad aom_lpf_horizontal_14_quad_neon
void aom_lpf_horizontal_4_c(uint8_t* s,
int pitch,
@@ -1274,7 +1451,12 @@ void aom_lpf_horizontal_4_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_4_quad aom_lpf_horizontal_4_quad_c
+void aom_lpf_horizontal_4_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_horizontal_4_quad aom_lpf_horizontal_4_quad_neon
void aom_lpf_horizontal_6_c(uint8_t* s,
int pitch,
@@ -1311,7 +1493,12 @@ void aom_lpf_horizontal_6_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_6_quad aom_lpf_horizontal_6_quad_c
+void aom_lpf_horizontal_6_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_horizontal_6_quad aom_lpf_horizontal_6_quad_neon
void aom_lpf_horizontal_8_c(uint8_t* s,
int pitch,
@@ -1348,7 +1535,12 @@ void aom_lpf_horizontal_8_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_horizontal_8_quad aom_lpf_horizontal_8_quad_c
+void aom_lpf_horizontal_8_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_horizontal_8_quad aom_lpf_horizontal_8_quad_neon
void aom_lpf_vertical_14_c(uint8_t* s,
int pitch,
@@ -1385,7 +1577,12 @@ void aom_lpf_vertical_14_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_14_quad aom_lpf_vertical_14_quad_c
+void aom_lpf_vertical_14_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_vertical_14_quad aom_lpf_vertical_14_quad_neon
void aom_lpf_vertical_4_c(uint8_t* s,
int pitch,
@@ -1422,7 +1619,12 @@ void aom_lpf_vertical_4_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_4_quad aom_lpf_vertical_4_quad_c
+void aom_lpf_vertical_4_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_vertical_4_quad aom_lpf_vertical_4_quad_neon
void aom_lpf_vertical_6_c(uint8_t* s,
int pitch,
@@ -1459,7 +1661,12 @@ void aom_lpf_vertical_6_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_6_quad aom_lpf_vertical_6_quad_c
+void aom_lpf_vertical_6_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_vertical_6_quad aom_lpf_vertical_6_quad_neon
void aom_lpf_vertical_8_c(uint8_t* s,
int pitch,
@@ -1496,7 +1703,12 @@ void aom_lpf_vertical_8_quad_c(uint8_t* s,
const uint8_t* blimit0,
const uint8_t* limit0,
const uint8_t* thresh0);
-#define aom_lpf_vertical_8_quad aom_lpf_vertical_8_quad_c
+void aom_lpf_vertical_8_quad_neon(uint8_t* s,
+ int pitch,
+ const uint8_t* blimit0,
+ const uint8_t* limit0,
+ const uint8_t* thresh0);
+#define aom_lpf_vertical_8_quad aom_lpf_vertical_8_quad_neon
unsigned int aom_masked_sad128x128_c(const uint8_t* src,
int src_stride,
@@ -2070,21 +2282,43 @@ unsigned int aom_mse16x8_c(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
-#define aom_mse16x8 aom_mse16x8_c
+unsigned int aom_mse16x8_neon(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+#define aom_mse16x8 aom_mse16x8_neon
unsigned int aom_mse8x16_c(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
-#define aom_mse8x16 aom_mse8x16_c
+unsigned int aom_mse8x16_neon(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+#define aom_mse8x16 aom_mse8x16_neon
unsigned int aom_mse8x8_c(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
-#define aom_mse8x8 aom_mse8x8_c
+unsigned int aom_mse8x8_neon(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+#define aom_mse8x8 aom_mse8x8_neon
+
+uint64_t aom_mse_16xh_16bit_c(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+#define aom_mse_16xh_16bit aom_mse_16xh_16bit_c
uint64_t aom_mse_wxh_16bit_c(uint8_t* dst,
int dstride,
@@ -2098,85 +2332,191 @@ void aom_paeth_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_16x16 aom_paeth_predictor_16x16_c
+void aom_paeth_predictor_16x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x16 aom_paeth_predictor_16x16_neon
void aom_paeth_predictor_16x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_16x32 aom_paeth_predictor_16x32_c
+void aom_paeth_predictor_16x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x32 aom_paeth_predictor_16x32_neon
-void aom_paeth_predictor_16x8_c(uint8_t* dst,
+void aom_paeth_predictor_16x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_16x8 aom_paeth_predictor_16x8_c
+void aom_paeth_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x4 aom_paeth_predictor_16x4_neon
-void aom_paeth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c
+void aom_paeth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x64 aom_paeth_predictor_16x64_neon
+
+void aom_paeth_predictor_16x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_16x8 aom_paeth_predictor_16x8_neon
void aom_paeth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_32x16 aom_paeth_predictor_32x16_c
+void aom_paeth_predictor_32x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x16 aom_paeth_predictor_32x16_neon
void aom_paeth_predictor_32x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_32x32 aom_paeth_predictor_32x32_c
+void aom_paeth_predictor_32x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x32 aom_paeth_predictor_32x32_neon
void aom_paeth_predictor_32x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_32x64 aom_paeth_predictor_32x64_c
+void aom_paeth_predictor_32x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x64 aom_paeth_predictor_32x64_neon
+
+void aom_paeth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_32x8 aom_paeth_predictor_32x8_neon
+
+void aom_paeth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_4x16 aom_paeth_predictor_4x16_neon
void aom_paeth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_4x4 aom_paeth_predictor_4x4_c
+void aom_paeth_predictor_4x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_4x4 aom_paeth_predictor_4x4_neon
void aom_paeth_predictor_4x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_4x8 aom_paeth_predictor_4x8_c
+void aom_paeth_predictor_4x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_4x8 aom_paeth_predictor_4x8_neon
+
+void aom_paeth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_64x16 aom_paeth_predictor_64x16_neon
void aom_paeth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_64x32 aom_paeth_predictor_64x32_c
+void aom_paeth_predictor_64x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_64x32 aom_paeth_predictor_64x32_neon
void aom_paeth_predictor_64x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_64x64 aom_paeth_predictor_64x64_c
+void aom_paeth_predictor_64x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_64x64 aom_paeth_predictor_64x64_neon
void aom_paeth_predictor_8x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_8x16 aom_paeth_predictor_8x16_c
+void aom_paeth_predictor_8x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x16 aom_paeth_predictor_8x16_neon
+
+void aom_paeth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x32 aom_paeth_predictor_8x32_neon
void aom_paeth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_8x4 aom_paeth_predictor_8x4_c
+void aom_paeth_predictor_8x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x4 aom_paeth_predictor_8x4_neon
void aom_paeth_predictor_8x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_paeth_predictor_8x8 aom_paeth_predictor_8x8_c
+void aom_paeth_predictor_8x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_paeth_predictor_8x8 aom_paeth_predictor_8x8_neon
void aom_pixel_scale_c(const int16_t* src_diff,
ptrdiff_t src_stride,
@@ -2238,20 +2578,6 @@ void aom_quantize_b_32x32_neon(const tran_low_t* coeff_ptr,
const int16_t* iscan);
#define aom_quantize_b_32x32 aom_quantize_b_32x32_neon
-void aom_quantize_b_32x32_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_32x32_adaptive aom_quantize_b_32x32_adaptive_c
-
void aom_quantize_b_64x64_c(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -2278,34 +2604,6 @@ void aom_quantize_b_64x64_neon(const tran_low_t* coeff_ptr,
const int16_t* iscan);
#define aom_quantize_b_64x64 aom_quantize_b_64x64_neon
-void aom_quantize_b_64x64_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_64x64_adaptive aom_quantize_b_64x64_adaptive_c
-
-void aom_quantize_b_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_adaptive aom_quantize_b_adaptive_c
-
unsigned int aom_sad128x128_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
@@ -2328,7 +2626,12 @@ void aom_sad128x128x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad128x128x4d aom_sad128x128x4d_c
+void aom_sad128x128x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad128x128x4d aom_sad128x128x4d_neon
void aom_sad128x128x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2342,7 +2645,11 @@ unsigned int aom_sad128x64_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad128x64 aom_sad128x64_c
+unsigned int aom_sad128x64_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad128x64 aom_sad128x64_neon
unsigned int aom_sad128x64_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2356,7 +2663,12 @@ void aom_sad128x64x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad128x64x4d aom_sad128x64x4d_c
+void aom_sad128x64x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad128x64x4d aom_sad128x64x4d_neon
void aom_sad128x64x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2415,7 +2727,11 @@ unsigned int aom_sad16x32_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad16x32 aom_sad16x32_c
+unsigned int aom_sad16x32_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad16x32 aom_sad16x32_neon
unsigned int aom_sad16x32_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2429,7 +2745,12 @@ void aom_sad16x32x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad16x32x4d aom_sad16x32x4d_c
+void aom_sad16x32x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad16x32x4d aom_sad16x32x4d_neon
void aom_sad16x32x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2461,7 +2782,12 @@ void aom_sad16x8x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad16x8x4d aom_sad16x8x4d_c
+void aom_sad16x8x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad16x8x4d aom_sad16x8x4d_neon
void aom_sad16x8x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2483,7 +2809,11 @@ unsigned int aom_sad32x16_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad32x16 aom_sad32x16_c
+unsigned int aom_sad32x16_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad32x16 aom_sad32x16_neon
unsigned int aom_sad32x16_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2497,7 +2827,12 @@ void aom_sad32x16x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad32x16x4d aom_sad32x16x4d_c
+void aom_sad32x16x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad32x16x4d aom_sad32x16x4d_neon
void aom_sad32x16x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2548,7 +2883,11 @@ unsigned int aom_sad32x64_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad32x64 aom_sad32x64_c
+unsigned int aom_sad32x64_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad32x64 aom_sad32x64_neon
unsigned int aom_sad32x64_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2562,7 +2901,12 @@ void aom_sad32x64x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad32x64x4d aom_sad32x64x4d_c
+void aom_sad32x64x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad32x64x4d aom_sad32x64x4d_neon
void aom_sad32x64x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2602,7 +2946,12 @@ void aom_sad4x4x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad4x4x4d aom_sad4x4x4d_c
+void aom_sad4x4x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad4x4x4d aom_sad4x4x4d_neon
void aom_sad4x4x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2616,7 +2965,11 @@ unsigned int aom_sad4x8_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad4x8 aom_sad4x8_c
+unsigned int aom_sad4x8_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad4x8 aom_sad4x8_neon
unsigned int aom_sad4x8_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2630,7 +2983,12 @@ void aom_sad4x8x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad4x8x4d aom_sad4x8x4d_c
+void aom_sad4x8x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad4x8x4d aom_sad4x8x4d_neon
void aom_sad4x8x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2652,7 +3010,11 @@ unsigned int aom_sad64x128_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad64x128 aom_sad64x128_c
+unsigned int aom_sad64x128_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad64x128 aom_sad64x128_neon
unsigned int aom_sad64x128_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2666,7 +3028,12 @@ void aom_sad64x128x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad64x128x4d aom_sad64x128x4d_c
+void aom_sad64x128x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad64x128x4d aom_sad64x128x4d_neon
void aom_sad64x128x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2680,7 +3047,11 @@ unsigned int aom_sad64x32_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad64x32 aom_sad64x32_c
+unsigned int aom_sad64x32_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad64x32 aom_sad64x32_neon
unsigned int aom_sad64x32_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2694,7 +3065,12 @@ void aom_sad64x32x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad64x32x4d aom_sad64x32x4d_c
+void aom_sad64x32x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad64x32x4d aom_sad64x32x4d_neon
void aom_sad64x32x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2771,7 +3147,12 @@ void aom_sad8x16x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad8x16x4d aom_sad8x16x4d_c
+void aom_sad8x16x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad8x16x4d aom_sad8x16x4d_neon
void aom_sad8x16x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2785,7 +3166,11 @@ unsigned int aom_sad8x4_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
int ref_stride);
-#define aom_sad8x4 aom_sad8x4_c
+unsigned int aom_sad8x4_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride);
+#define aom_sad8x4 aom_sad8x4_neon
unsigned int aom_sad8x4_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2799,7 +3184,12 @@ void aom_sad8x4x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad8x4x4d aom_sad8x4x4d_c
+void aom_sad8x4x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad8x4x4d aom_sad8x4x4d_neon
void aom_sad8x4x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -2831,7 +3221,12 @@ void aom_sad8x8x4d_c(const uint8_t* src_ptr,
const uint8_t* const ref_ptr[4],
int ref_stride,
uint32_t sad_array[4]);
-#define aom_sad8x8x4d aom_sad8x8x4d_c
+void aom_sad8x8x4d_neon(const uint8_t* src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[4],
+ int ref_stride,
+ uint32_t sad_array[4]);
+#define aom_sad8x8x4d aom_sad8x8x4d_neon
void aom_sad8x8x4d_avg_c(const uint8_t* src_ptr,
int src_stride,
@@ -3219,85 +3614,191 @@ void aom_smooth_h_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_16x16 aom_smooth_h_predictor_16x16_c
+void aom_smooth_h_predictor_16x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x16 aom_smooth_h_predictor_16x16_neon
void aom_smooth_h_predictor_16x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_16x32 aom_smooth_h_predictor_16x32_c
+void aom_smooth_h_predictor_16x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x32 aom_smooth_h_predictor_16x32_neon
-void aom_smooth_h_predictor_16x8_c(uint8_t* dst,
+void aom_smooth_h_predictor_16x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_16x8 aom_smooth_h_predictor_16x8_c
+void aom_smooth_h_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x4 aom_smooth_h_predictor_16x4_neon
-void aom_smooth_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_h_predictor_2x2 aom_smooth_h_predictor_2x2_c
+void aom_smooth_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x64 aom_smooth_h_predictor_16x64_neon
+
+void aom_smooth_h_predictor_16x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_16x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_16x8 aom_smooth_h_predictor_16x8_neon
void aom_smooth_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_32x16 aom_smooth_h_predictor_32x16_c
+void aom_smooth_h_predictor_32x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x16 aom_smooth_h_predictor_32x16_neon
void aom_smooth_h_predictor_32x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_32x32 aom_smooth_h_predictor_32x32_c
+void aom_smooth_h_predictor_32x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x32 aom_smooth_h_predictor_32x32_neon
void aom_smooth_h_predictor_32x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_32x64 aom_smooth_h_predictor_32x64_c
+void aom_smooth_h_predictor_32x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x64 aom_smooth_h_predictor_32x64_neon
+
+void aom_smooth_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_32x8 aom_smooth_h_predictor_32x8_neon
+
+void aom_smooth_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_4x16 aom_smooth_h_predictor_4x16_neon
void aom_smooth_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_4x4 aom_smooth_h_predictor_4x4_c
+void aom_smooth_h_predictor_4x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_4x4 aom_smooth_h_predictor_4x4_neon
void aom_smooth_h_predictor_4x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_4x8 aom_smooth_h_predictor_4x8_c
+void aom_smooth_h_predictor_4x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_4x8 aom_smooth_h_predictor_4x8_neon
+
+void aom_smooth_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_64x16 aom_smooth_h_predictor_64x16_neon
void aom_smooth_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_64x32 aom_smooth_h_predictor_64x32_c
+void aom_smooth_h_predictor_64x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_64x32 aom_smooth_h_predictor_64x32_neon
void aom_smooth_h_predictor_64x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_64x64 aom_smooth_h_predictor_64x64_c
+void aom_smooth_h_predictor_64x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_64x64 aom_smooth_h_predictor_64x64_neon
void aom_smooth_h_predictor_8x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_8x16 aom_smooth_h_predictor_8x16_c
+void aom_smooth_h_predictor_8x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x16 aom_smooth_h_predictor_8x16_neon
+
+void aom_smooth_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x32 aom_smooth_h_predictor_8x32_neon
void aom_smooth_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_8x4 aom_smooth_h_predictor_8x4_c
+void aom_smooth_h_predictor_8x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x4 aom_smooth_h_predictor_8x4_neon
void aom_smooth_h_predictor_8x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_h_predictor_8x8 aom_smooth_h_predictor_8x8_c
+void aom_smooth_h_predictor_8x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_h_predictor_8x8 aom_smooth_h_predictor_8x8_neon
void aom_smooth_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
@@ -3319,6 +3820,26 @@ void aom_smooth_predictor_16x32_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_16x32 aom_smooth_predictor_16x32_neon
+void aom_smooth_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_16x4 aom_smooth_predictor_16x4_neon
+
+void aom_smooth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_16x64 aom_smooth_predictor_16x64_neon
+
void aom_smooth_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3329,12 +3850,6 @@ void aom_smooth_predictor_16x8_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_16x8 aom_smooth_predictor_16x8_neon
-void aom_smooth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c
-
void aom_smooth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3365,6 +3880,26 @@ void aom_smooth_predictor_32x64_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_32x64 aom_smooth_predictor_32x64_neon
+void aom_smooth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_32x8 aom_smooth_predictor_32x8_neon
+
+void aom_smooth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_4x16 aom_smooth_predictor_4x16_neon
+
void aom_smooth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3385,6 +3920,16 @@ void aom_smooth_predictor_4x8_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_4x8 aom_smooth_predictor_4x8_neon
+void aom_smooth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_64x16 aom_smooth_predictor_64x16_neon
+
void aom_smooth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3415,6 +3960,16 @@ void aom_smooth_predictor_8x16_neon(uint8_t* dst,
const uint8_t* left);
#define aom_smooth_predictor_8x16 aom_smooth_predictor_8x16_neon
+void aom_smooth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_predictor_8x32 aom_smooth_predictor_8x32_neon
+
void aom_smooth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -3439,85 +3994,191 @@ void aom_smooth_v_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_16x16 aom_smooth_v_predictor_16x16_c
+void aom_smooth_v_predictor_16x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x16 aom_smooth_v_predictor_16x16_neon
void aom_smooth_v_predictor_16x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_16x32 aom_smooth_v_predictor_16x32_c
+void aom_smooth_v_predictor_16x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x32 aom_smooth_v_predictor_16x32_neon
-void aom_smooth_v_predictor_16x8_c(uint8_t* dst,
+void aom_smooth_v_predictor_16x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_16x8 aom_smooth_v_predictor_16x8_c
+void aom_smooth_v_predictor_16x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x4 aom_smooth_v_predictor_16x4_neon
-void aom_smooth_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_v_predictor_2x2 aom_smooth_v_predictor_2x2_c
+void aom_smooth_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_16x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x64 aom_smooth_v_predictor_16x64_neon
+
+void aom_smooth_v_predictor_16x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_16x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_16x8 aom_smooth_v_predictor_16x8_neon
void aom_smooth_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_32x16 aom_smooth_v_predictor_32x16_c
+void aom_smooth_v_predictor_32x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x16 aom_smooth_v_predictor_32x16_neon
void aom_smooth_v_predictor_32x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_32x32 aom_smooth_v_predictor_32x32_c
+void aom_smooth_v_predictor_32x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x32 aom_smooth_v_predictor_32x32_neon
void aom_smooth_v_predictor_32x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_32x64 aom_smooth_v_predictor_32x64_c
+void aom_smooth_v_predictor_32x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x64 aom_smooth_v_predictor_32x64_neon
+
+void aom_smooth_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_32x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_32x8 aom_smooth_v_predictor_32x8_neon
+
+void aom_smooth_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_4x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_4x16 aom_smooth_v_predictor_4x16_neon
void aom_smooth_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_4x4 aom_smooth_v_predictor_4x4_c
+void aom_smooth_v_predictor_4x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_4x4 aom_smooth_v_predictor_4x4_neon
void aom_smooth_v_predictor_4x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_4x8 aom_smooth_v_predictor_4x8_c
+void aom_smooth_v_predictor_4x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_4x8 aom_smooth_v_predictor_4x8_neon
+
+void aom_smooth_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_64x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_64x16 aom_smooth_v_predictor_64x16_neon
void aom_smooth_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_64x32 aom_smooth_v_predictor_64x32_c
+void aom_smooth_v_predictor_64x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_64x32 aom_smooth_v_predictor_64x32_neon
void aom_smooth_v_predictor_64x64_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_64x64 aom_smooth_v_predictor_64x64_c
+void aom_smooth_v_predictor_64x64_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_64x64 aom_smooth_v_predictor_64x64_neon
void aom_smooth_v_predictor_8x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_8x16 aom_smooth_v_predictor_8x16_c
+void aom_smooth_v_predictor_8x16_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x16 aom_smooth_v_predictor_8x16_neon
+
+void aom_smooth_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_8x32_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x32 aom_smooth_v_predictor_8x32_neon
void aom_smooth_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_8x4 aom_smooth_v_predictor_8x4_c
+void aom_smooth_v_predictor_8x4_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x4 aom_smooth_v_predictor_8x4_neon
void aom_smooth_v_predictor_8x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
-#define aom_smooth_v_predictor_8x8 aom_smooth_v_predictor_8x8_c
+void aom_smooth_v_predictor_8x8_neon(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_smooth_v_predictor_8x8 aom_smooth_v_predictor_8x8_neon
int64_t aom_sse_c(const uint8_t* a,
int a_stride,
@@ -3996,7 +4657,12 @@ uint64_t aom_sum_sse_2d_i16_c(const int16_t* src,
int width,
int height,
int* sum);
-#define aom_sum_sse_2d_i16 aom_sum_sse_2d_i16_c
+uint64_t aom_sum_sse_2d_i16_neon(const int16_t* src,
+ int src_stride,
+ int width,
+ int height,
+ int* sum);
+#define aom_sum_sse_2d_i16 aom_sum_sse_2d_i16_neon
void aom_v_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
@@ -4014,18 +4680,24 @@ void aom_v_predictor_16x32_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_16x32 aom_v_predictor_16x32_c
+void aom_v_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x4 aom_v_predictor_16x4_c
+
+void aom_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x64 aom_v_predictor_16x64_c
+
void aom_v_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
const uint8_t* left);
#define aom_v_predictor_16x8 aom_v_predictor_16x8_c
-void aom_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_v_predictor_2x2 aom_v_predictor_2x2_c
-
void aom_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4048,6 +4720,18 @@ void aom_v_predictor_32x64_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_32x64 aom_v_predictor_32x64_c
+void aom_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_32x8 aom_v_predictor_32x8_c
+
+void aom_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_4x16 aom_v_predictor_4x16_c
+
void aom_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4064,6 +4748,12 @@ void aom_v_predictor_4x8_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_4x8 aom_v_predictor_4x8_c
+void aom_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_64x16 aom_v_predictor_64x16_c
+
void aom_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4082,6 +4772,12 @@ void aom_v_predictor_8x16_c(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_8x16 aom_v_predictor_8x16_c
+void aom_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_8x32 aom_v_predictor_8x32_c
+
void aom_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4317,8 +5013,8 @@ unsigned int aom_variance8x8_neon(const uint8_t* src_ptr,
unsigned int* sse);
#define aom_variance8x8 aom_variance8x8_neon
-int aom_vector_var_c(const int16_t* ref, const int16_t* src, const int bwl);
-int aom_vector_var_neon(const int16_t* ref, const int16_t* src, const int bwl);
+int aom_vector_var_c(const int16_t* ref, const int16_t* src, int bwl);
+int aom_vector_var_neon(const int16_t* ref, const int16_t* src, int bwl);
#define aom_vector_var aom_vector_var_neon
void aom_dsp_rtcd(void);
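
The dispatch idiom visible above repeats throughout these generated headers: each kernel ships a _c reference implementation, and the generic name is bound to the best variant for the target. On fixed-ISA targets (win/arm64 above) the binding is a compile-time #define; on targets with runtime CPU detection (win/ia32 below) the header instead declares an RTCD_EXTERN function pointer that aom_dsp_rtcd() points at the chosen kernel during startup. A minimal sketch of the two forms, mirroring declarations that appear in this patch (illustrative only, not itself part of the diff; the two forms come from different generated headers and never coexist for the same symbol):

/* Form 1 - fixed-ISA target: every call site resolves at compile time. */
unsigned int aom_sad4x8_c(const uint8_t* src_ptr, int src_stride,
                          const uint8_t* ref_ptr, int ref_stride);
unsigned int aom_sad4x8_neon(const uint8_t* src_ptr, int src_stride,
                             const uint8_t* ref_ptr, int ref_stride);
#define aom_sad4x8 aom_sad4x8_neon

/* Form 2 - runtime-detect target: aom_dsp_rtcd() probes CPU flags and
 * assigns the fastest available kernel to the pointer before first use. */
RTCD_EXTERN unsigned int (*aom_sad4x8)(const uint8_t* src_ptr,
                                       int src_stride,
                                       const uint8_t* ref_ptr,
                                       int ref_stride);
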
diff --git a/chromium/third_party/libaom/source/config/win/arm64/config/aom_scale_rtcd.h b/chromium/third_party/libaom/source/config/win/arm64/config/aom_scale_rtcd.h
index e27b558dd38..7db7b9d0f84 100644
--- a/chromium/third_party/libaom/source/config/win/arm64/config/aom_scale_rtcd.h
+++ b/chromium/third_party/libaom/source/config/win/arm64/config/aom_scale_rtcd.h
@@ -18,6 +18,12 @@ void aom_extend_frame_borders_c(struct yv12_buffer_config* ybf,
const int num_planes);
#define aom_extend_frame_borders aom_extend_frame_borders_c
+void aom_extend_frame_borders_plane_row_c(const struct yv12_buffer_config* ybf,
+ int plane,
+ int v_start,
+ int v_end);
+#define aom_extend_frame_borders_plane_row aom_extend_frame_borders_plane_row_c
+
void aom_extend_frame_borders_y_c(struct yv12_buffer_config* ybf);
#define aom_extend_frame_borders_y aom_extend_frame_borders_y_c
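
aom_extend_frame_borders_plane_row is new in this header; its row-range arguments suggest borders can now be extended for just the rows of one plane that a worker has finished, rather than for the whole frame at once. A hedged sketch based only on the signature above — the helper name, the three-plane loop, and the row band are illustrative assumptions, not taken from the patch:

/* Sketch: extend borders behind a row worker as it completes a band. */
static void extend_finished_band(const struct yv12_buffer_config* frame,
                                 int v_start, int v_end) {
  for (int plane = 0; plane < 3; ++plane) {
    aom_extend_frame_borders_plane_row(frame, plane, v_start, v_end);
  }
}
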
diff --git a/chromium/third_party/libaom/source/config/win/arm64/config/av1_rtcd.h b/chromium/third_party/libaom/source/config/win/arm64/config/av1_rtcd.h
index 23e831cc591..fd9ac835ad7 100644
--- a/chromium/third_party/libaom/source/config/win/arm64/config/av1_rtcd.h
+++ b/chromium/third_party/libaom/source/config/win/arm64/config/av1_rtcd.h
@@ -183,6 +183,30 @@ void aom_upsampled_pred_c(MACROBLOCKD* xd,
int subpel_search);
#define aom_upsampled_pred aom_upsampled_pred_c
+void av1_apply_selfguided_restoration_c(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+void av1_apply_selfguided_restoration_neon(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+#define av1_apply_selfguided_restoration av1_apply_selfguided_restoration_neon
+
int64_t av1_block_error_c(const tran_low_t* coeff,
const tran_low_t* dqcoeff,
intptr_t block_size,
@@ -234,6 +258,14 @@ void av1_build_compound_diffwtd_mask_d16_neon(uint8_t* mask,
#define av1_build_compound_diffwtd_mask_d16 \
av1_build_compound_diffwtd_mask_d16_neon
+int64_t av1_calc_frame_error_c(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+#define av1_calc_frame_error av1_calc_frame_error_c
+
void av1_calc_indices_dim1_c(const int* data,
const int* centroids,
uint8_t* indices,
@@ -1314,6 +1346,28 @@ void av1_round_shift_array_c(int32_t* arr, int size, int bit);
void av1_round_shift_array_neon(int32_t* arr, int size, int bit);
#define av1_round_shift_array av1_round_shift_array_neon
+int av1_selfguided_restoration_c(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+int av1_selfguided_restoration_neon(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+#define av1_selfguided_restoration av1_selfguided_restoration_neon
+
void av1_txb_init_levels_c(const tran_low_t* const coeff,
const int width,
const int height,
@@ -1330,6 +1384,44 @@ void av1_upsample_intra_edge_c(uint8_t* p, int sz);
void av1_upsample_intra_edge_high_c(uint16_t* p, int sz, int bd);
#define av1_upsample_intra_edge_high av1_upsample_intra_edge_high_c
+void av1_warp_affine_c(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+void av1_warp_affine_neon(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+#define av1_warp_affine av1_warp_affine_neon
+
void av1_wedge_compute_delta_squares_c(int16_t* d,
const int16_t* a,
const int16_t* b,
@@ -1346,7 +1438,11 @@ uint64_t av1_wedge_sse_from_residuals_c(const int16_t* r1,
const int16_t* d,
const uint8_t* m,
int N);
-#define av1_wedge_sse_from_residuals av1_wedge_sse_from_residuals_c
+uint64_t av1_wedge_sse_from_residuals_neon(const int16_t* r1,
+ const int16_t* d,
+ const uint8_t* m,
+ int N);
+#define av1_wedge_sse_from_residuals av1_wedge_sse_from_residuals_neon
void av1_wiener_convolve_add_src_c(const uint8_t* src,
ptrdiff_t src_stride,
@@ -1610,7 +1706,15 @@ void cdef_find_dir_dual_c(const uint16_t* img1,
int coeff_shift,
int* out1,
int* out2);
-#define cdef_find_dir_dual cdef_find_dir_dual_c
+void cdef_find_dir_dual_neon(const uint16_t* img1,
+ const uint16_t* img2,
+ int stride,
+ int32_t* var1,
+ int32_t* var2,
+ int coeff_shift,
+ int* out1,
+ int* out2);
+#define cdef_find_dir_dual cdef_find_dir_dual_neon
cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_c(TX_SIZE tx_size);
cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_neon(TX_SIZE tx_size);
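
cdef_find_dir_dual, newly routed to its NEON version above, batches the CDEF direction search for two 8x8 blocks into a single call, which is what makes the SIMD variant pay off. A sketch of a call under assumed inputs (blk0/blk1 pointing at two pre-filter blocks of uint16_t pixels; interpreting the out-params as a dominant direction plus a variance score per block is an inference from the signature):

/* Sketch: one call scans two neighbouring 8x8 blocks; each out-param
 * pair receives that block's dominant direction and variance score. */
static void find_two_dirs(const uint16_t* blk0, const uint16_t* blk1,
                          int stride, int coeff_shift,
                          int* dir0, int* dir1) {
  int32_t var0, var1;
  cdef_find_dir_dual(blk0, blk1, stride, &var0, &var1, coeff_shift,
                     dir0, dir1);
}
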
diff --git a/chromium/third_party/libaom/source/config/win/ia32/config/aom_config.asm b/chromium/third_party/libaom/source/config/win/ia32/config/aom_config.asm
index 80045fdd494..565e75f203f 100644
--- a/chromium/third_party/libaom/source/config/win/ia32/config/aom_config.asm
+++ b/chromium/third_party/libaom/source/config/win/ia32/config/aom_config.asm
@@ -1,5 +1,4 @@
%define ARCH_ARM 0
-%define ARCH_MIPS 0
%define ARCH_PPC 0
%define ARCH_X86 1
%define ARCH_X86_64 0
@@ -23,8 +22,6 @@
%define CONFIG_ENTROPY_STATS 0
%define CONFIG_EXCLUDE_SIMD_MISMATCH 0
%define CONFIG_FPMT_TEST 0
-%define CONFIG_FRAME_PARALLEL_ENCODE 0
-%define CONFIG_FRAME_PARALLEL_ENCODE_2 0
%define CONFIG_GCC 0
%define CONFIG_GCOV 0
%define CONFIG_GPROF 0
@@ -59,14 +56,11 @@
%define DECODE_HEIGHT_LIMIT 16384
%define DECODE_WIDTH_LIMIT 16384
%define FORCE_HIGHBITDEPTH_DECODING 0
+%define HAVE_ARM_CRC32 0
%define HAVE_AVX 1
%define HAVE_AVX2 1
-%define HAVE_DSPR2 0
%define HAVE_FEXCEPT 1
-%define HAVE_MIPS32 0
-%define HAVE_MIPS64 0
%define HAVE_MMX 1
-%define HAVE_MSA 0
%define HAVE_NEON 0
%define HAVE_PTHREAD_H 0
%define HAVE_SSE 1
diff --git a/chromium/third_party/libaom/source/config/win/ia32/config/aom_config.c b/chromium/third_party/libaom/source/config/win/ia32/config/aom_config.c
index aaa8142b8b8..dadb41a0d60 100644
--- a/chromium/third_party/libaom/source/config/win/ia32/config/aom_config.c
+++ b/chromium/third_party/libaom/source/config/win/ia32/config/aom_config.c
@@ -9,5 +9,5 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom/aom_codec.h"
-static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DCMAKE_TOOLCHAIN_FILE=\"../source/libaom/build/cmake/toolchains/x86-linux.cmake\" -DAOM_RTCD_FLAGS=--require-mmx;--require-sse;--require-sse2 -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_PIC=1 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
+static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DCMAKE_TOOLCHAIN_FILE=\"../source/libaom/build/cmake/toolchains/x86-linux.cmake\" -DAOM_RTCD_FLAGS=--require-mmx;--require-sse;--require-sse2 -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_PIC=1 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
const char *aom_codec_build_config(void) {return cfg;}
diff --git a/chromium/third_party/libaom/source/config/win/ia32/config/aom_config.h b/chromium/third_party/libaom/source/config/win/ia32/config/aom_config.h
index a0592c2501e..cb33991f44d 100644
--- a/chromium/third_party/libaom/source/config/win/ia32/config/aom_config.h
+++ b/chromium/third_party/libaom/source/config/win/ia32/config/aom_config.h
@@ -11,7 +11,6 @@
#ifndef AOM_CONFIG_H_
#define AOM_CONFIG_H_
#define ARCH_ARM 0
-#define ARCH_MIPS 0
#define ARCH_PPC 0
#define ARCH_X86 1
#define ARCH_X86_64 0
@@ -35,8 +34,6 @@
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_EXCLUDE_SIMD_MISMATCH 0
#define CONFIG_FPMT_TEST 0
-#define CONFIG_FRAME_PARALLEL_ENCODE 0
-#define CONFIG_FRAME_PARALLEL_ENCODE_2 0
#define CONFIG_GCC 0
#define CONFIG_GCOV 0
#define CONFIG_GPROF 0
@@ -71,14 +68,11 @@
#define DECODE_HEIGHT_LIMIT 16384
#define DECODE_WIDTH_LIMIT 16384
#define FORCE_HIGHBITDEPTH_DECODING 0
+#define HAVE_ARM_CRC32 0
#define HAVE_AVX 1
#define HAVE_AVX2 1
-#define HAVE_DSPR2 0
#define HAVE_FEXCEPT 1
-#define HAVE_MIPS32 0
-#define HAVE_MIPS64 0
#define HAVE_MMX 1
-#define HAVE_MSA 0
#define HAVE_NEON 0
#define HAVE_PTHREAD_H 0
#define HAVE_SSE 1
diff --git a/chromium/third_party/libaom/source/config/win/ia32/config/aom_dsp_rtcd.h b/chromium/third_party/libaom/source/config/win/ia32/config/aom_dsp_rtcd.h
index aa189c45f4a..d217a7b9fba 100644
--- a/chromium/third_party/libaom/source/config/win/ia32/config/aom_dsp_rtcd.h
+++ b/chromium/third_party/libaom/source/config/win/ia32/config/aom_dsp_rtcd.h
@@ -360,6 +360,26 @@ void aom_dc_128_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_sse2
+void aom_dc_128_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x4 aom_dc_128_predictor_16x4_sse2
+
+void aom_dc_128_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x64 aom_dc_128_predictor_16x64_sse2
+
void aom_dc_128_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -370,12 +390,6 @@ void aom_dc_128_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_sse2
-void aom_dc_128_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c
-
void aom_dc_128_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -427,6 +441,26 @@ RTCD_EXTERN void (*aom_dc_128_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_dc_128_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_32x8 aom_dc_128_predictor_32x8_sse2
+
+void aom_dc_128_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_4x16 aom_dc_128_predictor_4x16_sse2
+
void aom_dc_128_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -447,6 +481,23 @@ void aom_dc_128_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_sse2
+void aom_dc_128_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_dc_128_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_dc_128_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -491,6 +542,16 @@ void aom_dc_128_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_sse2
+void aom_dc_128_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_8x32 aom_dc_128_predictor_8x32_sse2
+
void aom_dc_128_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -531,6 +592,26 @@ void aom_dc_left_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_sse2
+void aom_dc_left_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x4 aom_dc_left_predictor_16x4_sse2
+
+void aom_dc_left_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x64 aom_dc_left_predictor_16x64_sse2
+
void aom_dc_left_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -541,12 +622,6 @@ void aom_dc_left_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_sse2
-void aom_dc_left_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c
-
void aom_dc_left_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -598,6 +673,26 @@ RTCD_EXTERN void (*aom_dc_left_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_dc_left_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_32x8 aom_dc_left_predictor_32x8_sse2
+
+void aom_dc_left_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_4x16 aom_dc_left_predictor_4x16_sse2
+
void aom_dc_left_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -618,6 +713,23 @@ void aom_dc_left_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_sse2
+void aom_dc_left_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_dc_left_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_dc_left_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -662,6 +774,16 @@ void aom_dc_left_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_sse2
+void aom_dc_left_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_8x32 aom_dc_left_predictor_8x32_sse2
+
void aom_dc_left_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -702,6 +824,26 @@ void aom_dc_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_sse2
+void aom_dc_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x4 aom_dc_predictor_16x4_sse2
+
+void aom_dc_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x64 aom_dc_predictor_16x64_sse2
+
void aom_dc_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -712,12 +854,6 @@ void aom_dc_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_sse2
-void aom_dc_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c
-
void aom_dc_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -769,6 +905,26 @@ RTCD_EXTERN void (*aom_dc_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_dc_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_32x8 aom_dc_predictor_32x8_sse2
+
+void aom_dc_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_4x16 aom_dc_predictor_4x16_sse2
+
void aom_dc_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -789,6 +945,23 @@ void aom_dc_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_sse2
+void aom_dc_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_dc_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_dc_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -833,6 +1006,16 @@ void aom_dc_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_sse2
+void aom_dc_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_8x32 aom_dc_predictor_8x32_sse2
+
void aom_dc_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -873,6 +1056,26 @@ void aom_dc_top_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_sse2
+void aom_dc_top_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x4 aom_dc_top_predictor_16x4_sse2
+
+void aom_dc_top_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x64 aom_dc_top_predictor_16x64_sse2
+
void aom_dc_top_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -883,12 +1086,6 @@ void aom_dc_top_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_sse2
-void aom_dc_top_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c
-
void aom_dc_top_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -940,6 +1137,26 @@ RTCD_EXTERN void (*aom_dc_top_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_dc_top_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_32x8 aom_dc_top_predictor_32x8_sse2
+
+void aom_dc_top_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_4x16 aom_dc_top_predictor_4x16_sse2
+
void aom_dc_top_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -960,6 +1177,23 @@ void aom_dc_top_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_sse2
+void aom_dc_top_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_dc_top_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_dc_top_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1004,6 +1238,16 @@ void aom_dc_top_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_sse2
+void aom_dc_top_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_8x32 aom_dc_top_predictor_8x32_sse2
+
void aom_dc_top_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2024,6 +2268,26 @@ void aom_h_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_16x32 aom_h_predictor_16x32_sse2
+void aom_h_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x4 aom_h_predictor_16x4_sse2
+
+void aom_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x64 aom_h_predictor_16x64_sse2
+
void aom_h_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2034,12 +2298,6 @@ void aom_h_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_16x8 aom_h_predictor_16x8_sse2
-void aom_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_h_predictor_2x2 aom_h_predictor_2x2_c
-
void aom_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2077,6 +2335,26 @@ void aom_h_predictor_32x64_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_32x64 aom_h_predictor_32x64_sse2
+void aom_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_32x8 aom_h_predictor_32x8_sse2
+
+void aom_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_4x16 aom_h_predictor_4x16_sse2
+
void aom_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2097,6 +2375,16 @@ void aom_h_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_4x8 aom_h_predictor_4x8_sse2
+void aom_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_64x16 aom_h_predictor_64x16_sse2
+
void aom_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2127,6 +2415,16 @@ void aom_h_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_8x16 aom_h_predictor_8x16_sse2
+void aom_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_8x32 aom_h_predictor_8x32_sse2
+
void aom_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2189,19 +2487,6 @@ void aom_hadamard_8x8_sse2(const int16_t* src_diff,
tran_low_t* coeff);
#define aom_hadamard_8x8 aom_hadamard_8x8_sse2
-void aom_hadamard_8x8_dual_c(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-void aom_hadamard_8x8_dual_sse2(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-void aom_hadamard_8x8_dual_avx2(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-RTCD_EXTERN void (*aom_hadamard_8x8_dual)(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-
void aom_hadamard_lp_16x16_c(const int16_t* src_diff,
ptrdiff_t src_stride,
int16_t* coeff);
@@ -2223,6 +2508,19 @@ void aom_hadamard_lp_8x8_sse2(const int16_t* src_diff,
int16_t* coeff);
#define aom_hadamard_lp_8x8 aom_hadamard_lp_8x8_sse2
+void aom_hadamard_lp_8x8_dual_c(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+void aom_hadamard_lp_8x8_dual_sse2(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+void aom_hadamard_lp_8x8_dual_avx2(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+RTCD_EXTERN void (*aom_hadamard_lp_8x8_dual)(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+
void aom_ifft16x16_float_c(const float* input, float* temp, float* output);
void aom_ifft16x16_float_sse2(const float* input, float* temp, float* output);
void aom_ifft16x16_float_avx2(const float* input, float* temp, float* output);
@@ -2251,19 +2549,55 @@ RTCD_EXTERN void (*aom_ifft8x8_float)(const float* input,
float* temp,
float* output);
-int16_t aom_int_pro_col_c(const uint8_t* ref, const int width);
-int16_t aom_int_pro_col_sse2(const uint8_t* ref, const int width);
-#define aom_int_pro_col aom_int_pro_col_sse2
+void aom_int_pro_col_c(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_col_sse2(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_col_avx2(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+RTCD_EXTERN void (*aom_int_pro_col)(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
-void aom_int_pro_row_c(int16_t hbuf[16],
+void aom_int_pro_row_c(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
-void aom_int_pro_row_sse2(int16_t hbuf[16],
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_row_sse2(int16_t* hbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_row_avx2(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
-#define aom_int_pro_row aom_int_pro_row_sse2
+ const int width,
+ const int height,
+ int norm_factor);
+RTCD_EXTERN void (*aom_int_pro_row)(int16_t* hbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
void aom_lowbd_blend_a64_d16_mask_c(uint8_t* dst,
uint32_t dst_stride,
@@ -4298,6 +4632,27 @@ unsigned int aom_mse8x8_sse2(const uint8_t* src_ptr,
unsigned int* sse);
#define aom_mse8x8 aom_mse8x8_sse2
+uint64_t aom_mse_16xh_16bit_c(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+uint64_t aom_mse_16xh_16bit_sse2(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+uint64_t aom_mse_16xh_16bit_avx2(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+RTCD_EXTERN uint64_t (*aom_mse_16xh_16bit)(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+
uint64_t aom_mse_wxh_16bit_c(uint8_t* dst,
int dstride,
uint16_t* src,
@@ -4357,6 +4712,36 @@ RTCD_EXTERN void (*aom_paeth_predictor_16x32)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_paeth_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x4_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_16x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_paeth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x64_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x64_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_16x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_paeth_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4374,12 +4759,6 @@ RTCD_EXTERN void (*aom_paeth_predictor_16x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
-void aom_paeth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c
-
void aom_paeth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4431,6 +4810,32 @@ RTCD_EXTERN void (*aom_paeth_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_paeth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_32x8_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_32x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_paeth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_4x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_4x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_paeth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4457,6 +4862,23 @@ RTCD_EXTERN void (*aom_paeth_predictor_4x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_paeth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_64x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_paeth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4504,6 +4926,19 @@ RTCD_EXTERN void (*aom_paeth_predictor_8x16)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_paeth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_8x32_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_8x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_paeth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4580,6 +5015,18 @@ void aom_quantize_b_avx(const tran_low_t* coeff_ptr,
uint16_t* eob_ptr,
const int16_t* scan,
const int16_t* iscan);
+void aom_quantize_b_avx2(const tran_low_t* coeff_ptr,
+ intptr_t n_coeffs,
+ const int16_t* zbin_ptr,
+ const int16_t* round_ptr,
+ const int16_t* quant_ptr,
+ const int16_t* quant_shift_ptr,
+ tran_low_t* qcoeff_ptr,
+ tran_low_t* dqcoeff_ptr,
+ const int16_t* dequant_ptr,
+ uint16_t* eob_ptr,
+ const int16_t* scan,
+ const int16_t* iscan);
RTCD_EXTERN void (*aom_quantize_b)(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -4617,6 +5064,18 @@ void aom_quantize_b_32x32_avx(const tran_low_t* coeff_ptr,
uint16_t* eob_ptr,
const int16_t* scan,
const int16_t* iscan);
+void aom_quantize_b_32x32_avx2(const tran_low_t* coeff_ptr,
+ intptr_t n_coeffs,
+ const int16_t* zbin_ptr,
+ const int16_t* round_ptr,
+ const int16_t* quant_ptr,
+ const int16_t* quant_shift_ptr,
+ tran_low_t* qcoeff_ptr,
+ tran_low_t* dqcoeff_ptr,
+ const int16_t* dequant_ptr,
+ uint16_t* eob_ptr,
+ const int16_t* scan,
+ const int16_t* iscan);
RTCD_EXTERN void (*aom_quantize_b_32x32)(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -4630,32 +5089,6 @@ RTCD_EXTERN void (*aom_quantize_b_32x32)(const tran_low_t* coeff_ptr,
const int16_t* scan,
const int16_t* iscan);
-void aom_quantize_b_32x32_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-void aom_quantize_b_32x32_adaptive_sse2(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_32x32_adaptive aom_quantize_b_32x32_adaptive_sse2
-
void aom_quantize_b_64x64_c(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -4680,6 +5113,18 @@ void aom_quantize_b_64x64_ssse3(const tran_low_t* coeff_ptr,
uint16_t* eob_ptr,
const int16_t* scan,
const int16_t* iscan);
+void aom_quantize_b_64x64_avx2(const tran_low_t* coeff_ptr,
+ intptr_t n_coeffs,
+ const int16_t* zbin_ptr,
+ const int16_t* round_ptr,
+ const int16_t* quant_ptr,
+ const int16_t* quant_shift_ptr,
+ tran_low_t* qcoeff_ptr,
+ tran_low_t* dqcoeff_ptr,
+ const int16_t* dequant_ptr,
+ uint16_t* eob_ptr,
+ const int16_t* scan,
+ const int16_t* iscan);
RTCD_EXTERN void (*aom_quantize_b_64x64)(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -4693,81 +5138,6 @@ RTCD_EXTERN void (*aom_quantize_b_64x64)(const tran_low_t* coeff_ptr,
const int16_t* scan,
const int16_t* iscan);
-void aom_quantize_b_64x64_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-void aom_quantize_b_64x64_adaptive_sse2(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_64x64_adaptive aom_quantize_b_64x64_adaptive_sse2
-
-void aom_quantize_b_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-void aom_quantize_b_adaptive_sse2(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-void aom_quantize_b_adaptive_avx2(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-RTCD_EXTERN void (*aom_quantize_b_adaptive)(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-
unsigned int aom_sad128x128_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
@@ -6256,6 +6626,32 @@ RTCD_EXTERN void (*aom_smooth_h_predictor_16x32)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_h_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_16x4_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_16x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_16x64_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_16x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_h_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6269,12 +6665,6 @@ RTCD_EXTERN void (*aom_smooth_h_predictor_16x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
-void aom_smooth_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_h_predictor_2x2 aom_smooth_h_predictor_2x2_c
-
void aom_smooth_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6314,6 +6704,32 @@ RTCD_EXTERN void (*aom_smooth_h_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_32x8_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_32x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_4x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_4x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6340,6 +6756,19 @@ RTCD_EXTERN void (*aom_smooth_h_predictor_4x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_64x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6379,6 +6808,19 @@ RTCD_EXTERN void (*aom_smooth_h_predictor_8x16)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_8x32_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_8x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6431,6 +6873,32 @@ RTCD_EXTERN void (*aom_smooth_predictor_16x32)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_16x4_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_16x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_16x64_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_16x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6444,12 +6912,6 @@ RTCD_EXTERN void (*aom_smooth_predictor_16x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
-void aom_smooth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c
-
void aom_smooth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6489,6 +6951,32 @@ RTCD_EXTERN void (*aom_smooth_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_32x8_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_32x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_4x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_4x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6515,6 +7003,19 @@ RTCD_EXTERN void (*aom_smooth_predictor_4x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_64x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6554,6 +7055,19 @@ RTCD_EXTERN void (*aom_smooth_predictor_8x16)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_8x32_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_8x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6606,6 +7120,32 @@ RTCD_EXTERN void (*aom_smooth_v_predictor_16x32)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_v_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_16x4_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_16x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_16x64_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_16x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_v_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6619,12 +7159,6 @@ RTCD_EXTERN void (*aom_smooth_v_predictor_16x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
-void aom_smooth_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_v_predictor_2x2 aom_smooth_v_predictor_2x2_c
-
void aom_smooth_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6664,6 +7198,32 @@ RTCD_EXTERN void (*aom_smooth_v_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_32x8_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_32x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_4x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_4x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6690,6 +7250,19 @@ RTCD_EXTERN void (*aom_smooth_v_predictor_4x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_64x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6729,6 +7302,19 @@ RTCD_EXTERN void (*aom_smooth_v_predictor_8x16)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_8x32_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_8x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8035,6 +8621,26 @@ void aom_v_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_16x32 aom_v_predictor_16x32_sse2
+void aom_v_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x4 aom_v_predictor_16x4_sse2
+
+void aom_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x64 aom_v_predictor_16x64_sse2
+
void aom_v_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8045,12 +8651,6 @@ void aom_v_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_16x8 aom_v_predictor_16x8_sse2
-void aom_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_v_predictor_2x2 aom_v_predictor_2x2_c
-
void aom_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8102,6 +8702,26 @@ RTCD_EXTERN void (*aom_v_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_32x8 aom_v_predictor_32x8_sse2
+
+void aom_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_4x16 aom_v_predictor_4x16_sse2
+
void aom_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8122,6 +8742,23 @@ void aom_v_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_4x8 aom_v_predictor_4x8_sse2
+void aom_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_v_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8166,6 +8803,16 @@ void aom_v_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_8x16 aom_v_predictor_8x16_sse2
+void aom_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_8x32 aom_v_predictor_8x32_sse2
+
void aom_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8526,8 +9173,12 @@ unsigned int aom_variance8x8_sse2(const uint8_t* src_ptr,
unsigned int* sse);
#define aom_variance8x8 aom_variance8x8_sse2
-int aom_vector_var_c(const int16_t* ref, const int16_t* src, const int bwl);
-#define aom_vector_var aom_vector_var_c
+int aom_vector_var_c(const int16_t* ref, const int16_t* src, int bwl);
+int aom_vector_var_sse4_1(const int16_t* ref, const int16_t* src, int bwl);
+int aom_vector_var_avx2(const int16_t* ref, const int16_t* src, int bwl);
+RTCD_EXTERN int (*aom_vector_var)(const int16_t* ref,
+ const int16_t* src,
+ int bwl);
void aom_dsp_rtcd(void);
@@ -8579,6 +9230,9 @@ static void setup_rtcd_internal(void) {
aom_dc_128_predictor_32x64 = aom_dc_128_predictor_32x64_sse2;
if (flags & HAS_AVX2)
aom_dc_128_predictor_32x64 = aom_dc_128_predictor_32x64_avx2;
+ aom_dc_128_predictor_64x16 = aom_dc_128_predictor_64x16_sse2;
+ if (flags & HAS_AVX2)
+ aom_dc_128_predictor_64x16 = aom_dc_128_predictor_64x16_avx2;
aom_dc_128_predictor_64x32 = aom_dc_128_predictor_64x32_sse2;
if (flags & HAS_AVX2)
aom_dc_128_predictor_64x32 = aom_dc_128_predictor_64x32_avx2;
@@ -8594,6 +9248,9 @@ static void setup_rtcd_internal(void) {
aom_dc_left_predictor_32x64 = aom_dc_left_predictor_32x64_sse2;
if (flags & HAS_AVX2)
aom_dc_left_predictor_32x64 = aom_dc_left_predictor_32x64_avx2;
+ aom_dc_left_predictor_64x16 = aom_dc_left_predictor_64x16_sse2;
+ if (flags & HAS_AVX2)
+ aom_dc_left_predictor_64x16 = aom_dc_left_predictor_64x16_avx2;
aom_dc_left_predictor_64x32 = aom_dc_left_predictor_64x32_sse2;
if (flags & HAS_AVX2)
aom_dc_left_predictor_64x32 = aom_dc_left_predictor_64x32_avx2;
@@ -8609,6 +9266,9 @@ static void setup_rtcd_internal(void) {
aom_dc_predictor_32x64 = aom_dc_predictor_32x64_sse2;
if (flags & HAS_AVX2)
aom_dc_predictor_32x64 = aom_dc_predictor_32x64_avx2;
+ aom_dc_predictor_64x16 = aom_dc_predictor_64x16_sse2;
+ if (flags & HAS_AVX2)
+ aom_dc_predictor_64x16 = aom_dc_predictor_64x16_avx2;
aom_dc_predictor_64x32 = aom_dc_predictor_64x32_sse2;
if (flags & HAS_AVX2)
aom_dc_predictor_64x32 = aom_dc_predictor_64x32_avx2;
@@ -8624,6 +9284,9 @@ static void setup_rtcd_internal(void) {
aom_dc_top_predictor_32x64 = aom_dc_top_predictor_32x64_sse2;
if (flags & HAS_AVX2)
aom_dc_top_predictor_32x64 = aom_dc_top_predictor_32x64_avx2;
+ aom_dc_top_predictor_64x16 = aom_dc_top_predictor_64x16_sse2;
+ if (flags & HAS_AVX2)
+ aom_dc_top_predictor_64x16 = aom_dc_top_predictor_64x16_avx2;
aom_dc_top_predictor_64x32 = aom_dc_top_predictor_64x32_sse2;
if (flags & HAS_AVX2)
aom_dc_top_predictor_64x32 = aom_dc_top_predictor_64x32_avx2;
@@ -8785,12 +9448,12 @@ static void setup_rtcd_internal(void) {
aom_hadamard_32x32 = aom_hadamard_32x32_sse2;
if (flags & HAS_AVX2)
aom_hadamard_32x32 = aom_hadamard_32x32_avx2;
- aom_hadamard_8x8_dual = aom_hadamard_8x8_dual_sse2;
- if (flags & HAS_AVX2)
- aom_hadamard_8x8_dual = aom_hadamard_8x8_dual_avx2;
aom_hadamard_lp_16x16 = aom_hadamard_lp_16x16_sse2;
if (flags & HAS_AVX2)
aom_hadamard_lp_16x16 = aom_hadamard_lp_16x16_avx2;
+ aom_hadamard_lp_8x8_dual = aom_hadamard_lp_8x8_dual_sse2;
+ if (flags & HAS_AVX2)
+ aom_hadamard_lp_8x8_dual = aom_hadamard_lp_8x8_dual_avx2;
aom_ifft16x16_float = aom_ifft16x16_float_sse2;
if (flags & HAS_AVX2)
aom_ifft16x16_float = aom_ifft16x16_float_avx2;
@@ -8800,6 +9463,12 @@ static void setup_rtcd_internal(void) {
aom_ifft8x8_float = aom_ifft8x8_float_sse2;
if (flags & HAS_AVX2)
aom_ifft8x8_float = aom_ifft8x8_float_avx2;
+ aom_int_pro_col = aom_int_pro_col_sse2;
+ if (flags & HAS_AVX2)
+ aom_int_pro_col = aom_int_pro_col_avx2;
+ aom_int_pro_row = aom_int_pro_row_sse2;
+ if (flags & HAS_AVX2)
+ aom_int_pro_row = aom_int_pro_row_avx2;
aom_lowbd_blend_a64_d16_mask = aom_lowbd_blend_a64_d16_mask_c;
if (flags & HAS_SSE4_1)
aom_lowbd_blend_a64_d16_mask = aom_lowbd_blend_a64_d16_mask_sse4_1;
@@ -9003,6 +9672,9 @@ static void setup_rtcd_internal(void) {
aom_mse16x16 = aom_mse16x16_sse2;
if (flags & HAS_AVX2)
aom_mse16x16 = aom_mse16x16_avx2;
+ aom_mse_16xh_16bit = aom_mse_16xh_16bit_sse2;
+ if (flags & HAS_AVX2)
+ aom_mse_16xh_16bit = aom_mse_16xh_16bit_avx2;
aom_mse_wxh_16bit = aom_mse_wxh_16bit_sse2;
if (flags & HAS_AVX2)
aom_mse_wxh_16bit = aom_mse_wxh_16bit_avx2;
@@ -9016,6 +9688,14 @@ static void setup_rtcd_internal(void) {
aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_ssse3;
if (flags & HAS_AVX2)
aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_avx2;
+ aom_paeth_predictor_16x4 = aom_paeth_predictor_16x4_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_16x4 = aom_paeth_predictor_16x4_ssse3;
+ aom_paeth_predictor_16x64 = aom_paeth_predictor_16x64_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_16x64 = aom_paeth_predictor_16x64_ssse3;
+ if (flags & HAS_AVX2)
+ aom_paeth_predictor_16x64 = aom_paeth_predictor_16x64_avx2;
aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_ssse3;
@@ -9036,12 +9716,23 @@ static void setup_rtcd_internal(void) {
aom_paeth_predictor_32x64 = aom_paeth_predictor_32x64_ssse3;
if (flags & HAS_AVX2)
aom_paeth_predictor_32x64 = aom_paeth_predictor_32x64_avx2;
+ aom_paeth_predictor_32x8 = aom_paeth_predictor_32x8_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_32x8 = aom_paeth_predictor_32x8_ssse3;
+ aom_paeth_predictor_4x16 = aom_paeth_predictor_4x16_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_4x16 = aom_paeth_predictor_4x16_ssse3;
aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_ssse3;
aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_ssse3;
+ aom_paeth_predictor_64x16 = aom_paeth_predictor_64x16_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_64x16 = aom_paeth_predictor_64x16_ssse3;
+ if (flags & HAS_AVX2)
+ aom_paeth_predictor_64x16 = aom_paeth_predictor_64x16_avx2;
aom_paeth_predictor_64x32 = aom_paeth_predictor_64x32_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_64x32 = aom_paeth_predictor_64x32_ssse3;
@@ -9055,6 +9746,9 @@ static void setup_rtcd_internal(void) {
aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_ssse3;
+ aom_paeth_predictor_8x32 = aom_paeth_predictor_8x32_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_8x32 = aom_paeth_predictor_8x32_ssse3;
aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_ssse3;
@@ -9064,15 +9758,18 @@ static void setup_rtcd_internal(void) {
aom_quantize_b = aom_quantize_b_sse2;
if (flags & HAS_AVX)
aom_quantize_b = aom_quantize_b_avx;
+ if (flags & HAS_AVX2)
+ aom_quantize_b = aom_quantize_b_avx2;
aom_quantize_b_32x32 = aom_quantize_b_32x32_c;
if (flags & HAS_AVX)
aom_quantize_b_32x32 = aom_quantize_b_32x32_avx;
+ if (flags & HAS_AVX2)
+ aom_quantize_b_32x32 = aom_quantize_b_32x32_avx2;
aom_quantize_b_64x64 = aom_quantize_b_64x64_c;
if (flags & HAS_SSSE3)
aom_quantize_b_64x64 = aom_quantize_b_64x64_ssse3;
- aom_quantize_b_adaptive = aom_quantize_b_adaptive_sse2;
if (flags & HAS_AVX2)
- aom_quantize_b_adaptive = aom_quantize_b_adaptive_avx2;
+ aom_quantize_b_64x64 = aom_quantize_b_64x64_avx2;
aom_sad128x128 = aom_sad128x128_sse2;
if (flags & HAS_AVX2)
aom_sad128x128 = aom_sad128x128_avx2;
@@ -9208,6 +9905,12 @@ static void setup_rtcd_internal(void) {
aom_smooth_h_predictor_16x32 = aom_smooth_h_predictor_16x32_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_16x32 = aom_smooth_h_predictor_16x32_ssse3;
+ aom_smooth_h_predictor_16x4 = aom_smooth_h_predictor_16x4_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_16x4 = aom_smooth_h_predictor_16x4_ssse3;
+ aom_smooth_h_predictor_16x64 = aom_smooth_h_predictor_16x64_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_16x64 = aom_smooth_h_predictor_16x64_ssse3;
aom_smooth_h_predictor_16x8 = aom_smooth_h_predictor_16x8_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_16x8 = aom_smooth_h_predictor_16x8_ssse3;
@@ -9220,12 +9923,21 @@ static void setup_rtcd_internal(void) {
aom_smooth_h_predictor_32x64 = aom_smooth_h_predictor_32x64_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_32x64 = aom_smooth_h_predictor_32x64_ssse3;
+ aom_smooth_h_predictor_32x8 = aom_smooth_h_predictor_32x8_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_32x8 = aom_smooth_h_predictor_32x8_ssse3;
+ aom_smooth_h_predictor_4x16 = aom_smooth_h_predictor_4x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_4x16 = aom_smooth_h_predictor_4x16_ssse3;
aom_smooth_h_predictor_4x4 = aom_smooth_h_predictor_4x4_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_4x4 = aom_smooth_h_predictor_4x4_ssse3;
aom_smooth_h_predictor_4x8 = aom_smooth_h_predictor_4x8_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_4x8 = aom_smooth_h_predictor_4x8_ssse3;
+ aom_smooth_h_predictor_64x16 = aom_smooth_h_predictor_64x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_64x16 = aom_smooth_h_predictor_64x16_ssse3;
aom_smooth_h_predictor_64x32 = aom_smooth_h_predictor_64x32_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_64x32 = aom_smooth_h_predictor_64x32_ssse3;
@@ -9235,6 +9947,9 @@ static void setup_rtcd_internal(void) {
aom_smooth_h_predictor_8x16 = aom_smooth_h_predictor_8x16_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_8x16 = aom_smooth_h_predictor_8x16_ssse3;
+ aom_smooth_h_predictor_8x32 = aom_smooth_h_predictor_8x32_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_8x32 = aom_smooth_h_predictor_8x32_ssse3;
aom_smooth_h_predictor_8x4 = aom_smooth_h_predictor_8x4_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_8x4 = aom_smooth_h_predictor_8x4_ssse3;
@@ -9247,6 +9962,12 @@ static void setup_rtcd_internal(void) {
aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_ssse3;
+ aom_smooth_predictor_16x4 = aom_smooth_predictor_16x4_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_16x4 = aom_smooth_predictor_16x4_ssse3;
+ aom_smooth_predictor_16x64 = aom_smooth_predictor_16x64_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_16x64 = aom_smooth_predictor_16x64_ssse3;
aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_ssse3;
@@ -9259,12 +9980,21 @@ static void setup_rtcd_internal(void) {
aom_smooth_predictor_32x64 = aom_smooth_predictor_32x64_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_32x64 = aom_smooth_predictor_32x64_ssse3;
+ aom_smooth_predictor_32x8 = aom_smooth_predictor_32x8_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_32x8 = aom_smooth_predictor_32x8_ssse3;
+ aom_smooth_predictor_4x16 = aom_smooth_predictor_4x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_4x16 = aom_smooth_predictor_4x16_ssse3;
aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_ssse3;
aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_ssse3;
+ aom_smooth_predictor_64x16 = aom_smooth_predictor_64x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_64x16 = aom_smooth_predictor_64x16_ssse3;
aom_smooth_predictor_64x32 = aom_smooth_predictor_64x32_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_64x32 = aom_smooth_predictor_64x32_ssse3;
@@ -9274,6 +10004,9 @@ static void setup_rtcd_internal(void) {
aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_ssse3;
+ aom_smooth_predictor_8x32 = aom_smooth_predictor_8x32_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_8x32 = aom_smooth_predictor_8x32_ssse3;
aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_ssse3;
@@ -9286,6 +10019,12 @@ static void setup_rtcd_internal(void) {
aom_smooth_v_predictor_16x32 = aom_smooth_v_predictor_16x32_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_16x32 = aom_smooth_v_predictor_16x32_ssse3;
+ aom_smooth_v_predictor_16x4 = aom_smooth_v_predictor_16x4_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_16x4 = aom_smooth_v_predictor_16x4_ssse3;
+ aom_smooth_v_predictor_16x64 = aom_smooth_v_predictor_16x64_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_16x64 = aom_smooth_v_predictor_16x64_ssse3;
aom_smooth_v_predictor_16x8 = aom_smooth_v_predictor_16x8_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_16x8 = aom_smooth_v_predictor_16x8_ssse3;
@@ -9298,12 +10037,21 @@ static void setup_rtcd_internal(void) {
aom_smooth_v_predictor_32x64 = aom_smooth_v_predictor_32x64_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_32x64 = aom_smooth_v_predictor_32x64_ssse3;
+ aom_smooth_v_predictor_32x8 = aom_smooth_v_predictor_32x8_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_32x8 = aom_smooth_v_predictor_32x8_ssse3;
+ aom_smooth_v_predictor_4x16 = aom_smooth_v_predictor_4x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_4x16 = aom_smooth_v_predictor_4x16_ssse3;
aom_smooth_v_predictor_4x4 = aom_smooth_v_predictor_4x4_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_4x4 = aom_smooth_v_predictor_4x4_ssse3;
aom_smooth_v_predictor_4x8 = aom_smooth_v_predictor_4x8_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_4x8 = aom_smooth_v_predictor_4x8_ssse3;
+ aom_smooth_v_predictor_64x16 = aom_smooth_v_predictor_64x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_64x16 = aom_smooth_v_predictor_64x16_ssse3;
aom_smooth_v_predictor_64x32 = aom_smooth_v_predictor_64x32_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_64x32 = aom_smooth_v_predictor_64x32_ssse3;
@@ -9313,6 +10061,9 @@ static void setup_rtcd_internal(void) {
aom_smooth_v_predictor_8x16 = aom_smooth_v_predictor_8x16_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_8x16 = aom_smooth_v_predictor_8x16_ssse3;
+ aom_smooth_v_predictor_8x32 = aom_smooth_v_predictor_8x32_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_8x32 = aom_smooth_v_predictor_8x32_ssse3;
aom_smooth_v_predictor_8x4 = aom_smooth_v_predictor_8x4_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_8x4 = aom_smooth_v_predictor_8x4_ssse3;
@@ -9476,6 +10227,9 @@ static void setup_rtcd_internal(void) {
aom_v_predictor_32x64 = aom_v_predictor_32x64_sse2;
if (flags & HAS_AVX2)
aom_v_predictor_32x64 = aom_v_predictor_32x64_avx2;
+ aom_v_predictor_64x16 = aom_v_predictor_64x16_sse2;
+ if (flags & HAS_AVX2)
+ aom_v_predictor_64x16 = aom_v_predictor_64x16_avx2;
aom_v_predictor_64x32 = aom_v_predictor_64x32_sse2;
if (flags & HAS_AVX2)
aom_v_predictor_64x32 = aom_v_predictor_64x32_avx2;
@@ -9521,6 +10275,11 @@ static void setup_rtcd_internal(void) {
aom_variance64x64 = aom_variance64x64_sse2;
if (flags & HAS_AVX2)
aom_variance64x64 = aom_variance64x64_avx2;
+ aom_vector_var = aom_vector_var_c;
+ if (flags & HAS_SSE4_1)
+ aom_vector_var = aom_vector_var_sse4_1;
+ if (flags & HAS_AVX2)
+ aom_vector_var = aom_vector_var_avx2;
}
#endif
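Aside: the hunks above all exercise the same run-time CPU detection (RTCD) scheme. Where the configure step can prove a single best kernel for the target (e.g. a baseline-SSE2 build), the generated header binds it statically with a #define; otherwise it declares an RTCD_EXTERN function pointer and setup_rtcd_internal() promotes it from the C fallback to the best ISA the host reports, with the last matching flag winning. The sketch below is a minimal, self-contained illustration of that pattern only: get_cpu_flags(), predictor_c and predictor_avx2 are hypothetical stand-ins for the demo, not libaom's real x86 feature probing or kernels.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define HAS_SSE2 (1 << 0)
#define HAS_AVX2 (1 << 1)

/* Two stand-in implementations of the same kernel signature. */
static void predictor_c(uint8_t *dst, ptrdiff_t stride) {
  (void)stride;
  dst[0] = 128; /* C fallback: trivially fill one pixel for the demo */
}

static void predictor_avx2(uint8_t *dst, ptrdiff_t stride) {
  (void)stride;
  dst[0] = 128; /* pretend this is the vectorized version */
}

/* The dispatch pointer; the generated headers declare these with RTCD_EXTERN. */
static void (*predictor)(uint8_t *dst, ptrdiff_t stride);

/* Toy stand-in for libaom's CPU probing. */
static int get_cpu_flags(void) { return HAS_SSE2; }

static void setup_rtcd_internal_demo(void) {
  int flags = get_cpu_flags();
  predictor = predictor_c;      /* always start from the safe baseline */
  if (flags & HAS_AVX2)
    predictor = predictor_avx2; /* upgrade only when the ISA is present */
}

int main(void) {
  uint8_t row[4] = {0};
  setup_rtcd_internal_demo();
  predictor(row, 4); /* callers always go through the pointer */
  printf("dst[0] = %d\n", row[0]);
  return 0;
}

In libaom itself this setup is run once, via the aom_dsp_rtcd()/av1_rtcd() entry points declared in these headers, before any dispatch-sensitive call.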
diff --git a/chromium/third_party/libaom/source/config/win/ia32/config/aom_scale_rtcd.h b/chromium/third_party/libaom/source/config/win/ia32/config/aom_scale_rtcd.h
index b6059e1426a..5e6c03317d6 100644
--- a/chromium/third_party/libaom/source/config/win/ia32/config/aom_scale_rtcd.h
+++ b/chromium/third_party/libaom/source/config/win/ia32/config/aom_scale_rtcd.h
@@ -18,6 +18,12 @@ void aom_extend_frame_borders_c(struct yv12_buffer_config* ybf,
const int num_planes);
#define aom_extend_frame_borders aom_extend_frame_borders_c
+void aom_extend_frame_borders_plane_row_c(const struct yv12_buffer_config* ybf,
+ int plane,
+ int v_start,
+ int v_end);
+#define aom_extend_frame_borders_plane_row aom_extend_frame_borders_plane_row_c
+
void aom_extend_frame_borders_y_c(struct yv12_buffer_config* ybf);
#define aom_extend_frame_borders_y aom_extend_frame_borders_y_c
diff --git a/chromium/third_party/libaom/source/config/win/ia32/config/av1_rtcd.h b/chromium/third_party/libaom/source/config/win/ia32/config/av1_rtcd.h
index 8d516d90bde..6d8a64d1270 100644
--- a/chromium/third_party/libaom/source/config/win/ia32/config/av1_rtcd.h
+++ b/chromium/third_party/libaom/source/config/win/ia32/config/av1_rtcd.h
@@ -242,6 +242,51 @@ void aom_upsampled_pred_sse2(MACROBLOCKD* xd,
int subpel_search);
#define aom_upsampled_pred aom_upsampled_pred_sse2
+void av1_apply_selfguided_restoration_c(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+void av1_apply_selfguided_restoration_sse4_1(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+void av1_apply_selfguided_restoration_avx2(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+RTCD_EXTERN void (*av1_apply_selfguided_restoration)(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+
int64_t av1_block_error_c(const tran_low_t* coeff,
const tran_low_t* dqcoeff,
intptr_t block_size,
@@ -347,6 +392,31 @@ RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_d16)(
ConvolveParams* conv_params,
int bd);
+int64_t av1_calc_frame_error_c(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+int64_t av1_calc_frame_error_sse2(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+int64_t av1_calc_frame_error_avx2(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+RTCD_EXTERN int64_t (*av1_calc_frame_error)(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+
void av1_calc_indices_dim1_c(const int* data,
const int* centroids,
uint8_t* indices,
@@ -1839,6 +1909,47 @@ void av1_round_shift_array_c(int32_t* arr, int size, int bit);
void av1_round_shift_array_sse4_1(int32_t* arr, int size, int bit);
RTCD_EXTERN void (*av1_round_shift_array)(int32_t* arr, int size, int bit);
+int av1_selfguided_restoration_c(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+int av1_selfguided_restoration_sse4_1(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+int av1_selfguided_restoration_avx2(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+RTCD_EXTERN int (*av1_selfguided_restoration)(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+
void av1_txb_init_levels_c(const tran_low_t* const coeff,
const int width,
const int height,
@@ -1864,6 +1975,79 @@ void av1_upsample_intra_edge_high_c(uint16_t* p, int sz, int bd);
void av1_upsample_intra_edge_high_sse4_1(uint16_t* p, int sz, int bd);
RTCD_EXTERN void (*av1_upsample_intra_edge_high)(uint16_t* p, int sz, int bd);
+void av1_warp_affine_c(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+void av1_warp_affine_sse4_1(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+void av1_warp_affine_avx2(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+RTCD_EXTERN void (*av1_warp_affine)(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+
void av1_wedge_compute_delta_squares_c(int16_t* d,
const int16_t* a,
const int16_t* b,
@@ -2687,6 +2871,11 @@ static void setup_rtcd_internal(void) {
if (flags & HAS_SSSE3)
aom_dist_wtd_comp_avg_upsampled_pred =
aom_dist_wtd_comp_avg_upsampled_pred_ssse3;
+ av1_apply_selfguided_restoration = av1_apply_selfguided_restoration_c;
+ if (flags & HAS_SSE4_1)
+ av1_apply_selfguided_restoration = av1_apply_selfguided_restoration_sse4_1;
+ if (flags & HAS_AVX2)
+ av1_apply_selfguided_restoration = av1_apply_selfguided_restoration_avx2;
av1_block_error = av1_block_error_sse2;
if (flags & HAS_AVX2)
av1_block_error = av1_block_error_avx2;
@@ -2705,6 +2894,9 @@ static void setup_rtcd_internal(void) {
if (flags & HAS_AVX2)
av1_build_compound_diffwtd_mask_d16 =
av1_build_compound_diffwtd_mask_d16_avx2;
+ av1_calc_frame_error = av1_calc_frame_error_sse2;
+ if (flags & HAS_AVX2)
+ av1_calc_frame_error = av1_calc_frame_error_avx2;
av1_calc_indices_dim1 = av1_calc_indices_dim1_sse2;
if (flags & HAS_AVX2)
av1_calc_indices_dim1 = av1_calc_indices_dim1_avx2;
@@ -2903,6 +3095,11 @@ static void setup_rtcd_internal(void) {
av1_round_shift_array = av1_round_shift_array_c;
if (flags & HAS_SSE4_1)
av1_round_shift_array = av1_round_shift_array_sse4_1;
+ av1_selfguided_restoration = av1_selfguided_restoration_c;
+ if (flags & HAS_SSE4_1)
+ av1_selfguided_restoration = av1_selfguided_restoration_sse4_1;
+ if (flags & HAS_AVX2)
+ av1_selfguided_restoration = av1_selfguided_restoration_avx2;
av1_txb_init_levels = av1_txb_init_levels_c;
if (flags & HAS_SSE4_1)
av1_txb_init_levels = av1_txb_init_levels_sse4_1;
@@ -2914,6 +3111,11 @@ static void setup_rtcd_internal(void) {
av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_c;
if (flags & HAS_SSE4_1)
av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_sse4_1;
+ av1_warp_affine = av1_warp_affine_c;
+ if (flags & HAS_SSE4_1)
+ av1_warp_affine = av1_warp_affine_sse4_1;
+ if (flags & HAS_AVX2)
+ av1_warp_affine = av1_warp_affine_avx2;
av1_wedge_compute_delta_squares = av1_wedge_compute_delta_squares_sse2;
if (flags & HAS_AVX2)
av1_wedge_compute_delta_squares = av1_wedge_compute_delta_squares_avx2;
diff --git a/chromium/third_party/libaom/source/config/win/x64/config/aom_config.asm b/chromium/third_party/libaom/source/config/win/x64/config/aom_config.asm
index 9e4de800b6a..6a228f22e53 100644
--- a/chromium/third_party/libaom/source/config/win/x64/config/aom_config.asm
+++ b/chromium/third_party/libaom/source/config/win/x64/config/aom_config.asm
@@ -1,5 +1,4 @@
%define ARCH_ARM 0
-%define ARCH_MIPS 0
%define ARCH_PPC 0
%define ARCH_X86 0
%define ARCH_X86_64 1
@@ -23,8 +22,6 @@
%define CONFIG_ENTROPY_STATS 0
%define CONFIG_EXCLUDE_SIMD_MISMATCH 0
%define CONFIG_FPMT_TEST 0
-%define CONFIG_FRAME_PARALLEL_ENCODE 0
-%define CONFIG_FRAME_PARALLEL_ENCODE_2 0
%define CONFIG_GCC 0
%define CONFIG_GCOV 0
%define CONFIG_GPROF 0
@@ -40,7 +37,7 @@
%define CONFIG_OPTICAL_FLOW_API 0
%define CONFIG_OS_SUPPORT 1
%define CONFIG_PARTITION_SEARCH_ORDER 0
-%define CONFIG_PIC 0
+%define CONFIG_PIC 1
%define CONFIG_RATECTRL_LOG 0
%define CONFIG_RD_COMMAND 0
%define CONFIG_RD_DEBUG 0
@@ -59,14 +56,11 @@
%define DECODE_HEIGHT_LIMIT 16384
%define DECODE_WIDTH_LIMIT 16384
%define FORCE_HIGHBITDEPTH_DECODING 0
+%define HAVE_ARM_CRC32 0
%define HAVE_AVX 1
%define HAVE_AVX2 1
-%define HAVE_DSPR2 0
%define HAVE_FEXCEPT 1
-%define HAVE_MIPS32 0
-%define HAVE_MIPS64 0
%define HAVE_MMX 1
-%define HAVE_MSA 0
%define HAVE_NEON 0
%define HAVE_PTHREAD_H 0
%define HAVE_SSE 1
diff --git a/chromium/third_party/libaom/source/config/win/x64/config/aom_config.c b/chromium/third_party/libaom/source/config/win/x64/config/aom_config.c
index 8786cc6957d..4c037ce451b 100644
--- a/chromium/third_party/libaom/source/config/win/x64/config/aom_config.c
+++ b/chromium/third_party/libaom/source/config/win/x64/config/aom_config.c
@@ -9,5 +9,5 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom/aom_codec.h"
-static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DAOM_TARGET_CPU=x86_64 -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
+static const char* const cfg = "cmake ../source/libaom -G \"Unix Makefiles\" -DAOM_TARGET_CPU=x86_64 -DCONFIG_AV1_ENCODER=1 -DCONFIG_LIBYUV=0 -DCONFIG_AV1_HIGHBITDEPTH=0 -DCONFIG_AV1_TEMPORAL_DENOISING=1 -DCONFIG_REALTIME_ONLY=1 -DCONFIG_MAX_DECODE_PROFILE=0 -DCONFIG_NORMAL_TILE_MODE=1 -DCONFIG_SIZE_LIMIT=1 -DDECODE_HEIGHT_LIMIT=16384 -DDECODE_WIDTH_LIMIT=16384";
const char *aom_codec_build_config(void) {return cfg;}
diff --git a/chromium/third_party/libaom/source/config/win/x64/config/aom_config.h b/chromium/third_party/libaom/source/config/win/x64/config/aom_config.h
index 46ebf88a603..a76264e94c1 100644
--- a/chromium/third_party/libaom/source/config/win/x64/config/aom_config.h
+++ b/chromium/third_party/libaom/source/config/win/x64/config/aom_config.h
@@ -11,7 +11,6 @@
#ifndef AOM_CONFIG_H_
#define AOM_CONFIG_H_
#define ARCH_ARM 0
-#define ARCH_MIPS 0
#define ARCH_PPC 0
#define ARCH_X86 0
#define ARCH_X86_64 1
@@ -35,8 +34,6 @@
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_EXCLUDE_SIMD_MISMATCH 0
#define CONFIG_FPMT_TEST 0
-#define CONFIG_FRAME_PARALLEL_ENCODE 0
-#define CONFIG_FRAME_PARALLEL_ENCODE_2 0
#define CONFIG_GCC 0
#define CONFIG_GCOV 0
#define CONFIG_GPROF 0
@@ -52,7 +49,7 @@
#define CONFIG_OPTICAL_FLOW_API 0
#define CONFIG_OS_SUPPORT 1
#define CONFIG_PARTITION_SEARCH_ORDER 0
-#define CONFIG_PIC 0
+#define CONFIG_PIC 1
#define CONFIG_RATECTRL_LOG 0
#define CONFIG_RD_COMMAND 0
#define CONFIG_RD_DEBUG 0
@@ -71,14 +68,11 @@
#define DECODE_HEIGHT_LIMIT 16384
#define DECODE_WIDTH_LIMIT 16384
#define FORCE_HIGHBITDEPTH_DECODING 0
+#define HAVE_ARM_CRC32 0
#define HAVE_AVX 1
#define HAVE_AVX2 1
-#define HAVE_DSPR2 0
#define HAVE_FEXCEPT 1
-#define HAVE_MIPS32 0
-#define HAVE_MIPS64 0
#define HAVE_MMX 1
-#define HAVE_MSA 0
#define HAVE_NEON 0
#define HAVE_PTHREAD_H 0
#define HAVE_SSE 1
diff --git a/chromium/third_party/libaom/source/config/win/x64/config/aom_dsp_rtcd.h b/chromium/third_party/libaom/source/config/win/x64/config/aom_dsp_rtcd.h
index b04c0cfea23..e1a2841f1fe 100644
--- a/chromium/third_party/libaom/source/config/win/x64/config/aom_dsp_rtcd.h
+++ b/chromium/third_party/libaom/source/config/win/x64/config/aom_dsp_rtcd.h
@@ -360,6 +360,26 @@ void aom_dc_128_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_16x32 aom_dc_128_predictor_16x32_sse2
+void aom_dc_128_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x4 aom_dc_128_predictor_16x4_sse2
+
+void aom_dc_128_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_16x64 aom_dc_128_predictor_16x64_sse2
+
void aom_dc_128_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -370,12 +390,6 @@ void aom_dc_128_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_16x8 aom_dc_128_predictor_16x8_sse2
-void aom_dc_128_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_128_predictor_2x2 aom_dc_128_predictor_2x2_c
-
void aom_dc_128_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -427,6 +441,26 @@ RTCD_EXTERN void (*aom_dc_128_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_dc_128_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_32x8 aom_dc_128_predictor_32x8_sse2
+
+void aom_dc_128_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_4x16 aom_dc_128_predictor_4x16_sse2
+
void aom_dc_128_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -447,6 +481,23 @@ void aom_dc_128_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_4x8 aom_dc_128_predictor_4x8_sse2
+void aom_dc_128_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_dc_128_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_dc_128_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -491,6 +542,16 @@ void aom_dc_128_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_128_predictor_8x16 aom_dc_128_predictor_8x16_sse2
+void aom_dc_128_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_128_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_128_predictor_8x32 aom_dc_128_predictor_8x32_sse2
+
void aom_dc_128_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -531,6 +592,26 @@ void aom_dc_left_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_16x32 aom_dc_left_predictor_16x32_sse2
+void aom_dc_left_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x4 aom_dc_left_predictor_16x4_sse2
+
+void aom_dc_left_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_16x64 aom_dc_left_predictor_16x64_sse2
+
void aom_dc_left_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -541,12 +622,6 @@ void aom_dc_left_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_16x8 aom_dc_left_predictor_16x8_sse2
-void aom_dc_left_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_left_predictor_2x2 aom_dc_left_predictor_2x2_c
-
void aom_dc_left_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -598,6 +673,26 @@ RTCD_EXTERN void (*aom_dc_left_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_dc_left_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_32x8 aom_dc_left_predictor_32x8_sse2
+
+void aom_dc_left_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_4x16 aom_dc_left_predictor_4x16_sse2
+
void aom_dc_left_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -618,6 +713,23 @@ void aom_dc_left_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_4x8 aom_dc_left_predictor_4x8_sse2
+void aom_dc_left_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_dc_left_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_dc_left_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -662,6 +774,16 @@ void aom_dc_left_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_left_predictor_8x16 aom_dc_left_predictor_8x16_sse2
+void aom_dc_left_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_left_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_left_predictor_8x32 aom_dc_left_predictor_8x32_sse2
+
void aom_dc_left_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -702,6 +824,26 @@ void aom_dc_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_16x32 aom_dc_predictor_16x32_sse2
+void aom_dc_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x4 aom_dc_predictor_16x4_sse2
+
+void aom_dc_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_16x64 aom_dc_predictor_16x64_sse2
+
void aom_dc_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -712,12 +854,6 @@ void aom_dc_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_16x8 aom_dc_predictor_16x8_sse2
-void aom_dc_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_predictor_2x2 aom_dc_predictor_2x2_c
-
void aom_dc_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -769,6 +905,26 @@ RTCD_EXTERN void (*aom_dc_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_dc_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_32x8 aom_dc_predictor_32x8_sse2
+
+void aom_dc_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_4x16 aom_dc_predictor_4x16_sse2
+
void aom_dc_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -789,6 +945,23 @@ void aom_dc_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_4x8 aom_dc_predictor_4x8_sse2
+void aom_dc_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_dc_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_dc_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -833,6 +1006,16 @@ void aom_dc_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_predictor_8x16 aom_dc_predictor_8x16_sse2
+void aom_dc_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_predictor_8x32 aom_dc_predictor_8x32_sse2
+
void aom_dc_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -873,6 +1056,26 @@ void aom_dc_top_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_16x32 aom_dc_top_predictor_16x32_sse2
+void aom_dc_top_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x4 aom_dc_top_predictor_16x4_sse2
+
+void aom_dc_top_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_16x64 aom_dc_top_predictor_16x64_sse2
+
void aom_dc_top_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -883,12 +1086,6 @@ void aom_dc_top_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_16x8 aom_dc_top_predictor_16x8_sse2
-void aom_dc_top_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_dc_top_predictor_2x2 aom_dc_top_predictor_2x2_c
-
void aom_dc_top_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -940,6 +1137,26 @@ RTCD_EXTERN void (*aom_dc_top_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_dc_top_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_32x8 aom_dc_top_predictor_32x8_sse2
+
+void aom_dc_top_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_4x16 aom_dc_top_predictor_4x16_sse2
+
void aom_dc_top_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -960,6 +1177,23 @@ void aom_dc_top_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_4x8 aom_dc_top_predictor_4x8_sse2
+void aom_dc_top_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_dc_top_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_dc_top_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -1004,6 +1238,16 @@ void aom_dc_top_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_dc_top_predictor_8x16 aom_dc_top_predictor_8x16_sse2
+void aom_dc_top_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_dc_top_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_dc_top_predictor_8x32 aom_dc_top_predictor_8x32_sse2
+
void aom_dc_top_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2027,6 +2271,26 @@ void aom_h_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_16x32 aom_h_predictor_16x32_sse2
+void aom_h_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x4 aom_h_predictor_16x4_sse2
+
+void aom_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_16x64 aom_h_predictor_16x64_sse2
+
void aom_h_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2037,12 +2301,6 @@ void aom_h_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_16x8 aom_h_predictor_16x8_sse2
-void aom_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_h_predictor_2x2 aom_h_predictor_2x2_c
-
void aom_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2080,6 +2338,26 @@ void aom_h_predictor_32x64_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_32x64 aom_h_predictor_32x64_sse2
+void aom_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_32x8 aom_h_predictor_32x8_sse2
+
+void aom_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_4x16 aom_h_predictor_4x16_sse2
+
void aom_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2100,6 +2378,16 @@ void aom_h_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_4x8 aom_h_predictor_4x8_sse2
+void aom_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_64x16 aom_h_predictor_64x16_sse2
+
void aom_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2130,6 +2418,16 @@ void aom_h_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_h_predictor_8x16 aom_h_predictor_8x16_sse2
+void aom_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_h_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_h_predictor_8x32 aom_h_predictor_8x32_sse2
+
void aom_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -2192,19 +2490,6 @@ void aom_hadamard_8x8_sse2(const int16_t* src_diff,
tran_low_t* coeff);
#define aom_hadamard_8x8 aom_hadamard_8x8_sse2
-void aom_hadamard_8x8_dual_c(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-void aom_hadamard_8x8_dual_sse2(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-void aom_hadamard_8x8_dual_avx2(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-RTCD_EXTERN void (*aom_hadamard_8x8_dual)(const int16_t* src_diff,
- ptrdiff_t src_stride,
- int16_t* coeff);
-
void aom_hadamard_lp_16x16_c(const int16_t* src_diff,
ptrdiff_t src_stride,
int16_t* coeff);
@@ -2226,6 +2511,19 @@ void aom_hadamard_lp_8x8_sse2(const int16_t* src_diff,
int16_t* coeff);
#define aom_hadamard_lp_8x8 aom_hadamard_lp_8x8_sse2
+void aom_hadamard_lp_8x8_dual_c(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+void aom_hadamard_lp_8x8_dual_sse2(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+void aom_hadamard_lp_8x8_dual_avx2(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+RTCD_EXTERN void (*aom_hadamard_lp_8x8_dual)(const int16_t* src_diff,
+ ptrdiff_t src_stride,
+ int16_t* coeff);
+
void aom_ifft16x16_float_c(const float* input, float* temp, float* output);
void aom_ifft16x16_float_sse2(const float* input, float* temp, float* output);
void aom_ifft16x16_float_avx2(const float* input, float* temp, float* output);
@@ -2254,19 +2552,55 @@ RTCD_EXTERN void (*aom_ifft8x8_float)(const float* input,
float* temp,
float* output);
-int16_t aom_int_pro_col_c(const uint8_t* ref, const int width);
-int16_t aom_int_pro_col_sse2(const uint8_t* ref, const int width);
-#define aom_int_pro_col aom_int_pro_col_sse2
+void aom_int_pro_col_c(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_col_sse2(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_col_avx2(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+RTCD_EXTERN void (*aom_int_pro_col)(int16_t* vbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
-void aom_int_pro_row_c(int16_t hbuf[16],
+void aom_int_pro_row_c(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
-void aom_int_pro_row_sse2(int16_t hbuf[16],
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_row_sse2(int16_t* hbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
+void aom_int_pro_row_avx2(int16_t* hbuf,
const uint8_t* ref,
const int ref_stride,
- const int height);
-#define aom_int_pro_row aom_int_pro_row_sse2
+ const int width,
+ const int height,
+ int norm_factor);
+RTCD_EXTERN void (*aom_int_pro_row)(int16_t* hbuf,
+ const uint8_t* ref,
+ const int ref_stride,
+ const int width,
+ const int height,
+ int norm_factor);
void aom_lowbd_blend_a64_d16_mask_c(uint8_t* dst,
uint32_t dst_stride,
@@ -4301,6 +4635,27 @@ unsigned int aom_mse8x8_sse2(const uint8_t* src_ptr,
unsigned int* sse);
#define aom_mse8x8 aom_mse8x8_sse2
+uint64_t aom_mse_16xh_16bit_c(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+uint64_t aom_mse_16xh_16bit_sse2(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+uint64_t aom_mse_16xh_16bit_avx2(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+RTCD_EXTERN uint64_t (*aom_mse_16xh_16bit)(uint8_t* dst,
+ int dstride,
+ uint16_t* src,
+ int w,
+ int h);
+
uint64_t aom_mse_wxh_16bit_c(uint8_t* dst,
int dstride,
uint16_t* src,
@@ -4360,6 +4715,36 @@ RTCD_EXTERN void (*aom_paeth_predictor_16x32)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_paeth_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x4_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_16x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_paeth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x64_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_16x64_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_16x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_paeth_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4377,12 +4762,6 @@ RTCD_EXTERN void (*aom_paeth_predictor_16x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
-void aom_paeth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_paeth_predictor_2x2 aom_paeth_predictor_2x2_c
-
void aom_paeth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4434,6 +4813,32 @@ RTCD_EXTERN void (*aom_paeth_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_paeth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_32x8_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_32x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_paeth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_4x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_4x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_paeth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4460,6 +4865,23 @@ RTCD_EXTERN void (*aom_paeth_predictor_4x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_paeth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_64x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_paeth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4507,6 +4929,19 @@ RTCD_EXTERN void (*aom_paeth_predictor_8x16)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_paeth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_paeth_predictor_8x32_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_paeth_predictor_8x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_paeth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -4595,6 +5030,18 @@ void aom_quantize_b_avx(const tran_low_t* coeff_ptr,
uint16_t* eob_ptr,
const int16_t* scan,
const int16_t* iscan);
+void aom_quantize_b_avx2(const tran_low_t* coeff_ptr,
+ intptr_t n_coeffs,
+ const int16_t* zbin_ptr,
+ const int16_t* round_ptr,
+ const int16_t* quant_ptr,
+ const int16_t* quant_shift_ptr,
+ tran_low_t* qcoeff_ptr,
+ tran_low_t* dqcoeff_ptr,
+ const int16_t* dequant_ptr,
+ uint16_t* eob_ptr,
+ const int16_t* scan,
+ const int16_t* iscan);
RTCD_EXTERN void (*aom_quantize_b)(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -4644,6 +5091,18 @@ void aom_quantize_b_32x32_avx(const tran_low_t* coeff_ptr,
uint16_t* eob_ptr,
const int16_t* scan,
const int16_t* iscan);
+void aom_quantize_b_32x32_avx2(const tran_low_t* coeff_ptr,
+ intptr_t n_coeffs,
+ const int16_t* zbin_ptr,
+ const int16_t* round_ptr,
+ const int16_t* quant_ptr,
+ const int16_t* quant_shift_ptr,
+ tran_low_t* qcoeff_ptr,
+ tran_low_t* dqcoeff_ptr,
+ const int16_t* dequant_ptr,
+ uint16_t* eob_ptr,
+ const int16_t* scan,
+ const int16_t* iscan);
RTCD_EXTERN void (*aom_quantize_b_32x32)(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -4657,32 +5116,6 @@ RTCD_EXTERN void (*aom_quantize_b_32x32)(const tran_low_t* coeff_ptr,
const int16_t* scan,
const int16_t* iscan);
-void aom_quantize_b_32x32_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-void aom_quantize_b_32x32_adaptive_sse2(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_32x32_adaptive aom_quantize_b_32x32_adaptive_sse2
-
void aom_quantize_b_64x64_c(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -4707,6 +5140,18 @@ void aom_quantize_b_64x64_ssse3(const tran_low_t* coeff_ptr,
uint16_t* eob_ptr,
const int16_t* scan,
const int16_t* iscan);
+void aom_quantize_b_64x64_avx2(const tran_low_t* coeff_ptr,
+ intptr_t n_coeffs,
+ const int16_t* zbin_ptr,
+ const int16_t* round_ptr,
+ const int16_t* quant_ptr,
+ const int16_t* quant_shift_ptr,
+ tran_low_t* qcoeff_ptr,
+ tran_low_t* dqcoeff_ptr,
+ const int16_t* dequant_ptr,
+ uint16_t* eob_ptr,
+ const int16_t* scan,
+ const int16_t* iscan);
RTCD_EXTERN void (*aom_quantize_b_64x64)(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
const int16_t* zbin_ptr,
@@ -4720,81 +5165,6 @@ RTCD_EXTERN void (*aom_quantize_b_64x64)(const tran_low_t* coeff_ptr,
const int16_t* scan,
const int16_t* iscan);
-void aom_quantize_b_64x64_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-void aom_quantize_b_64x64_adaptive_sse2(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-#define aom_quantize_b_64x64_adaptive aom_quantize_b_64x64_adaptive_sse2
-
-void aom_quantize_b_adaptive_c(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-void aom_quantize_b_adaptive_sse2(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-void aom_quantize_b_adaptive_avx2(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-RTCD_EXTERN void (*aom_quantize_b_adaptive)(const tran_low_t* coeff_ptr,
- intptr_t n_coeffs,
- const int16_t* zbin_ptr,
- const int16_t* round_ptr,
- const int16_t* quant_ptr,
- const int16_t* quant_shift_ptr,
- tran_low_t* qcoeff_ptr,
- tran_low_t* dqcoeff_ptr,
- const int16_t* dequant_ptr,
- uint16_t* eob_ptr,
- const int16_t* scan,
- const int16_t* iscan);
-
unsigned int aom_sad128x128_c(const uint8_t* src_ptr,
int src_stride,
const uint8_t* ref_ptr,
@@ -6283,6 +6653,32 @@ RTCD_EXTERN void (*aom_smooth_h_predictor_16x32)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_h_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_16x4_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_16x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_h_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_16x64_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_16x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_h_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6296,12 +6692,6 @@ RTCD_EXTERN void (*aom_smooth_h_predictor_16x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
-void aom_smooth_h_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_h_predictor_2x2 aom_smooth_h_predictor_2x2_c
-
void aom_smooth_h_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6341,6 +6731,32 @@ RTCD_EXTERN void (*aom_smooth_h_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_h_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_32x8_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_32x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_h_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_4x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_4x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_h_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6367,6 +6783,19 @@ RTCD_EXTERN void (*aom_smooth_h_predictor_4x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_h_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_64x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_h_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6406,6 +6835,19 @@ RTCD_EXTERN void (*aom_smooth_h_predictor_8x16)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_h_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_h_predictor_8x32_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_h_predictor_8x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_h_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6458,6 +6900,32 @@ RTCD_EXTERN void (*aom_smooth_predictor_16x32)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_16x4_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_16x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_16x64_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_16x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6471,12 +6939,6 @@ RTCD_EXTERN void (*aom_smooth_predictor_16x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
-void aom_smooth_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_predictor_2x2 aom_smooth_predictor_2x2_c
-
void aom_smooth_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6516,6 +6978,32 @@ RTCD_EXTERN void (*aom_smooth_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_32x8_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_32x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_4x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_4x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6542,6 +7030,19 @@ RTCD_EXTERN void (*aom_smooth_predictor_4x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_64x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6581,6 +7082,19 @@ RTCD_EXTERN void (*aom_smooth_predictor_8x16)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_predictor_8x32_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_predictor_8x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6633,6 +7147,32 @@ RTCD_EXTERN void (*aom_smooth_v_predictor_16x32)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_v_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_16x4_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_16x4)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_16x64_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_16x64)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_v_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6646,12 +7186,6 @@ RTCD_EXTERN void (*aom_smooth_v_predictor_16x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
-void aom_smooth_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_smooth_v_predictor_2x2 aom_smooth_v_predictor_2x2_c
-
void aom_smooth_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6691,6 +7225,32 @@ RTCD_EXTERN void (*aom_smooth_v_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_32x8_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_32x8)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
+void aom_smooth_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_4x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_4x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6717,6 +7277,19 @@ RTCD_EXTERN void (*aom_smooth_v_predictor_4x8)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_64x16_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -6756,6 +7329,19 @@ RTCD_EXTERN void (*aom_smooth_v_predictor_8x16)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_smooth_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_smooth_v_predictor_8x32_ssse3(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_smooth_v_predictor_8x32)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_smooth_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8071,6 +8657,26 @@ void aom_v_predictor_16x32_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_16x32 aom_v_predictor_16x32_sse2
+void aom_v_predictor_16x4_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_16x4_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x4 aom_v_predictor_16x4_sse2
+
+void aom_v_predictor_16x64_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_16x64_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_16x64 aom_v_predictor_16x64_sse2
+
void aom_v_predictor_16x8_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8081,12 +8687,6 @@ void aom_v_predictor_16x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_16x8 aom_v_predictor_16x8_sse2
-void aom_v_predictor_2x2_c(uint8_t* dst,
- ptrdiff_t y_stride,
- const uint8_t* above,
- const uint8_t* left);
-#define aom_v_predictor_2x2 aom_v_predictor_2x2_c
-
void aom_v_predictor_32x16_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8138,6 +8738,26 @@ RTCD_EXTERN void (*aom_v_predictor_32x64)(uint8_t* dst,
const uint8_t* above,
const uint8_t* left);
+void aom_v_predictor_32x8_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_32x8_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_32x8 aom_v_predictor_32x8_sse2
+
+void aom_v_predictor_4x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_4x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_4x16 aom_v_predictor_4x16_sse2
+
void aom_v_predictor_4x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8158,6 +8778,23 @@ void aom_v_predictor_4x8_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_4x8 aom_v_predictor_4x8_sse2
+void aom_v_predictor_64x16_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_64x16_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_64x16_avx2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+RTCD_EXTERN void (*aom_v_predictor_64x16)(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+
void aom_v_predictor_64x32_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8202,6 +8839,16 @@ void aom_v_predictor_8x16_sse2(uint8_t* dst,
const uint8_t* left);
#define aom_v_predictor_8x16 aom_v_predictor_8x16_sse2
+void aom_v_predictor_8x32_c(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+void aom_v_predictor_8x32_sse2(uint8_t* dst,
+ ptrdiff_t y_stride,
+ const uint8_t* above,
+ const uint8_t* left);
+#define aom_v_predictor_8x32 aom_v_predictor_8x32_sse2
+
void aom_v_predictor_8x4_c(uint8_t* dst,
ptrdiff_t y_stride,
const uint8_t* above,
@@ -8562,8 +9209,12 @@ unsigned int aom_variance8x8_sse2(const uint8_t* src_ptr,
unsigned int* sse);
#define aom_variance8x8 aom_variance8x8_sse2
-int aom_vector_var_c(const int16_t* ref, const int16_t* src, const int bwl);
-#define aom_vector_var aom_vector_var_c
+int aom_vector_var_c(const int16_t* ref, const int16_t* src, int bwl);
+int aom_vector_var_sse4_1(const int16_t* ref, const int16_t* src, int bwl);
+int aom_vector_var_avx2(const int16_t* ref, const int16_t* src, int bwl);
+RTCD_EXTERN int (*aom_vector_var)(const int16_t* ref,
+ const int16_t* src,
+ int bwl);
void aom_dsp_rtcd(void);
@@ -8615,6 +9266,9 @@ static void setup_rtcd_internal(void) {
aom_dc_128_predictor_32x64 = aom_dc_128_predictor_32x64_sse2;
if (flags & HAS_AVX2)
aom_dc_128_predictor_32x64 = aom_dc_128_predictor_32x64_avx2;
+ aom_dc_128_predictor_64x16 = aom_dc_128_predictor_64x16_sse2;
+ if (flags & HAS_AVX2)
+ aom_dc_128_predictor_64x16 = aom_dc_128_predictor_64x16_avx2;
aom_dc_128_predictor_64x32 = aom_dc_128_predictor_64x32_sse2;
if (flags & HAS_AVX2)
aom_dc_128_predictor_64x32 = aom_dc_128_predictor_64x32_avx2;
@@ -8630,6 +9284,9 @@ static void setup_rtcd_internal(void) {
aom_dc_left_predictor_32x64 = aom_dc_left_predictor_32x64_sse2;
if (flags & HAS_AVX2)
aom_dc_left_predictor_32x64 = aom_dc_left_predictor_32x64_avx2;
+ aom_dc_left_predictor_64x16 = aom_dc_left_predictor_64x16_sse2;
+ if (flags & HAS_AVX2)
+ aom_dc_left_predictor_64x16 = aom_dc_left_predictor_64x16_avx2;
aom_dc_left_predictor_64x32 = aom_dc_left_predictor_64x32_sse2;
if (flags & HAS_AVX2)
aom_dc_left_predictor_64x32 = aom_dc_left_predictor_64x32_avx2;
@@ -8645,6 +9302,9 @@ static void setup_rtcd_internal(void) {
aom_dc_predictor_32x64 = aom_dc_predictor_32x64_sse2;
if (flags & HAS_AVX2)
aom_dc_predictor_32x64 = aom_dc_predictor_32x64_avx2;
+ aom_dc_predictor_64x16 = aom_dc_predictor_64x16_sse2;
+ if (flags & HAS_AVX2)
+ aom_dc_predictor_64x16 = aom_dc_predictor_64x16_avx2;
aom_dc_predictor_64x32 = aom_dc_predictor_64x32_sse2;
if (flags & HAS_AVX2)
aom_dc_predictor_64x32 = aom_dc_predictor_64x32_avx2;
@@ -8660,6 +9320,9 @@ static void setup_rtcd_internal(void) {
aom_dc_top_predictor_32x64 = aom_dc_top_predictor_32x64_sse2;
if (flags & HAS_AVX2)
aom_dc_top_predictor_32x64 = aom_dc_top_predictor_32x64_avx2;
+ aom_dc_top_predictor_64x16 = aom_dc_top_predictor_64x16_sse2;
+ if (flags & HAS_AVX2)
+ aom_dc_top_predictor_64x16 = aom_dc_top_predictor_64x16_avx2;
aom_dc_top_predictor_64x32 = aom_dc_top_predictor_64x32_sse2;
if (flags & HAS_AVX2)
aom_dc_top_predictor_64x32 = aom_dc_top_predictor_64x32_avx2;
@@ -8824,12 +9487,12 @@ static void setup_rtcd_internal(void) {
aom_hadamard_32x32 = aom_hadamard_32x32_sse2;
if (flags & HAS_AVX2)
aom_hadamard_32x32 = aom_hadamard_32x32_avx2;
- aom_hadamard_8x8_dual = aom_hadamard_8x8_dual_sse2;
- if (flags & HAS_AVX2)
- aom_hadamard_8x8_dual = aom_hadamard_8x8_dual_avx2;
aom_hadamard_lp_16x16 = aom_hadamard_lp_16x16_sse2;
if (flags & HAS_AVX2)
aom_hadamard_lp_16x16 = aom_hadamard_lp_16x16_avx2;
+ aom_hadamard_lp_8x8_dual = aom_hadamard_lp_8x8_dual_sse2;
+ if (flags & HAS_AVX2)
+ aom_hadamard_lp_8x8_dual = aom_hadamard_lp_8x8_dual_avx2;
aom_ifft16x16_float = aom_ifft16x16_float_sse2;
if (flags & HAS_AVX2)
aom_ifft16x16_float = aom_ifft16x16_float_avx2;
@@ -8839,6 +9502,12 @@ static void setup_rtcd_internal(void) {
aom_ifft8x8_float = aom_ifft8x8_float_sse2;
if (flags & HAS_AVX2)
aom_ifft8x8_float = aom_ifft8x8_float_avx2;
+ aom_int_pro_col = aom_int_pro_col_sse2;
+ if (flags & HAS_AVX2)
+ aom_int_pro_col = aom_int_pro_col_avx2;
+ aom_int_pro_row = aom_int_pro_row_sse2;
+ if (flags & HAS_AVX2)
+ aom_int_pro_row = aom_int_pro_row_avx2;
aom_lowbd_blend_a64_d16_mask = aom_lowbd_blend_a64_d16_mask_c;
if (flags & HAS_SSE4_1)
aom_lowbd_blend_a64_d16_mask = aom_lowbd_blend_a64_d16_mask_sse4_1;
@@ -9042,6 +9711,9 @@ static void setup_rtcd_internal(void) {
aom_mse16x16 = aom_mse16x16_sse2;
if (flags & HAS_AVX2)
aom_mse16x16 = aom_mse16x16_avx2;
+ aom_mse_16xh_16bit = aom_mse_16xh_16bit_sse2;
+ if (flags & HAS_AVX2)
+ aom_mse_16xh_16bit = aom_mse_16xh_16bit_avx2;
aom_mse_wxh_16bit = aom_mse_wxh_16bit_sse2;
if (flags & HAS_AVX2)
aom_mse_wxh_16bit = aom_mse_wxh_16bit_avx2;
@@ -9055,6 +9727,14 @@ static void setup_rtcd_internal(void) {
aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_ssse3;
if (flags & HAS_AVX2)
aom_paeth_predictor_16x32 = aom_paeth_predictor_16x32_avx2;
+ aom_paeth_predictor_16x4 = aom_paeth_predictor_16x4_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_16x4 = aom_paeth_predictor_16x4_ssse3;
+ aom_paeth_predictor_16x64 = aom_paeth_predictor_16x64_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_16x64 = aom_paeth_predictor_16x64_ssse3;
+ if (flags & HAS_AVX2)
+ aom_paeth_predictor_16x64 = aom_paeth_predictor_16x64_avx2;
aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_16x8 = aom_paeth_predictor_16x8_ssse3;
@@ -9075,12 +9755,23 @@ static void setup_rtcd_internal(void) {
aom_paeth_predictor_32x64 = aom_paeth_predictor_32x64_ssse3;
if (flags & HAS_AVX2)
aom_paeth_predictor_32x64 = aom_paeth_predictor_32x64_avx2;
+ aom_paeth_predictor_32x8 = aom_paeth_predictor_32x8_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_32x8 = aom_paeth_predictor_32x8_ssse3;
+ aom_paeth_predictor_4x16 = aom_paeth_predictor_4x16_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_4x16 = aom_paeth_predictor_4x16_ssse3;
aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_4x4 = aom_paeth_predictor_4x4_ssse3;
aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_4x8 = aom_paeth_predictor_4x8_ssse3;
+ aom_paeth_predictor_64x16 = aom_paeth_predictor_64x16_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_64x16 = aom_paeth_predictor_64x16_ssse3;
+ if (flags & HAS_AVX2)
+ aom_paeth_predictor_64x16 = aom_paeth_predictor_64x16_avx2;
aom_paeth_predictor_64x32 = aom_paeth_predictor_64x32_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_64x32 = aom_paeth_predictor_64x32_ssse3;
@@ -9094,6 +9785,9 @@ static void setup_rtcd_internal(void) {
aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_8x16 = aom_paeth_predictor_8x16_ssse3;
+ aom_paeth_predictor_8x32 = aom_paeth_predictor_8x32_c;
+ if (flags & HAS_SSSE3)
+ aom_paeth_predictor_8x32 = aom_paeth_predictor_8x32_ssse3;
aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_c;
if (flags & HAS_SSSE3)
aom_paeth_predictor_8x4 = aom_paeth_predictor_8x4_ssse3;
@@ -9105,17 +9799,20 @@ static void setup_rtcd_internal(void) {
aom_quantize_b = aom_quantize_b_ssse3;
if (flags & HAS_AVX)
aom_quantize_b = aom_quantize_b_avx;
+ if (flags & HAS_AVX2)
+ aom_quantize_b = aom_quantize_b_avx2;
aom_quantize_b_32x32 = aom_quantize_b_32x32_c;
if (flags & HAS_SSSE3)
aom_quantize_b_32x32 = aom_quantize_b_32x32_ssse3;
if (flags & HAS_AVX)
aom_quantize_b_32x32 = aom_quantize_b_32x32_avx;
+ if (flags & HAS_AVX2)
+ aom_quantize_b_32x32 = aom_quantize_b_32x32_avx2;
aom_quantize_b_64x64 = aom_quantize_b_64x64_c;
if (flags & HAS_SSSE3)
aom_quantize_b_64x64 = aom_quantize_b_64x64_ssse3;
- aom_quantize_b_adaptive = aom_quantize_b_adaptive_sse2;
if (flags & HAS_AVX2)
- aom_quantize_b_adaptive = aom_quantize_b_adaptive_avx2;
+ aom_quantize_b_64x64 = aom_quantize_b_64x64_avx2;
aom_sad128x128 = aom_sad128x128_sse2;
if (flags & HAS_AVX2)
aom_sad128x128 = aom_sad128x128_avx2;
@@ -9251,6 +9948,12 @@ static void setup_rtcd_internal(void) {
aom_smooth_h_predictor_16x32 = aom_smooth_h_predictor_16x32_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_16x32 = aom_smooth_h_predictor_16x32_ssse3;
+ aom_smooth_h_predictor_16x4 = aom_smooth_h_predictor_16x4_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_16x4 = aom_smooth_h_predictor_16x4_ssse3;
+ aom_smooth_h_predictor_16x64 = aom_smooth_h_predictor_16x64_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_16x64 = aom_smooth_h_predictor_16x64_ssse3;
aom_smooth_h_predictor_16x8 = aom_smooth_h_predictor_16x8_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_16x8 = aom_smooth_h_predictor_16x8_ssse3;
@@ -9263,12 +9966,21 @@ static void setup_rtcd_internal(void) {
aom_smooth_h_predictor_32x64 = aom_smooth_h_predictor_32x64_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_32x64 = aom_smooth_h_predictor_32x64_ssse3;
+ aom_smooth_h_predictor_32x8 = aom_smooth_h_predictor_32x8_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_32x8 = aom_smooth_h_predictor_32x8_ssse3;
+ aom_smooth_h_predictor_4x16 = aom_smooth_h_predictor_4x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_4x16 = aom_smooth_h_predictor_4x16_ssse3;
aom_smooth_h_predictor_4x4 = aom_smooth_h_predictor_4x4_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_4x4 = aom_smooth_h_predictor_4x4_ssse3;
aom_smooth_h_predictor_4x8 = aom_smooth_h_predictor_4x8_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_4x8 = aom_smooth_h_predictor_4x8_ssse3;
+ aom_smooth_h_predictor_64x16 = aom_smooth_h_predictor_64x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_64x16 = aom_smooth_h_predictor_64x16_ssse3;
aom_smooth_h_predictor_64x32 = aom_smooth_h_predictor_64x32_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_64x32 = aom_smooth_h_predictor_64x32_ssse3;
@@ -9278,6 +9990,9 @@ static void setup_rtcd_internal(void) {
aom_smooth_h_predictor_8x16 = aom_smooth_h_predictor_8x16_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_8x16 = aom_smooth_h_predictor_8x16_ssse3;
+ aom_smooth_h_predictor_8x32 = aom_smooth_h_predictor_8x32_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_h_predictor_8x32 = aom_smooth_h_predictor_8x32_ssse3;
aom_smooth_h_predictor_8x4 = aom_smooth_h_predictor_8x4_c;
if (flags & HAS_SSSE3)
aom_smooth_h_predictor_8x4 = aom_smooth_h_predictor_8x4_ssse3;
@@ -9290,6 +10005,12 @@ static void setup_rtcd_internal(void) {
aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_16x32 = aom_smooth_predictor_16x32_ssse3;
+ aom_smooth_predictor_16x4 = aom_smooth_predictor_16x4_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_16x4 = aom_smooth_predictor_16x4_ssse3;
+ aom_smooth_predictor_16x64 = aom_smooth_predictor_16x64_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_16x64 = aom_smooth_predictor_16x64_ssse3;
aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_16x8 = aom_smooth_predictor_16x8_ssse3;
@@ -9302,12 +10023,21 @@ static void setup_rtcd_internal(void) {
aom_smooth_predictor_32x64 = aom_smooth_predictor_32x64_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_32x64 = aom_smooth_predictor_32x64_ssse3;
+ aom_smooth_predictor_32x8 = aom_smooth_predictor_32x8_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_32x8 = aom_smooth_predictor_32x8_ssse3;
+ aom_smooth_predictor_4x16 = aom_smooth_predictor_4x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_4x16 = aom_smooth_predictor_4x16_ssse3;
aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_4x4 = aom_smooth_predictor_4x4_ssse3;
aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_4x8 = aom_smooth_predictor_4x8_ssse3;
+ aom_smooth_predictor_64x16 = aom_smooth_predictor_64x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_64x16 = aom_smooth_predictor_64x16_ssse3;
aom_smooth_predictor_64x32 = aom_smooth_predictor_64x32_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_64x32 = aom_smooth_predictor_64x32_ssse3;
@@ -9317,6 +10047,9 @@ static void setup_rtcd_internal(void) {
aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_8x16 = aom_smooth_predictor_8x16_ssse3;
+ aom_smooth_predictor_8x32 = aom_smooth_predictor_8x32_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_predictor_8x32 = aom_smooth_predictor_8x32_ssse3;
aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_c;
if (flags & HAS_SSSE3)
aom_smooth_predictor_8x4 = aom_smooth_predictor_8x4_ssse3;
@@ -9329,6 +10062,12 @@ static void setup_rtcd_internal(void) {
aom_smooth_v_predictor_16x32 = aom_smooth_v_predictor_16x32_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_16x32 = aom_smooth_v_predictor_16x32_ssse3;
+ aom_smooth_v_predictor_16x4 = aom_smooth_v_predictor_16x4_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_16x4 = aom_smooth_v_predictor_16x4_ssse3;
+ aom_smooth_v_predictor_16x64 = aom_smooth_v_predictor_16x64_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_16x64 = aom_smooth_v_predictor_16x64_ssse3;
aom_smooth_v_predictor_16x8 = aom_smooth_v_predictor_16x8_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_16x8 = aom_smooth_v_predictor_16x8_ssse3;
@@ -9341,12 +10080,21 @@ static void setup_rtcd_internal(void) {
aom_smooth_v_predictor_32x64 = aom_smooth_v_predictor_32x64_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_32x64 = aom_smooth_v_predictor_32x64_ssse3;
+ aom_smooth_v_predictor_32x8 = aom_smooth_v_predictor_32x8_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_32x8 = aom_smooth_v_predictor_32x8_ssse3;
+ aom_smooth_v_predictor_4x16 = aom_smooth_v_predictor_4x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_4x16 = aom_smooth_v_predictor_4x16_ssse3;
aom_smooth_v_predictor_4x4 = aom_smooth_v_predictor_4x4_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_4x4 = aom_smooth_v_predictor_4x4_ssse3;
aom_smooth_v_predictor_4x8 = aom_smooth_v_predictor_4x8_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_4x8 = aom_smooth_v_predictor_4x8_ssse3;
+ aom_smooth_v_predictor_64x16 = aom_smooth_v_predictor_64x16_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_64x16 = aom_smooth_v_predictor_64x16_ssse3;
aom_smooth_v_predictor_64x32 = aom_smooth_v_predictor_64x32_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_64x32 = aom_smooth_v_predictor_64x32_ssse3;
@@ -9356,6 +10104,9 @@ static void setup_rtcd_internal(void) {
aom_smooth_v_predictor_8x16 = aom_smooth_v_predictor_8x16_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_8x16 = aom_smooth_v_predictor_8x16_ssse3;
+ aom_smooth_v_predictor_8x32 = aom_smooth_v_predictor_8x32_c;
+ if (flags & HAS_SSSE3)
+ aom_smooth_v_predictor_8x32 = aom_smooth_v_predictor_8x32_ssse3;
aom_smooth_v_predictor_8x4 = aom_smooth_v_predictor_8x4_c;
if (flags & HAS_SSSE3)
aom_smooth_v_predictor_8x4 = aom_smooth_v_predictor_8x4_ssse3;
@@ -9519,6 +10270,9 @@ static void setup_rtcd_internal(void) {
aom_v_predictor_32x64 = aom_v_predictor_32x64_sse2;
if (flags & HAS_AVX2)
aom_v_predictor_32x64 = aom_v_predictor_32x64_avx2;
+ aom_v_predictor_64x16 = aom_v_predictor_64x16_sse2;
+ if (flags & HAS_AVX2)
+ aom_v_predictor_64x16 = aom_v_predictor_64x16_avx2;
aom_v_predictor_64x32 = aom_v_predictor_64x32_sse2;
if (flags & HAS_AVX2)
aom_v_predictor_64x32 = aom_v_predictor_64x32_avx2;
@@ -9564,6 +10318,11 @@ static void setup_rtcd_internal(void) {
aom_variance64x64 = aom_variance64x64_sse2;
if (flags & HAS_AVX2)
aom_variance64x64 = aom_variance64x64_avx2;
+ aom_vector_var = aom_vector_var_c;
+ if (flags & HAS_SSE4_1)
+ aom_vector_var = aom_vector_var_sse4_1;
+ if (flags & HAS_AVX2)
+ aom_vector_var = aom_vector_var_avx2;
}
#endif
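
[Reviewer note] The pattern repeated throughout the header above is libaom's run-time CPU detection (RTCD): each kernel with SIMD variants is declared once per ISA, exposed through an RTCD_EXTERN function pointer, and bound in setup_rtcd_internal() by testing CPU feature flags from weakest to strongest, so the widest supported ISA wins. That also explains a recurring shape in this diff: when a kernel gains an AVX2 variant (e.g. aom_dc_128_predictor_64x16), its binding is converted from a compile-time #define into an RTCD_EXTERN pointer plus a setup entry. Below is a minimal self-contained sketch of the mechanism; my_kernel*, get_cpu_flags and the flag constants are hypothetical stand-ins, not libaom API.

#include <stdio.h>

#define HAS_SSE2 (1 << 0)
#define HAS_AVX2 (1 << 1)

/* One implementation per ISA; the _c version is the portable fallback. */
static void my_kernel_c(int n)    { printf("C kernel, n=%d\n", n); }
static void my_kernel_sse2(int n) { printf("SSE2 kernel, n=%d\n", n); }
static void my_kernel_avx2(int n) { printf("AVX2 kernel, n=%d\n", n); }

/* RTCD_EXTERN in the real headers; a plain function pointer here. */
static void (*my_kernel)(int n);

/* Stand-in for libaom's CPU probe. */
static int get_cpu_flags(void) { return HAS_SSE2 | HAS_AVX2; }

static void setup_rtcd_internal_sketch(void) {
  const int flags = get_cpu_flags();
  my_kernel = my_kernel_c;      /* safe portable default      */
  if (flags & HAS_SSE2)
    my_kernel = my_kernel_sse2; /* each test overrides, so... */
  if (flags & HAS_AVX2)
    my_kernel = my_kernel_avx2; /* ...the best ISA wins       */
}

int main(void) {
  setup_rtcd_internal_sketch();
  my_kernel(42); /* callers dispatch through the pointer transparently */
  return 0;
}

On x64 builds SSE2 is part of the baseline, which is why kernels whose best variant is SSE2 keep a plain #define binding and never appear in setup_rtcd_internal().
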
diff --git a/chromium/third_party/libaom/source/config/win/x64/config/aom_scale_rtcd.h b/chromium/third_party/libaom/source/config/win/x64/config/aom_scale_rtcd.h
index b6059e1426a..5e6c03317d6 100644
--- a/chromium/third_party/libaom/source/config/win/x64/config/aom_scale_rtcd.h
+++ b/chromium/third_party/libaom/source/config/win/x64/config/aom_scale_rtcd.h
@@ -18,6 +18,12 @@ void aom_extend_frame_borders_c(struct yv12_buffer_config* ybf,
const int num_planes);
#define aom_extend_frame_borders aom_extend_frame_borders_c
+void aom_extend_frame_borders_plane_row_c(const struct yv12_buffer_config* ybf,
+ int plane,
+ int v_start,
+ int v_end);
+#define aom_extend_frame_borders_plane_row aom_extend_frame_borders_plane_row_c
+
void aom_extend_frame_borders_y_c(struct yv12_buffer_config* ybf);
#define aom_extend_frame_borders_y aom_extend_frame_borders_y_c
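
[Reviewer note] aom_scale_rtcd.h shows the other half of the convention: aom_extend_frame_borders_plane_row has only a C implementation, so it is bound at compile time with a #define and never touches the dispatch table. A tiny sketch of that style follows, under stated assumptions: extend_row* is a hypothetical name, not libaom API; it only mirrors the border-extension idea and assumes the caller allocated border writable bytes on each side of the row.

#include <string.h>

void extend_row_c(unsigned char *row, int width, int border) {
  memset(row - border, row[0], border);        /* replicate left edge  */
  memset(row + width, row[width - 1], border); /* replicate right edge */
}
#define extend_row extend_row_c /* compile-time alias; no runtime dispatch */
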
diff --git a/chromium/third_party/libaom/source/config/win/x64/config/av1_rtcd.h b/chromium/third_party/libaom/source/config/win/x64/config/av1_rtcd.h
index 8d516d90bde..6d8a64d1270 100644
--- a/chromium/third_party/libaom/source/config/win/x64/config/av1_rtcd.h
+++ b/chromium/third_party/libaom/source/config/win/x64/config/av1_rtcd.h
@@ -242,6 +242,51 @@ void aom_upsampled_pred_sse2(MACROBLOCKD* xd,
int subpel_search);
#define aom_upsampled_pred aom_upsampled_pred_sse2
+void av1_apply_selfguided_restoration_c(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+void av1_apply_selfguided_restoration_sse4_1(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+void av1_apply_selfguided_restoration_avx2(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+RTCD_EXTERN void (*av1_apply_selfguided_restoration)(const uint8_t* dat,
+ int width,
+ int height,
+ int stride,
+ int eps,
+ const int* xqd,
+ uint8_t* dst,
+ int dst_stride,
+ int32_t* tmpbuf,
+ int bit_depth,
+ int highbd);
+
int64_t av1_block_error_c(const tran_low_t* coeff,
const tran_low_t* dqcoeff,
intptr_t block_size,
@@ -347,6 +392,31 @@ RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_d16)(
ConvolveParams* conv_params,
int bd);
+int64_t av1_calc_frame_error_c(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+int64_t av1_calc_frame_error_sse2(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+int64_t av1_calc_frame_error_avx2(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+RTCD_EXTERN int64_t (*av1_calc_frame_error)(const uint8_t* const ref,
+ int stride,
+ const uint8_t* const dst,
+ int p_width,
+ int p_height,
+ int p_stride);
+
void av1_calc_indices_dim1_c(const int* data,
const int* centroids,
uint8_t* indices,
@@ -1839,6 +1909,47 @@ void av1_round_shift_array_c(int32_t* arr, int size, int bit);
void av1_round_shift_array_sse4_1(int32_t* arr, int size, int bit);
RTCD_EXTERN void (*av1_round_shift_array)(int32_t* arr, int size, int bit);
+int av1_selfguided_restoration_c(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+int av1_selfguided_restoration_sse4_1(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+int av1_selfguided_restoration_avx2(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+RTCD_EXTERN int (*av1_selfguided_restoration)(const uint8_t* dgd8,
+ int width,
+ int height,
+ int dgd_stride,
+ int32_t* flt0,
+ int32_t* flt1,
+ int flt_stride,
+ int sgr_params_idx,
+ int bit_depth,
+ int highbd);
+
void av1_txb_init_levels_c(const tran_low_t* const coeff,
const int width,
const int height,
@@ -1864,6 +1975,79 @@ void av1_upsample_intra_edge_high_c(uint16_t* p, int sz, int bd);
void av1_upsample_intra_edge_high_sse4_1(uint16_t* p, int sz, int bd);
RTCD_EXTERN void (*av1_upsample_intra_edge_high)(uint16_t* p, int sz, int bd);
+void av1_warp_affine_c(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+void av1_warp_affine_sse4_1(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+void av1_warp_affine_avx2(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+RTCD_EXTERN void (*av1_warp_affine)(const int32_t* mat,
+ const uint8_t* ref,
+ int width,
+ int height,
+ int stride,
+ uint8_t* pred,
+ int p_col,
+ int p_row,
+ int p_width,
+ int p_height,
+ int p_stride,
+ int subsampling_x,
+ int subsampling_y,
+ ConvolveParams* conv_params,
+ int16_t alpha,
+ int16_t beta,
+ int16_t gamma,
+ int16_t delta);
+
void av1_wedge_compute_delta_squares_c(int16_t* d,
const int16_t* a,
const int16_t* b,
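
For reference, av1_warp_affine produces a p_width x p_height prediction block by warping the reference with the affine model in mat. A sketch of the per-pixel mapping, assuming the usual AV1 layout of mat[] (the layout is not restated in this header):

~~~
/* Assumed layout: mat[0..1] hold the translation and mat[2..5] the 2x2
 * part, all in 1/(1 << WARPEDMODEL_PREC_BITS) fixed point:
 *   x_src = (mat[2] * x + mat[3] * y + mat[0]) >> WARPEDMODEL_PREC_BITS
 *   y_src = (mat[4] * x + mat[5] * y + mat[1]) >> WARPEDMODEL_PREC_BITS
 * alpha/beta/gamma/delta are shear terms derived from mat so the warp can
 * run as separable horizontal and vertical 8-tap filter passes. */
~~~
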
@@ -2687,6 +2871,11 @@ static void setup_rtcd_internal(void) {
if (flags & HAS_SSSE3)
aom_dist_wtd_comp_avg_upsampled_pred =
aom_dist_wtd_comp_avg_upsampled_pred_ssse3;
+ av1_apply_selfguided_restoration = av1_apply_selfguided_restoration_c;
+ if (flags & HAS_SSE4_1)
+ av1_apply_selfguided_restoration = av1_apply_selfguided_restoration_sse4_1;
+ if (flags & HAS_AVX2)
+ av1_apply_selfguided_restoration = av1_apply_selfguided_restoration_avx2;
av1_block_error = av1_block_error_sse2;
if (flags & HAS_AVX2)
av1_block_error = av1_block_error_avx2;
@@ -2705,6 +2894,9 @@ static void setup_rtcd_internal(void) {
if (flags & HAS_AVX2)
av1_build_compound_diffwtd_mask_d16 =
av1_build_compound_diffwtd_mask_d16_avx2;
+ av1_calc_frame_error = av1_calc_frame_error_sse2;
+ if (flags & HAS_AVX2)
+ av1_calc_frame_error = av1_calc_frame_error_avx2;
av1_calc_indices_dim1 = av1_calc_indices_dim1_sse2;
if (flags & HAS_AVX2)
av1_calc_indices_dim1 = av1_calc_indices_dim1_avx2;
@@ -2903,6 +3095,11 @@ static void setup_rtcd_internal(void) {
av1_round_shift_array = av1_round_shift_array_c;
if (flags & HAS_SSE4_1)
av1_round_shift_array = av1_round_shift_array_sse4_1;
+ av1_selfguided_restoration = av1_selfguided_restoration_c;
+ if (flags & HAS_SSE4_1)
+ av1_selfguided_restoration = av1_selfguided_restoration_sse4_1;
+ if (flags & HAS_AVX2)
+ av1_selfguided_restoration = av1_selfguided_restoration_avx2;
av1_txb_init_levels = av1_txb_init_levels_c;
if (flags & HAS_SSE4_1)
av1_txb_init_levels = av1_txb_init_levels_sse4_1;
@@ -2914,6 +3111,11 @@ static void setup_rtcd_internal(void) {
av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_c;
if (flags & HAS_SSE4_1)
av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_sse4_1;
+ av1_warp_affine = av1_warp_affine_c;
+ if (flags & HAS_SSE4_1)
+ av1_warp_affine = av1_warp_affine_sse4_1;
+ if (flags & HAS_AVX2)
+ av1_warp_affine = av1_warp_affine_avx2;
av1_wedge_compute_delta_squares = av1_wedge_compute_delta_squares_sse2;
if (flags & HAS_AVX2)
av1_wedge_compute_delta_squares = av1_wedge_compute_delta_squares_avx2;
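
The setup_rtcd_internal additions all follow one pattern: bind the baseline implementation first, then upgrade the pointer for each CPUID flag, fastest last so AVX2 wins when both SSE4.1 and AVX2 are present. A minimal standalone sketch of the same pattern:

~~~
#include "aom_ports/x86.h"

/* Minimal sketch of the dispatch pattern above; x86_simd_caps() and the
 * HAS_* flags are the helpers the generated code itself uses. */
static void example_dispatch(void) {
  const int flags = x86_simd_caps();
  av1_warp_affine = av1_warp_affine_c; /* portable baseline first */
  if (flags & HAS_SSE4_1) av1_warp_affine = av1_warp_affine_sse4_1;
  if (flags & HAS_AVX2) av1_warp_affine = av1_warp_affine_avx2;
}
~~~
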
diff --git a/chromium/third_party/libaom/source/libaom/.clang-format b/chromium/third_party/libaom/source/libaom/.clang-format
index a378820073e..a8bc4967c39 100644
--- a/chromium/third_party/libaom/source/libaom/.clang-format
+++ b/chromium/third_party/libaom/source/libaom/.clang-format
@@ -1,148 +1,9 @@
---
Language: Cpp
-# BasedOnStyle: Google
-# Generated with clang-format 7.0.1
-AccessModifierOffset: -1
-AlignAfterOpenBracket: Align
-AlignConsecutiveAssignments: false
-AlignConsecutiveDeclarations: false
-AlignEscapedNewlines: Left
-AlignOperands: true
-AlignTrailingComments: true
-AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortBlocksOnASingleLine: false
+BasedOnStyle: Google
AllowShortCaseLabelsOnASingleLine: true
-AllowShortIfStatementsOnASingleLine: true
-AllowShortLoopsOnASingleLine: true
-AlwaysBreakAfterDefinitionReturnType: None
-AlwaysBreakAfterReturnType: None
-AlwaysBreakBeforeMultilineStrings: true
-AlwaysBreakTemplateDeclarations: true
-BinPackArguments: true
-BinPackParameters: true
-BraceWrapping:
- AfterClass: false
- AfterControlStatement: false
- AfterEnum: false
- AfterFunction: false
- AfterNamespace: false
- AfterObjCDeclaration: false
- AfterStruct: false
- AfterUnion: false
- AfterExternBlock: false
- BeforeCatch: false
- BeforeElse: false
- IndentBraces: false
- SplitEmptyFunction: true
- SplitEmptyRecord: true
- SplitEmptyNamespace: true
-BreakBeforeBinaryOperators: None
-BreakBeforeBraces: Attach
-BreakBeforeInheritanceComma: false
-BreakInheritanceList: BeforeColon
-BreakBeforeTernaryOperators: true
-BreakConstructorInitializersBeforeComma: false
-BreakConstructorInitializers: BeforeColon
-BreakAfterJavaFieldAnnotations: false
-BreakStringLiterals: true
-ColumnLimit: 80
-CommentPragmas: '^ IWYU pragma:'
-CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: false
-ConstructorInitializerIndentWidth: 4
-ContinuationIndentWidth: 4
Cpp11BracedListStyle: false
DerivePointerAlignment: false
-DisableFormat: false
-ExperimentalAutoDetectBinPacking: false
-FixNamespaceComments: true
-ForEachMacros:
- - foreach
- - Q_FOREACH
- - BOOST_FOREACH
-IncludeBlocks: Preserve
-IncludeCategories:
- - Regex: '^<ext/.*\.h>'
- Priority: 2
- - Regex: '^<.*\.h>'
- Priority: 1
- - Regex: '^<.*'
- Priority: 2
- - Regex: '.*'
- Priority: 3
-IncludeIsMainRegex: '([-_](test|unittest))?$'
-IndentCaseLabels: true
-IndentPPDirectives: None
-IndentWidth: 2
-IndentWrappedFunctionNames: false
-JavaScriptQuotes: Leave
-JavaScriptWrapImports: true
-KeepEmptyLinesAtTheStartOfBlocks: false
-MacroBlockBegin: ''
-MacroBlockEnd: ''
-MaxEmptyLinesToKeep: 1
-NamespaceIndentation: None
-ObjCBinPackProtocolList: Never
-ObjCBlockIndentWidth: 2
-ObjCSpaceAfterProperty: false
-ObjCSpaceBeforeProtocolList: false
-PenaltyBreakAssignment: 2
-PenaltyBreakBeforeFirstCallParameter: 1
-PenaltyBreakComment: 300
-PenaltyBreakFirstLessLess: 120
-PenaltyBreakString: 1000
-PenaltyBreakTemplateDeclaration: 10
-PenaltyExcessCharacter: 1000000
-PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Right
-RawStringFormats:
- - Language: Cpp
- Delimiters:
- - cc
- - CC
- - cpp
- - Cpp
- - CPP
- - 'c++'
- - 'C++'
- CanonicalDelimiter: ''
- BasedOnStyle: google
- - Language: TextProto
- Delimiters:
- - pb
- - PB
- - proto
- - PROTO
- EnclosingFunctions:
- - EqualsProto
- - EquivToProto
- - PARSE_PARTIAL_TEXT_PROTO
- - PARSE_TEST_PROTO
- - PARSE_TEXT_PROTO
- - ParseTextOrDie
- - ParseTextProtoOrDie
- CanonicalDelimiter: ''
- BasedOnStyle: google
-ReflowComments: true
SortIncludes: false
-SortUsingDeclarations: true
-SpaceAfterCStyleCast: false
-SpaceAfterTemplateKeyword: true
-SpaceBeforeAssignmentOperators: true
-SpaceBeforeCpp11BracedList: false
-SpaceBeforeCtorInitializerColon: true
-SpaceBeforeInheritanceColon: true
-SpaceBeforeParens: ControlStatements
-SpaceBeforeRangeBasedForLoopColon: true
-SpaceInEmptyParentheses: false
-SpacesBeforeTrailingComments: 2
-SpacesInAngles: false
-SpacesInContainerLiterals: false
-SpacesInCStyleCastParentheses: false
-SpacesInParentheses: false
-SpacesInSquareBrackets: false
-Standard: Auto
-TabWidth: 8
-UseTab: Never
-...
-
diff --git a/chromium/third_party/libaom/source/libaom/.cmake-format.py b/chromium/third_party/libaom/source/libaom/.cmake-format.py
index 7b0e4f08dc5..c79a6ad604d 100644
--- a/chromium/third_party/libaom/source/libaom/.cmake-format.py
+++ b/chromium/third_party/libaom/source/libaom/.cmake-format.py
@@ -64,7 +64,7 @@ enable_markup = True
# If comment markup is enabled, don't reflow the first comment block in
# each listfile. Use this to preserve formatting of your copyright/license
# statements.
-first_comment_is_literal = False
+first_comment_is_literal = True
# If comment markup is enabled, don't reflow any comment block which matches
# this
# (regex) pattern. Default is `None` (disabled).
diff --git a/chromium/third_party/libaom/source/libaom/.mailmap b/chromium/third_party/libaom/source/libaom/.mailmap
index 1f218688c5f..61adddb51b7 100644
--- a/chromium/third_party/libaom/source/libaom/.mailmap
+++ b/chromium/third_party/libaom/source/libaom/.mailmap
@@ -40,6 +40,8 @@ Johann Koenig <johannkoenig@google.com> <johann.koenig@gmail.com>
Johann Koenig <johannkoenig@google.com> <johannkoenig@chromium.org>
John Koleszar <jkoleszar@google.com>
Joshua Litt <joshualitt@google.com> <joshualitt@chromium.org>
+Kyle Siefring <siekyleb@amazon.com>
+Kyle Siefring <siekyleb@amazon.com> <kylesiefring@gmail.com>
Lokeshwar Reddy B <lokeshwar.reddy@ittiam.com>
Logan Goldberg <logangw@google.com>
Luc Trudeau <luc@trud.ca>
diff --git a/chromium/third_party/libaom/source/libaom/AUTHORS b/chromium/third_party/libaom/source/libaom/AUTHORS
index e601133e73c..0c2da8f260d 100644
--- a/chromium/third_party/libaom/source/libaom/AUTHORS
+++ b/chromium/third_party/libaom/source/libaom/AUTHORS
@@ -27,6 +27,7 @@ Aniket Wanare <Aniket.wanare@ittiam.com>
Ankur Saxena <ankurs@nvidia.com>
Arild Fuldseth <arilfuld@cisco.com>
Aron Rosenberg <arosenberg@logitech.com>
+Arun Singh Negi <arun.negi@ittiam.com>
Attila Nagy <attilanagy@google.com>
Bohan Li <bohanli@google.com>
Brennan Shacklett <bshacklett@mozilla.com>
@@ -133,7 +134,7 @@ Katsuhisa Yuasa <berupon@gmail.com>
Kavi Ramamurthy <kavii@google.com>
KO Myung-Hun <komh@chollian.net>
Krishna Malladi <kmalladi@google.com>
-Kyle Siefring <kylesiefring@gmail.com>
+Kyle Siefring <siekyleb@amazon.com>
Larisa Markeeva <lmarkeeva@google.com>
Lauren Partin <lpartin@google.com>
Lawrence Velázquez <larryv@macports.org>
@@ -146,12 +147,14 @@ Logan Goldberg <logangw@google.com>
Lokeshwar Reddy B <lokeshwar.reddy@ittiam.com>
Lou Quillio <louquillio@google.com>
Luca Barbato <lu_zero@gentoo.org>
+Luca Versari <veluca@google.com>
Luc Trudeau <luc@trud.ca>
Madhu Peringassery Krishnan <mpkrishnan@tencent.com>
Makoto Kato <makoto.kt@gmail.com>
Mans Rullgard <mans@mansr.com>
Marco Paniconi <marpan@google.com>
Mark Mentovai <mark@chromium.org>
+Mark Wachsler <wachsler@google.com>
Martin Ettl <ettl.martin78@googlemail.com>
Martin Storsjo <martin@martin.st>
Maryla <maryla@google.com>
@@ -174,6 +177,7 @@ Morton Jonuschat <yabawock@gmail.com>
Mudassir Galagnath <mudassir.galaganath@ittiam.com>
Mufaddal Chakera <mufaddal.chakera@ittiam.com>
Nathan E. Egge <negge@mozilla.com>
+Neeraj Gadgil <neeraj.gadgil@ittiam.com>
Neil Birkbeck <birkbeck@google.com>
Nico Weber <thakis@chromium.org>
Nithya V S <nithya.vs@ittiam.com>
@@ -218,6 +222,7 @@ Sachin Kumar Garg <sachin.kumargarg@ittiam.com>
Sai Deng <sdeng@google.com>
Sami Boukortt <sboukortt@google.com>
Sami Pietilä <samipietila@google.com>
+Samuel Thibault <samuel.thibault@ens-lyon.org>
Sarah Parker <sarahparker@google.com>
Sasi Inguva <isasi@google.com>
Satish Kumar Suman <satish.suman@ittiam.com>
diff --git a/chromium/third_party/libaom/source/libaom/CHANGELOG b/chromium/third_party/libaom/source/libaom/CHANGELOG
index ffc11777727..0df6a6876d6 100644
--- a/chromium/third_party/libaom/source/libaom/CHANGELOG
+++ b/chromium/third_party/libaom/source/libaom/CHANGELOG
@@ -1,3 +1,212 @@
+2022-08-31 v3.5.0
+ This release is ABI compatible with the last one, including speedup and memory
+ optimizations, and new APIs and features.
+
+ - New Features
+ * Support for frame parallel encode for larger number of threads. --fp-mt
+ flag is available for all build configurations.
+ * New codec control AV1E_GET_NUM_OPERATING_POINTS
+
+ - Speedup and Memory Optimizations
+ * Speed-up multithreaded encoding for good quality mode for larger number of
+ threads through frame parallel encoding:
+ o 30-34% encode time reduction for 1080p, 16 threads, 1x1 tile
+ configuration (tile_rows x tile_columns)
+ o 18-28% encode time reduction for 1080p, 16 threads, 2x4 tile
+ configuration
+ o 18-20% encode time reduction for 2160p, 32 threads, 2x4 tile
+ configuration
+ * 16-20% speed-up for speed=6 to 8 in still-picture encoding mode
+ * 5-6% heap memory reduction for speed=6 to 10 in real-time encoding mode
+ * Improvements to the speed for speed=7, 8 in real-time encoding mode
+ * Improvements to the speed for speed=9, 10 in real-time screen encoding
+ mode
+ * Optimizations to improve multi-thread efficiency in real-time encoding
+ mode
+ * 10-15% speed up for SVC with temporal layers
+ * SIMD optimizations:
+ o Improve av1_quantize_fp_32x32_neon() 1.05x to 1.24x faster
+ o Add aom_highbd_quantize_b{,_32x32,_64x64}_adaptive_neon() 3.15x to 5.6x
+ faster than "C"
+ o Improve av1_quantize_fp_64x64_neon() 1.17x to 1.66x faster
+ o Add aom_quantize_b_avx2() 1.4x to 1.7x faster than aom_quantize_b_avx()
+ o Add aom_quantize_b_32x32_avx2() 1.4x to 2.3x faster than
+ aom_quantize_b_32x32_avx()
+ o Add aom_quantize_b_64x64_avx2() 2.0x to 2.4x faster than
+ aom_quantize_b_64x64_ssse3()
+ o Add aom_highbd_quantize_b_32x32_avx2() 9.0x to 10.5x faster than
+ aom_highbd_quantize_b_32x32_c()
+ o Add aom_highbd_quantize_b_64x64_avx2() 7.3x to 9.7x faster than
+ aom_highbd_quantize_b_64x64_c()
+ o Improve aom_highbd_quantize_b_avx2() 1.07x to 1.20x faster
+ o Improve av1_quantize_fp_avx2() 1.13x to 1.49x faster
+ o Improve av1_quantize_fp_32x32_avx2() 1.07x to 1.54x faster
+ o Improve av1_quantize_fp_64x64_avx2() 1.03x to 1.25x faster
+ o Improve av1_quantize_lp_avx2() 1.07x to 1.16x faster
+
+ - Bug fixes including but not limited to
+ * aomedia:3206 Assert that skip_width > 0 for deconvolve function
+ * aomedia:3278 row_mt enc: Delay top-right sync when intraBC is enabled
+ * aomedia:3282 blend_a64_*_neon: fix bus error in armv7
+ * aomedia:3283 FRAME_PARALLEL: Propagate border size to all cpis
+ * aomedia:3283 RESIZE_MODE: Fix incorrect strides being used for motion
+ search
+ * aomedia:3286 rtc-svc: Fix to dynamic_enable spatial layers
+ * aomedia:3289 rtc-screen: Fix to skipping inter-mode test in nonrd
+ * aomedia:3289 rtc-screen: Fix for skip newmv on flat blocks
+ * aomedia:3299 Fix build failure with CONFIG_TUNE_VMAF=1
+ * aomedia:3296 Fix the conflict --enable-tx-size-search=0 with nonrd mode
+ --enable-tx-size-search will be ignored in non-rd pick mode
+ * aomedia:3304 Fix off-by-one error of max w/h in validate_config
+ * aomedia:3306 Do not use pthread_setname_np on GNU/Hurd
+ * aomedia:3325 row-multithreading produces invalid bitstream in some cases
+ * chromium:1346938, chromium:1338114
+ * compiler_flags.cmake: fix flag detection w/cmake 3.17-3.18.2
+ * tools/*.py: update to python3
+ * aom_configure.cmake: detect PIE and set CONFIG_PIC
+ * test/simd_cmp_impl: use explicit types w/CompareSimd*
+ * rtc: Fix to disable segm for aq-mode=3
+ * rtc: Fix to color_sensitivity in variance partition
+ * rtc-screen: Fix bsize in model rd computation for intra chroma
+ * Fixes to ensure the correct behavior of the encoder algorithms (like
+ segmentation, computation of statistics, etc.)
+
+2022-06-17 v3.4.0
+ This release includes compression efficiency and perceptual quality
+ improvements, speedup and memory optimizations, and some new features.
+ There are no ABI or API breaking changes in this release.
+
+ - New Features
+ * New --dist-metric flag with "qm-psnr" value to use quantization
+ matrices in the distortion computation for RD search. The default
+ value is "psnr".
+ * New command line option "--auto-intra-tools-off=1" to make
+ all-intra encoding faster for high bit rate under
+ "--deltaq-mode=3" mode.
+ * New rate control library aom_av1_rc for real-time hardware
+ encoders. Supports CBR for both one spatial layer and SVC.
+ * New image format AOM_IMG_FMT_NV12 can be used as input to the
+ encoder. The presence of AOM_IMG_FMT_NV12 can be detected at
+ compile time by checking if the macro AOM_HAVE_IMG_FMT_NV12 is
+ defined.
+ * New codec controls for the encoder:
+ o AV1E_SET_AUTO_INTRA_TOOLS_OFF. Only in effect if
+ --deltaq-mode=3.
+ o AV1E_SET_RTC_EXTERNAL_RC
+ o AV1E_SET_FP_MT. Only supported if libaom is built with
+ -DCONFIG_FRAME_PARALLEL_ENCODE=1.
+ o AV1E_GET_TARGET_SEQ_LEVEL_IDX
+ * New key-value pairs for the key-value API:
+ o --auto-intra-tools-off=0 (default) or 1. Only in effect if
+ --deltaq-mode=3.
+ o --strict-level-conformance=0 (default) or 1
+ o --fp-mt=0 (default) or 1. Only supported if libaom is built
+ with -DCONFIG_FRAME_PARALLEL_ENCODE=1.
+ * New aomenc options (not supported by the key-value API):
+ o --nv12
+
+ - Compression Efficiency Improvements
+ * Correctly calculate SSE for high bitdepth in skip mode, 0.2% to
+ 0.6% coding gain.
+ * RTC at speed 9/10: BD-rate gain of ~4/5%
+ * RTC screen content coding: many improvements for real-time screen
+ at speed 10 (quality, speedup, and rate control), up to high
+ resolutions (1080p).
+ * RTC-SVC: fixes to make intra-only frames work for spatial layers.
+ * RTC-SVC: quality improvements for temporal layers.
+ * AV1 RT: A new passive rate control strategy for screen content, an
+ average of 7.5% coding gain, with some clips of 20+%. The feature
+ is turned off by default due to higher bit rate variation.
+
+ - Perceptual Quality Improvements
+ * RTC: Visual quality improvements for high speeds (9/10)
+ * Improvements in coding quality for all intra mode
+
+ - Speedup and Memory Optimizations
+ * ~10% speedup in good quality mode encoding.
+ * ~7% heap memory reduction in good quality encoding mode for speed
+ 5 and 6.
+ * Ongoing improvements to intra-frame encoding performance on Arm
+ * Faster encoding speed for "--deltaq-mode=3" mode.
+ * ~10% speedup for speed 5/6, ~15% speedup for speed 7/8, and
+ ~10% speedup for speed 9/10 in real time encoding mode
+ * ~20% heap memory reduction in still-picture encoding mode for
+ 360p-720p resolutions with multiple threads
+ * ~13% speedup for speed 6 and ~12% speedup for speed 9 in
+ still-picture encoding mode.
+ * Optimizations to improve multi-thread efficiency for still-picture
+ encoding mode.
+
+ - Bug Fixes
+ * b/204460717: README.md: replace master with main
+ * b/210677928: libaom disable_order is surprising for
+ max_reference_frames=3
+ * b/222461449: -DCONFIG_TUNE_BUTTERAUGLI=1 broken
+ * b/227207606: write_greyscale writes incorrect chroma in highbd
+ mode
+ * b/229955363: Integer-overflow in linsolve_wiener
+ * https://crbug.com/aomedia/2032
+ * https://crbug.com/aomedia/2397
+ * https://crbug.com/aomedia/2563
+ * https://crbug.com/aomedia/2815
+ * https://crbug.com/aomedia/3009
+ * https://crbug.com/aomedia/3018
+ * https://crbug.com/aomedia/3045
+ * https://crbug.com/aomedia/3101
+ * https://crbug.com/aomedia/3130
+ * https://crbug.com/aomedia/3173
+ * https://crbug.com/aomedia/3184
+ * https://crbug.com/aomedia/3187
+ * https://crbug.com/aomedia/3190
+ * https://crbug.com/aomedia/3195
+ * https://crbug.com/aomedia/3197
+ * https://crbug.com/aomedia/3201
+ * https://crbug.com/aomedia/3202
+ * https://crbug.com/aomedia/3204
+ * https://crbug.com/aomedia/3205
+ * https://crbug.com/aomedia/3207
+ * https://crbug.com/aomedia/3208
+ * https://crbug.com/aomedia/3209
+ * https://crbug.com/aomedia/3213
+ * https://crbug.com/aomedia/3214
+ * https://crbug.com/aomedia/3219
+ * https://crbug.com/aomedia/3222
+ * https://crbug.com/aomedia/3223
+ * https://crbug.com/aomedia/3225
+ * https://crbug.com/aomedia/3226
+ * https://crbug.com/aomedia/3228
+ * https://crbug.com/aomedia/3232
+ * https://crbug.com/aomedia/3236
+ * https://crbug.com/aomedia/3237
+ * https://crbug.com/aomedia/3238
+ * https://crbug.com/aomedia/3240
+ * https://crbug.com/aomedia/3243
+ * https://crbug.com/aomedia/3244
+ * https://crbug.com/aomedia/3246
+ * https://crbug.com/aomedia/3248
+ * https://crbug.com/aomedia/3250
+ * https://crbug.com/aomedia/3251
+ * https://crbug.com/aomedia/3252
+ * https://crbug.com/aomedia/3255
+ * https://crbug.com/aomedia/3257
+ * https://crbug.com/aomedia/3259
+ * https://crbug.com/aomedia/3260
+ * https://crbug.com/aomedia/3267
+ * https://crbug.com/aomedia/3268
+ * https://crbug.com/aomedia/3269
+ * https://crbug.com/aomedia/3276
+ * https://crbug.com/aomedia/3278
+ * https://crbug.com/chromium/1290068
+ * https://crbug.com/chromium/1303237
+ * https://crbug.com/chromium/1304990
+ * https://crbug.com/chromium/1321141
+ * https://crbug.com/chromium/1321388
+ * https://crbug.com/oss-fuzz/44846
+ * https://crbug.com/oss-fuzz/44856
+ * https://crbug.com/oss-fuzz/44862
+ * https://crbug.com/oss-fuzz/44904
+ * https://crbug.com/oss-fuzz/45056
+
2022-01-28 v3.3.0
This release includes compression efficiency and perceptual quality
improvements, speedup and memory optimizations, some new features, and
@@ -76,7 +285,7 @@
- Perceptual Quality Improvements
* Added a new mode --deltaq-mode=3 to improve perceptual quality
based on a differential contrast model for still images.
- * Added a new mode –deltaq-mode=4 to improve perceptual quality
+ * Added a new mode --deltaq-mode=4 to improve perceptual quality
based on user rated cq_level data set for still images.
* Weighting of some intra mode and partition size choices to better
manage and retain texture.
@@ -181,7 +390,7 @@
"enable-diagonal-intra" for the aom_codec_set_option() function.
New aom_tune_metric enum value: AOM_TUNE_BUTTERAUGLI. The new aomenc option
- --tune=butteraugli was added to optimize the encoder’s perceptual quality by
+ --tune=butteraugli was added to optimize the encoder's perceptual quality by
optimizing the Butteraugli metric. Install libjxl (JPEG XL) and then pass
-DCONFIG_TUNE_BUTTERAUGLI=1 to the cmake command to enable it.
@@ -281,7 +490,7 @@
Issue 2869: Add -Wimplicit-function-declaration as C flag only.
- Issue 2878: Avoid memset in the av1_filter_intra_predictor module
+ Issue 2878: Avoid memset in the av1_filter_intra_predictor module
functions.
Issue 2903: Fix a typo bug in apply_temporal_filter_planewise.
diff --git a/chromium/third_party/libaom/source/libaom/CMakeLists.txt b/chromium/third_party/libaom/source/libaom/CMakeLists.txt
index c5067e257e7..5b55a633b39 100644
--- a/chromium/third_party/libaom/source/libaom/CMakeLists.txt
+++ b/chromium/third_party/libaom/source/libaom/CMakeLists.txt
@@ -51,9 +51,9 @@ endif()
# passed to libtool.
#
# We set SO_FILE_VERSION = [c-a].a.r
-set(LT_CURRENT 6)
+set(LT_CURRENT 8)
set(LT_REVISION 0)
-set(LT_AGE 3)
+set(LT_AGE 5)
math(EXPR SO_VERSION "${LT_CURRENT} - ${LT_AGE}")
set(SO_FILE_VERSION "${SO_VERSION}.${LT_AGE}.${LT_REVISION}")
unset(LT_CURRENT)
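
With the new values, SO_VERSION = LT_CURRENT - LT_AGE = 8 - 5 = 3 and SO_FILE_VERSION = 3.5.0, so the shared object moves from libaom.so.3.3.0 (previously 6 - 3 = 3, age 3, revision 0) to libaom.so.3.5.0 while keeping the same SONAME major version of 3.
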
@@ -316,12 +316,14 @@ endif()
if(CONFIG_AV1_ENCODER AND NOT CONFIG_REALTIME_ONLY AND NOT BUILD_SHARED_LIBS)
list(APPEND AOM_AV1_RC_QMODE_SOURCES
- "${AOM_ROOT}/av1/ratectrl_qmode_interface.h"
- "${AOM_ROOT}/av1/ratectrl_qmode_interface.cc"
- "${AOM_ROOT}/av1/reference_manager.h"
- "${AOM_ROOT}/av1/reference_manager.cc"
- "${AOM_ROOT}/av1/ratectrl_qmode.h"
- "${AOM_ROOT}/av1/ratectrl_qmode.cc")
+ "${AOM_ROOT}/av1/qmode_rc/ratectrl_qmode_interface.h"
+ "${AOM_ROOT}/av1/qmode_rc/ratectrl_qmode_interface.cc"
+ "${AOM_ROOT}/av1/qmode_rc/reference_manager.h"
+ "${AOM_ROOT}/av1/qmode_rc/reference_manager.cc"
+ "${AOM_ROOT}/av1/qmode_rc/ratectrl_qmode.h"
+ "${AOM_ROOT}/av1/qmode_rc/ratectrl_qmode.cc"
+ "${AOM_ROOT}/av1/qmode_rc/ducky_encode.h"
+ "${AOM_ROOT}/av1/qmode_rc/ducky_encode.cc")
add_library(av1_rc_qmode ${AOM_AV1_RC_QMODE_SOURCES})
target_link_libraries(av1_rc_qmode ${AOM_LIB_LINK_TYPE} aom)
if(NOT MSVC AND NOT APPLE)
diff --git a/chromium/third_party/libaom/source/libaom/README.md b/chromium/third_party/libaom/source/libaom/README.md
index 0146003db98..7af05af7d04 100644
--- a/chromium/third_party/libaom/source/libaom/README.md
+++ b/chromium/third_party/libaom/source/libaom/README.md
@@ -19,16 +19,16 @@ README.md {#LREADME}
- [Build with VMAF support](#build-with-vmaf)
2. [Testing the library](#testing-the-av1-codec)
- [Basics](#testing-basics)
- - [Unit tests](#1_unit-tests)
- - [Example tests](#2_example-tests)
- - [Encoder tests](#3_encoder-tests)
+ - [Unit tests](#unit-tests)
+ - [Example tests](#example-tests)
+ - [Encoder tests](#encoder-tests)
- [IDE hosted tests](#ide-hosted-tests)
- [Downloading test data](#downloading-the-test-data)
- [Adding a new test data file](#adding-a-new-test-data-file)
- [Additional test data](#additional-test-data)
- [Sharded testing](#sharded-testing)
- - [Running tests directly](#1_running-test_libaom-directly)
- - [Running tests via CMake](#2_running-the-tests-via-the-cmake-build)
+ - [Running tests directly](#running-test_libaom-directly)
+ - [Running tests via CMake](#running-the-tests-via-the-cmake-build)
3. [Coding style](#coding-style)
4. [Submitting patches](#submitting-patches)
- [Login cookie](#login-cookie)
@@ -165,8 +165,6 @@ The toolchain files available at the time of this writing are:
- armv7-linux-gcc.cmake
- armv7-mingw-gcc.cmake
- armv7s-ios.cmake
- - mips32-linux-gcc.cmake
- - mips64-linux-gcc.cmake
- x86-ios-simulator.cmake
- x86-linux.cmake
- x86-macos.cmake
@@ -341,7 +339,7 @@ There are several methods of testing the AV1 codec. All of these methods require
the presence of the AV1 source code and a working build of the AV1 library and
applications.
-#### 1. Unit tests: {#1_unit-tests}
+#### 1. Unit tests: {#unit-tests}
The unit tests can be run at build time:
@@ -355,7 +353,7 @@ The unit tests can be run at build time:
$ make runtests
~~~
-#### 2. Example tests: {#2_example-tests}
+#### 2. Example tests: {#example-tests}
The example tests require a bash shell and can be run in the following manner:
@@ -370,7 +368,7 @@ The example tests require a bash shell and can be run in the following manner:
$ path/to/aom/test/examples.sh --bin-path examples
~~~
-#### 3. Encoder tests: {#3_encoder-tests}
+#### 3. Encoder tests: {#encoder-tests}
When making a change to the encoder run encoder tests to confirm that your
change has a positive or negligible impact on encode quality. When running these
@@ -487,7 +485,7 @@ https://media.xiph.org/video/derf/
The AV1 codec library unit tests are built upon gtest which supports sharding of
test jobs. Sharded test runs can be achieved in a couple of ways.
-#### 1. Running test\_libaom directly: {#1_running-test_libaom-directly}
+#### 1. Running test\_libaom directly: {#running-test_libaom-directly}
~~~
# Set the environment variable GTEST_TOTAL_SHARDS to control the number of
@@ -501,7 +499,7 @@ test jobs. Sharded test runs can be achieved in a couple of ways.
To create a test shard for each CPU core available on the current system set
`GTEST_TOTAL_SHARDS` to the number of CPU cores on your system minus one.
-#### 2. Running the tests via the CMake build: {#2_running-the-tests-via-the-cmake-build}
+#### 2. Running the tests via the CMake build: {#running-the-tests-via-the-cmake-build}
~~~
# For IDE based builds, ENABLE_IDE_TEST_HOSTING must be enabled. See
diff --git a/chromium/third_party/libaom/source/libaom/aom/aom_codec.h b/chromium/third_party/libaom/source/libaom/aom/aom_codec.h
index 49d48cf1530..d77bae6b25c 100644
--- a/chromium/third_party/libaom/source/libaom/aom/aom_codec.h
+++ b/chromium/third_party/libaom/source/libaom/aom/aom_codec.h
@@ -14,7 +14,7 @@
///////////////////////////////////////////////////////////////////////////////
//
// There are two levels of interfaces used to access the AOM codec: the
-// the aom_codec_iface and the aom_codec_ctx.
+// aom_codec_iface and the aom_codec_ctx.
//
// 1. aom_codec_iface_t
// (Related files: aom/aom_codec.h, aom/src/aom_codec.c,
@@ -244,7 +244,7 @@ typedef int64_t aom_codec_pts_t;
* - aom_codec_get_caps(aom_codec_iface_t *iface): returns
* the capabilities of the codec
* - aom_codec_enc_config_default: generate the default config for
- * initializing the encoder (see documention in aom_encoder.h)
+ * initializing the encoder (see documentation in aom_encoder.h)
* - aom_codec_dec_init, aom_codec_enc_init: initialize the codec context
* structure (see documentation on aom_codec_ctx).
*
@@ -365,7 +365,7 @@ int aom_codec_version(void);
*
* Returns a printable string containing the full library version number. This
* may contain additional text following the three digit version number, as to
- * indicate release candidates, prerelease versions, etc.
+ * indicate release candidates, pre-release versions, etc.
*
*/
const char *aom_codec_version_str(void);
@@ -521,7 +521,7 @@ aom_codec_err_t aom_codec_set_option(aom_codec_ctx_t *ctx, const char *name,
#define AOM_CODEC_CONTROL_TYPECHECKED(ctx, id, data) \
aom_codec_control_typechecked_##id(ctx, id, data) /**<\hideinitializer*/
-/*!\brief Creates typechecking mechanisms for aom_codec_control
+/*!\brief Creates type checking mechanisms for aom_codec_control
*
* It defines a static function with the correctly typed arguments as a wrapper
* to the type-unsafe aom_codec_control function. It also creates a typedef
diff --git a/chromium/third_party/libaom/source/libaom/aom/aom_encoder.h b/chromium/third_party/libaom/source/libaom/aom/aom_encoder.h
index 0c3be5a5ada..4d89176d94f 100644
--- a/chromium/third_party/libaom/source/libaom/aom/aom_encoder.h
+++ b/chromium/third_party/libaom/source/libaom/aom/aom_encoder.h
@@ -435,6 +435,11 @@ typedef struct aom_codec_enc_cfg {
/*!\brief Max number of frames to encode
*
+ * If force video mode is off (the default) and g_limit is 1, the encoder
+ * will encode a still picture (still_picture is set to 1 in the sequence
+ * header OBU). If in addition full_still_picture_hdr is 0 (the default),
+ * the encoder will use a reduced header (reduced_still_picture_header is
+ * set to 1 in the sequence header OBU) for the still picture.
*/
unsigned int g_limit;
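
In other words, a single-frame encode defaults to a reduced still-picture sequence header. A minimal sketch of requesting that mode (error handling omitted; assumes an AV1 encoder build so aom_codec_av1_cx() is available):

~~~
#include "aom/aomcx.h"

/* Hedged sketch: configure a single-frame (still picture) encode with the
 * default reduced sequence header. */
static aom_codec_enc_cfg_t still_picture_cfg(void) {
  aom_codec_enc_cfg_t cfg;
  aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg,
                               AOM_USAGE_GOOD_QUALITY);
  cfg.g_limit = 1;                /* one frame => still_picture = 1 */
  cfg.full_still_picture_hdr = 0; /* default: reduced header */
  return cfg;
}
~~~
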
@@ -817,10 +822,12 @@ typedef struct aom_codec_enc_cfg {
/*!\brief full_still_picture_hdr
*
- * If this is nonzero, the encoder will generate a full header even for
- * still picture encoding. if zero, a reduced header is used for still
- * picture. This flag has no effect when a regular video with more than
- * a single frame is encoded.
+ * If this is nonzero, the encoder will generate a full header
+ * (reduced_still_picture_header is set to 0 in the sequence header OBU) even
+ * for still picture encoding. If this is zero (the default), a reduced
+ * header (reduced_still_picture_header is set to 1 in the sequence header
+ * OBU) is used for still picture encoding. This flag has no effect when a
+ * regular video with more than a single frame is encoded.
*/
unsigned int full_still_picture_hdr;
@@ -878,16 +885,17 @@ typedef struct aom_codec_enc_cfg {
*
* If a value of 1 is provided, encoder will use fixed QP offsets for frames
* at different levels of the pyramid.
- * - If 'fixed_qp_offsets' is also provided, encoder will use the given
- * offsets
- * - If not, encoder will select the fixed offsets based on the cq-level
- * provided.
- * If a value of 0 is provided and fixed_qp_offset are not provided, encoder
- * will NOT use fixed QP offsets.
+ * If a value of 0 is provided, encoder will NOT use fixed QP offsets.
* Note: This option is only relevant for --end-usage=q.
*/
unsigned int use_fixed_qp_offsets;
+ /*!\brief Deprecated and ignored. DO NOT USE.
+ *
+ * TODO(aomedia:3269): Remove fixed_qp_offsets in libaom v4.0.0.
+ */
+ int fixed_qp_offsets[5];
+
/*!\brief Options defined per config file
*
*/
@@ -1015,6 +1023,8 @@ aom_fixed_buf_t *aom_codec_get_global_headers(aom_codec_ctx_t *ctx);
*
* \param[in] ctx Pointer to this instance's context
* \param[in] img Image data to encode, NULL to flush.
+ * Encoding sample values outside the range
+ * [0..(1<<img->bit_depth)-1] is undefined behavior.
* \param[in] pts Presentation time stamp, in timebase units. If img
* is NULL, pts is ignored.
* \param[in] duration Duration to show frame, in timebase units. If img
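
Since out-of-range samples are undefined behavior per the new note, callers generating synthetic input may want to clamp first. A trivial hedged sketch:

~~~
#include <stdint.h>

/* Keep samples inside the defined range [0, (1 << bit_depth) - 1] before
 * handing the image to aom_codec_encode(). */
static uint16_t clamp_sample(int32_t v, int bit_depth) {
  const int32_t max = (1 << bit_depth) - 1;
  return (uint16_t)(v < 0 ? 0 : (v > max ? max : v));
}
~~~
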
diff --git a/chromium/third_party/libaom/source/libaom/aom/aom_external_partition.h b/chromium/third_party/libaom/source/libaom/aom/aom_external_partition.h
index 55c59a57461..c381f6e5e98 100644
--- a/chromium/third_party/libaom/source/libaom/aom/aom_external_partition.h
+++ b/chromium/third_party/libaom/source/libaom/aom/aom_external_partition.h
@@ -246,7 +246,7 @@ typedef struct aom_partition_features {
int block_size; ///< As "BLOCK_SIZE" in av1/common/enums.h
/*!
* Valid partition types. A bitmask is used. "1" represents the
- * corresponding type is vaild. The bitmask follows the enum order for
+ * corresponding type is valid. The bitmask follows the enum order for
* PARTITION_TYPE in "enums.h" to represent one partition type at a bit.
* For example, 0x01 stands for only PARTITION_NONE is valid,
* 0x09 (00...001001) stands for PARTITION_NONE and PARTITION_SPLIT are valid.
@@ -313,10 +313,10 @@ typedef struct aom_partition_decision {
int do_rectangular_split; ///< Try rectangular split partition
int do_square_split; ///< Try square split partition
int prune_rect_part[2]; ///< Prune rectangular partition
- int horza_partition_allowed; ///< Allow HORZ_A partitioin
- int horzb_partition_allowed; ///< Allow HORZ_B partitioin
- int verta_partition_allowed; ///< Allow VERT_A partitioin
- int vertb_partition_allowed; ///< Allow VERT_B partitioin
+ int horza_partition_allowed; ///< Allow HORZ_A partition
+ int horzb_partition_allowed; ///< Allow HORZ_B partition
+ int verta_partition_allowed; ///< Allow VERT_A partition
+ int vertb_partition_allowed; ///< Allow VERT_B partition
int partition_horz4_allowed; ///< Allow HORZ4 partition
int partition_vert4_allowed; ///< Allow VERT4 partition
} aom_partition_decision_t;
@@ -326,7 +326,7 @@ typedef struct aom_partition_decision {
* The encoding stats collected by encoding the superblock with the
* given partition types.
* The encoder sends the stats to the external model for training
- * or inference though "func()" defined in ....
+ * or inference through "func()" defined in ....
*/
typedef struct aom_partition_stats {
int rate; ///< Rate cost of the block
diff --git a/chromium/third_party/libaom/source/libaom/aom/aom_image.h b/chromium/third_party/libaom/source/libaom/aom/aom_image.h
index 154563e1234..d5f0c087e6b 100644
--- a/chromium/third_party/libaom/source/libaom/aom/aom_image.h
+++ b/chromium/third_party/libaom/source/libaom/aom/aom_image.h
@@ -129,8 +129,12 @@ typedef enum aom_matrix_coefficients {
/*!\brief List of supported color range */
typedef enum aom_color_range {
- AOM_CR_STUDIO_RANGE = 0, /**< Y [16..235], UV [16..240] */
- AOM_CR_FULL_RANGE = 1 /**< YUV/RGB [0..255] */
+ AOM_CR_STUDIO_RANGE = 0, /**<- Y [16..235], UV [16..240] (bit depth 8) */
+ /**<- Y [64..940], UV [64..960] (bit depth 10) */
+ /**<- Y [256..3760], UV [256..3840] (bit depth 12) */
+ AOM_CR_FULL_RANGE = 1 /**<- YUV/RGB [0..255] (bit depth 8) */
+ /**<- YUV/RGB [0..1023] (bit depth 10) */
+ /**<- YUV/RGB [0..4095] (bit depth 12) */
} aom_color_range_t; /**< alias for enum aom_color_range */
/*!\brief List of chroma sample positions */
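
The per-bit-depth limits in the expanded comment all follow from scaling the 8-bit studio range by 1 << (bit_depth - 8), while full range is simply [0, (1 << bit_depth) - 1]. A small sketch that reproduces the numbers above:

~~~
/* Studio-range limits for a given bit depth, matching the comment above. */
static void studio_range(int bd, int *y_min, int *y_max, int *uv_max) {
  const int s = 1 << (bd - 8);
  *y_min = 16 * s;   /* 16, 64, 256   for bd = 8, 10, 12 */
  *y_max = 235 * s;  /* 235, 940, 3760 */
  *uv_max = 240 * s; /* 240, 960, 3840 */
}
~~~
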
diff --git a/chromium/third_party/libaom/source/libaom/aom/aomcx.h b/chromium/third_party/libaom/source/libaom/aom/aomcx.h
index abf6284ebb8..36d62853bb4 100644
--- a/chromium/third_party/libaom/source/libaom/aom/aomcx.h
+++ b/chromium/third_party/libaom/source/libaom/aom/aomcx.h
@@ -32,6 +32,7 @@
* Loop restoration
*
* The following features are also disabled with CONFIG_REALTIME_ONLY:
+ * AV1E_SET_QUANT_B_ADAPT
* CNN
* 4X rectangular blocks
* 4X rectangular transform in intra prediction
@@ -190,8 +191,8 @@ enum aome_enc_control_id {
/* NOTE: enum 10 unused */
- /*!\brief Codec control function to set encoder scaling mode,
- * aom_scaling_mode_t* parameter.
+ /*!\brief Codec control function to set encoder scaling mode for the next
+ * frame to be coded, aom_scaling_mode_t* parameter.
*/
AOME_SET_SCALEMODE = 11,
@@ -431,7 +432,7 @@ enum aome_enc_control_id {
/*!\brief Codec control function to enable error_resilient_mode, int parameter
*
- * AV1 has a bitstream feature to guarantee parseability of a frame
+ * AV1 has a bitstream feature to guarantee parsability of a frame
* by turning on the error_resilient_decoding mode, even though the
* reference buffers are unreliable or not received.
*
@@ -612,18 +613,18 @@ enum aome_enc_control_id {
AV1E_SET_RENDER_SIZE = 53,
/*!\brief Control to set target sequence level index for a certain operating
- * point(OP), int parameter
- * Possible values are in the form of "ABxy"(pad leading zeros if less than
- * 4 digits).
+ * point (OP), int parameter
+ * Possible values are in the form of "ABxy".
* - AB: OP index.
- * - xy: Target level index for the OP. Can be values 0~23(corresponding to
- * level 2.0 ~ 7.3) or 24(keep level stats only for level monitoring) or
- * 31(maximum level parameter, no level-based constraints).
+ * - xy: Target level index for the OP. Can be values 0~23 (corresponding to
+ * level 2.0 ~ 7.3, note levels 2.2, 2.3, 3.2, 3.3, 4.2, 4.3, 7.0, 7.1, 7.2
+ * & 7.3 are undefined) or 24 (keep level stats only for level monitoring)
+ * or 31 (maximum level parameter, no level-based constraints).
*
* E.g.:
- * - "0" means target level index 0 for the 0th OP;
- * - "111" means target level index 11 for the 1st OP;
- * - "1021" means target level index 21 for the 10th OP.
+ * - "0" means target level index 0 (2.0) for the 0th OP;
+ * - "109" means target level index 9 (4.1) for the 1st OP;
+ * - "1019" means target level index 19 (6.3) for the 10th OP.
*
* If the target level is not specified for an OP, the maximum level parameter
* of 31 is used as default.
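
Decoding the "ABxy" form from the clarified docs: 109 splits as OP index 1 and level index 09, and index 9 maps to level 4.1 because indices count minor levels upward from 2.0, four per major level. A hedged sketch with a hypothetical helper:

~~~
/* Hypothetical helper mirroring the "ABxy" form documented above:
 * AB = operating point index, xy = level index. (1, 9) -> 109 => 4.1. */
static int target_seq_level_param(int op_index, int level_index) {
  return op_index * 100 + level_index;
}
/* Usage: aom_codec_control(&ctx, AV1E_SET_TARGET_SEQ_LEVEL_IDX,
 *                          target_seq_level_param(1, 9)); */
~~~
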
@@ -1270,7 +1271,7 @@ enum aome_enc_control_id {
*/
AV1E_SET_SVC_LAYER_ID = 131,
- /*!\brief Codec control function to set SVC paramaeters, aom_svc_params_t*
+ /*!\brief Codec control function to set SVC parameters, aom_svc_params_t*
* parameter
*/
AV1E_SET_SVC_PARAMS = 132,
@@ -1372,6 +1373,9 @@ enum aome_enc_control_id {
AV1E_SET_ENABLE_DIRECTIONAL_INTRA = 145,
/*!\brief Control to turn on / off transform size search.
+ * Note: it can not work with non RD pick mode in real-time encoding,
+ * where the max transform size is only 16x16.
+ * It will be ignored if non RD pick mode is set.
*
* - 0 = disable, transforms always have the largest possible size
* - 1 = enable, search for the best transform size for each block (default)
@@ -1403,7 +1407,8 @@ enum aome_enc_control_id {
*/
AOME_GET_LOOPFILTER_LEVEL = 150,
- /*!\brief Codec control to automatically turn off several intra coding tools
+ /*!\brief Codec control to automatically turn off several intra coding tools,
+ * unsigned int parameter
* - 0 = do not use the feature
* - 1 = enable the automatic decision to turn off several intra tools
*/
@@ -1445,6 +1450,22 @@ enum aome_enc_control_id {
*/
AV1E_GET_TARGET_SEQ_LEVEL_IDX = 155,
+ /*!\brief Codec control function to get the number of operating points. int*
+ * parameter.
+ */
+ AV1E_GET_NUM_OPERATING_POINTS = 156,
+
+ /*!\brief Codec control function to skip the application of post-processing
+ * filters on reconstructed frame, unsigned int parameter
+ *
+ * - 0 = disable (default)
+ * - 1 = enable
+ *
+ * \attention For this value to be used aom_codec_enc_cfg_t::g_usage
+ * must be set to AOM_USAGE_ALL_INTRA.
+ */
+ AV1E_SET_SKIP_POSTPROC_FILTERING = 157,
+
// Any new encoder control IDs should be added above.
// Maximum allowed encoder control ID is 229.
// No encoder control ID should be added below.
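
A hedged sketch of the new AV1E_SET_SKIP_POSTPROC_FILTERING control; per the attention note above it only takes effect when the config was created with AOM_USAGE_ALL_INTRA:

~~~
#include "aom/aomcx.h"

/* ctx is an encoder context initialized from a config whose g_usage is
 * AOM_USAGE_ALL_INTRA; otherwise the control has no effect. */
static aom_codec_err_t skip_postproc(aom_codec_ctx_t *ctx) {
  return aom_codec_control(ctx, AV1E_SET_SKIP_POSTPROC_FILTERING, 1);
}
~~~
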
@@ -2058,6 +2079,12 @@ AOM_CTRL_USE_TYPE(AV1E_SET_FP_MT_UNIT_TEST, unsigned int)
AOM_CTRL_USE_TYPE(AV1E_GET_TARGET_SEQ_LEVEL_IDX, int *)
#define AOM_CTRL_AV1E_GET_TARGET_SEQ_LEVEL_IDX
+AOM_CTRL_USE_TYPE(AV1E_GET_NUM_OPERATING_POINTS, int *)
+#define AOM_CTRL_AV1E_GET_NUM_OPERATING_POINTS
+
+AOM_CTRL_USE_TYPE(AV1E_SET_SKIP_POSTPROC_FILTERING, unsigned int)
+#define AOM_CTRL_AV1E_SET_SKIP_POSTPROC_FILTERING
+
/*!\endcond */
/*! @} - end defgroup aom_encoder */
#ifdef __cplusplus
diff --git a/chromium/third_party/libaom/source/libaom/aom/src/aom_encoder.c b/chromium/third_party/libaom/source/libaom/aom/src/aom_encoder.c
index 5dfda968a76..6ec2f349dfc 100644
--- a/chromium/third_party/libaom/source/libaom/aom/src/aom_encoder.c
+++ b/chromium/third_party/libaom/source/libaom/aom/src/aom_encoder.c
@@ -39,8 +39,25 @@ aom_codec_err_t aom_codec_enc_init_ver(aom_codec_ctx_t *ctx,
const aom_codec_enc_cfg_t *cfg,
aom_codec_flags_t flags, int ver) {
aom_codec_err_t res;
-
- if (ver != AOM_ENCODER_ABI_VERSION)
+ // The value of AOM_ENCODER_ABI_VERSION in libaom v3.0.0 and v3.1.0 - v3.1.3.
+ //
+ // We are compatible with these older libaom releases. AOM_ENCODER_ABI_VERSION
+ // was incremented after these releases for two reasons:
+ // 1. AOM_ENCODER_ABI_VERSION takes contribution from
+ // AOM_EXT_PART_ABI_VERSION. The external partition API is still
+ // experimental, so it should not be considered as part of the stable ABI.
+ // fd9ed8366 External partition: Define APIs
+ // https://aomedia-review.googlesource.com/c/aom/+/135663
+ // 2. As a way to detect the presence of speeds 7-9 in all-intra mode. I (wtc)
+ // suggested this change because I misunderstood how
+ // AOM_ENCODER_ABI_VERSION was used.
+ // bbdfa68d1 AllIntra: Redefine all-intra mode speed features for speed 7+
+ // https://aomedia-review.googlesource.com/c/aom/+/140624
+ const int aom_encoder_abi_version_25 = 25;
+
+ // TODO(bug aomedia:3228): Remove the check for aom_encoder_abi_version_25 in
+ // libaom v4.0.0.
+ if (ver != AOM_ENCODER_ABI_VERSION && ver != aom_encoder_abi_version_25)
res = AOM_CODEC_ABI_MISMATCH;
else if (!ctx || !iface || !cfg)
res = AOM_CODEC_INVALID_PARAM;
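
Callers normally never pass the version number themselves: the aom_codec_enc_init() macro supplies AOM_ENCODER_ABI_VERSION, so a binary built against v3.0.0 or v3.1.x baked in the value 25, which the relaxed check above now accepts. A sketch, assuming an AV1 build and a populated cfg:

~~~
#include "aom/aomcx.h"

/* The macro expands to aom_codec_enc_init_ver(..., AOM_ENCODER_ABI_VERSION),
 * which is where the compatibility check above runs. */
static aom_codec_err_t init_encoder(aom_codec_ctx_t *ctx,
                                    const aom_codec_enc_cfg_t *cfg) {
  return aom_codec_enc_init(ctx, aom_codec_av1_cx(), cfg, 0 /* flags */);
}
~~~
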
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/aom_dsp.cmake b/chromium/third_party/libaom/source/libaom/aom_dsp/aom_dsp.cmake
index 20d2743e781..cf677afd81c 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/aom_dsp.cmake
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/aom_dsp.cmake
@@ -109,33 +109,11 @@ list(APPEND AOM_DSP_COMMON_INTRIN_NEON
"${AOM_ROOT}/aom_dsp/arm/aom_convolve_copy_neon.c"
"${AOM_ROOT}/aom_dsp/arm/fwd_txfm_neon.c"
"${AOM_ROOT}/aom_dsp/arm/loopfilter_neon.c"
+ "${AOM_ROOT}/aom_dsp/arm/highbd_intrapred_neon.c"
"${AOM_ROOT}/aom_dsp/arm/intrapred_neon.c"
"${AOM_ROOT}/aom_dsp/arm/subtract_neon.c"
"${AOM_ROOT}/aom_dsp/arm/blend_a64_mask_neon.c")
-list(APPEND AOM_DSP_COMMON_INTRIN_DSPR2
- "${AOM_ROOT}/aom_dsp/mips/aom_convolve_copy_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/common_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/common_dspr2.h"
- "${AOM_ROOT}/aom_dsp/mips/convolve2_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/convolve2_horiz_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/convolve2_vert_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/convolve8_horiz_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/convolve8_vert_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/convolve_common_dspr2.h"
- "${AOM_ROOT}/aom_dsp/mips/intrapred16_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/intrapred4_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/intrapred8_dspr2.c"
- "${AOM_ROOT}/aom_dsp/mips/inv_txfm_dspr2.h")
-
-list(APPEND AOM_DSP_COMMON_INTRIN_MSA
- "${AOM_ROOT}/aom_dsp/mips/aom_convolve8_horiz_msa.c"
- "${AOM_ROOT}/aom_dsp/mips/aom_convolve8_vert_msa.c"
- "${AOM_ROOT}/aom_dsp/mips/aom_convolve_copy_msa.c"
- "${AOM_ROOT}/aom_dsp/mips/aom_convolve_msa.h"
- "${AOM_ROOT}/aom_dsp/mips/intrapred_msa.c"
- "${AOM_ROOT}/aom_dsp/mips/macros_msa.h")
-
if(CONFIG_AV1_HIGHBITDEPTH)
list(APPEND AOM_DSP_COMMON_INTRIN_SSE2
"${AOM_ROOT}/aom_dsp/x86/highbd_convolve_sse2.c"
@@ -222,6 +200,7 @@ if(CONFIG_AV1_ENCODER)
"${AOM_ROOT}/aom_dsp/x86/highbd_quantize_intrin_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/adaptive_quantize_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/highbd_adaptive_quantize_avx2.c"
+ "${AOM_ROOT}/aom_dsp/x86/quantize_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/sad4d_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/sad_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/highbd_sad_avx2.c"
@@ -248,7 +227,9 @@ if(CONFIG_AV1_ENCODER)
"${AOM_ROOT}/aom_dsp/x86/jnt_variance_ssse3.c"
"${AOM_ROOT}/aom_dsp/x86/jnt_sad_ssse3.c")
- list(APPEND AOM_DSP_ENCODER_INTRIN_SSE4_1 "${AOM_ROOT}/aom_dsp/x86/sse_sse4.c"
+ list(APPEND AOM_DSP_ENCODER_INTRIN_SSE4_1
+ "${AOM_ROOT}/aom_dsp/x86/avg_intrin_sse4.c"
+ "${AOM_ROOT}/aom_dsp/x86/sse_sse4.c"
"${AOM_ROOT}/aom_dsp/x86/obmc_sad_sse4.c"
"${AOM_ROOT}/aom_dsp/x86/obmc_variance_sse4.c")
@@ -261,11 +242,6 @@ if(CONFIG_AV1_ENCODER)
"${AOM_ROOT}/aom_dsp/arm/sse_neon.c"
"${AOM_ROOT}/aom_dsp/arm/sum_squares_neon.c")
- list(APPEND AOM_DSP_ENCODER_INTRIN_MSA "${AOM_ROOT}/aom_dsp/mips/sad_msa.c"
- "${AOM_ROOT}/aom_dsp/mips/subtract_msa.c"
- "${AOM_ROOT}/aom_dsp/mips/variance_msa.c"
- "${AOM_ROOT}/aom_dsp/mips/sub_pixel_variance_msa.c")
-
if(CONFIG_AV1_HIGHBITDEPTH)
list(APPEND AOM_DSP_ENCODER_ASM_SSE2
"${AOM_ROOT}/aom_dsp/x86/highbd_sad4d_sse2.asm"
@@ -286,6 +262,7 @@ if(CONFIG_AV1_ENCODER)
"${AOM_ROOT}/aom_dsp/x86/highbd_variance_sse4.c")
list(APPEND AOM_DSP_ENCODER_INTRIN_NEON
+ "${AOM_ROOT}/aom_dsp/arm/highbd_quantize_neon.c"
"${AOM_ROOT}/aom_dsp/arm/highbd_variance_neon.c")
endif()
@@ -306,12 +283,16 @@ if(CONFIG_AV1_ENCODER)
if(CONFIG_REALTIME_ONLY)
list(REMOVE_ITEM AOM_DSP_ENCODER_INTRIN_AVX2
+ "${AOM_ROOT}/aom_dsp/x86/adaptive_quantize_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/obmc_sad_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/obmc_variance_avx2.c")
list(REMOVE_ITEM AOM_DSP_ENCODER_INTRIN_SSE4_1
"${AOM_ROOT}/aom_dsp/x86/obmc_sad_sse4.c"
"${AOM_ROOT}/aom_dsp/x86/obmc_variance_sse4.c")
+
+ list(REMOVE_ITEM AOM_DSP_ENCODER_INTRIN_SSE2
+ "${AOM_ROOT}/aom_dsp/x86/adaptive_quantize_sse2.c")
endif()
endif()
@@ -419,20 +400,6 @@ function(setup_aom_dsp_targets)
endif()
endif()
- if(HAVE_DSPR2)
- add_intrinsics_object_library("" "dspr2" "aom_dsp_common"
- "AOM_DSP_COMMON_INTRIN_DSPR2")
- endif()
-
- if(HAVE_MSA)
- add_intrinsics_object_library("" "msa" "aom_dsp_common"
- "AOM_DSP_COMMON_INTRIN_MSA")
- if(CONFIG_AV1_ENCODER)
- add_intrinsics_object_library("" "msa" "aom_dsp_encoder"
- "AOM_DSP_ENCODER_INTRIN_MSA")
- endif()
- endif()
-
target_sources(aom PRIVATE $<TARGET_OBJECTS:aom_dsp>)
if(BUILD_SHARED_LIBS)
target_sources(aom_static PRIVATE $<TARGET_OBJECTS:aom_dsp>)
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/aom_dsp_rtcd_defs.pl b/chromium/third_party/libaom/source/libaom/aom_dsp/aom_dsp_rtcd_defs.pl
index b8ea69e3d84..89899f4a077 100755
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -41,31 +41,29 @@ if ($opts{arch} eq "x86_64") {
@block_widths = (4, 8, 16, 32, 64, 128);
-@block_sizes = ();
+@encoder_block_sizes = ();
foreach $w (@block_widths) {
foreach $h (@block_widths) {
- push @block_sizes, [$w, $h] if ($w <= 2*$h && $h <= 2*$w) ;
+ push @encoder_block_sizes, [$w, $h] if ($w <= 2*$h && $h <= 2*$w);
}
}
if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
- push @block_sizes, [4, 16];
- push @block_sizes, [16, 4];
- push @block_sizes, [8, 32];
- push @block_sizes, [32, 8];
- push @block_sizes, [16, 64];
- push @block_sizes, [64, 16];
+ push @encoder_block_sizes, [4, 16];
+ push @encoder_block_sizes, [16, 4];
+ push @encoder_block_sizes, [8, 32];
+ push @encoder_block_sizes, [32, 8];
+ push @encoder_block_sizes, [16, 64];
+ push @encoder_block_sizes, [64, 16];
}
-@tx_dims = (2, 4, 8, 16, 32, 64);
+@tx_dims = (4, 8, 16, 32, 64);
@tx_sizes = ();
foreach $w (@tx_dims) {
push @tx_sizes, [$w, $w];
foreach $h (@tx_dims) {
push @tx_sizes, [$w, $h] if ($w >=4 && $h >=4 && ($w == 2*$h || $h == 2*$w));
- if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
- push @tx_sizes, [$w, $h] if ($w >=4 && $h >=4 && ($w == 4*$h || $h == 4*$w));
- }
+ push @tx_sizes, [$w, $h] if ($w >=4 && $h >=4 && ($w == 4*$h || $h == 4*$w));
}
}
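
The renamed @encoder_block_sizes loop keeps square and up-to-2:1 sizes unconditionally; the 1:4 and 4:1 sizes are appended separately, and only when CONFIG_REALTIME_ONLY is off. For illustration only, the same predicate in C:

~~~
#include <stdio.h>

/* Prints the block sizes the unconditional loop generates: (w, h) qualifies
 * when neither side is more than twice the other. */
int main(void) {
  const int dims[] = { 4, 8, 16, 32, 64, 128 };
  for (int i = 0; i < 6; ++i)
    for (int j = 0; j < 6; ++j)
      if (dims[i] <= 2 * dims[j] && dims[j] <= 2 * dims[i])
        printf("%dx%d\n", dims[i], dims[j]);
  return 0;
}
~~~
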
@@ -87,236 +85,234 @@ foreach (@tx_sizes) {
}
}
-specialize qw/aom_dc_top_predictor_4x4 msa neon sse2/;
+specialize qw/aom_dc_top_predictor_4x4 neon sse2/;
specialize qw/aom_dc_top_predictor_4x8 sse2/;
+specialize qw/aom_dc_top_predictor_4x16 sse2/;
specialize qw/aom_dc_top_predictor_8x4 sse2/;
-specialize qw/aom_dc_top_predictor_8x8 neon msa sse2/;
+specialize qw/aom_dc_top_predictor_8x8 neon sse2/;
specialize qw/aom_dc_top_predictor_8x16 sse2/;
+specialize qw/aom_dc_top_predictor_8x32 sse2/;
+specialize qw/aom_dc_top_predictor_16x4 sse2/;
specialize qw/aom_dc_top_predictor_16x8 sse2/;
-specialize qw/aom_dc_top_predictor_16x16 neon msa sse2/;
+specialize qw/aom_dc_top_predictor_16x16 neon sse2/;
specialize qw/aom_dc_top_predictor_16x32 sse2/;
+specialize qw/aom_dc_top_predictor_16x64 sse2/;
+specialize qw/aom_dc_top_predictor_32x8 sse2/;
specialize qw/aom_dc_top_predictor_32x16 sse2 avx2/;
-specialize qw/aom_dc_top_predictor_32x32 msa neon sse2 avx2/;
+specialize qw/aom_dc_top_predictor_32x32 neon sse2 avx2/;
specialize qw/aom_dc_top_predictor_32x64 sse2 avx2/;
-specialize qw/aom_dc_top_predictor_64x64 sse2 avx2/;
+specialize qw/aom_dc_top_predictor_64x16 sse2 avx2/;
specialize qw/aom_dc_top_predictor_64x32 sse2 avx2/;
+specialize qw/aom_dc_top_predictor_64x64 sse2 avx2/;
-specialize qw/aom_dc_left_predictor_4x4 msa neon sse2/;
+specialize qw/aom_dc_left_predictor_4x4 neon sse2/;
specialize qw/aom_dc_left_predictor_4x8 sse2/;
+specialize qw/aom_dc_left_predictor_4x16 sse2/;
specialize qw/aom_dc_left_predictor_8x4 sse2/;
-specialize qw/aom_dc_left_predictor_8x8 neon msa sse2/;
+specialize qw/aom_dc_left_predictor_8x8 neon sse2/;
specialize qw/aom_dc_left_predictor_8x16 sse2/;
+specialize qw/aom_dc_left_predictor_8x32 sse2/;
+specialize qw/aom_dc_left_predictor_16x4 sse2/;
specialize qw/aom_dc_left_predictor_16x8 sse2/;
-specialize qw/aom_dc_left_predictor_16x16 neon msa sse2/;
+specialize qw/aom_dc_left_predictor_16x16 neon sse2/;
specialize qw/aom_dc_left_predictor_16x32 sse2/;
+specialize qw/aom_dc_left_predictor_16x64 sse2/;
+specialize qw/aom_dc_left_predictor_32x8 sse2/;
specialize qw/aom_dc_left_predictor_32x16 sse2 avx2/;
-specialize qw/aom_dc_left_predictor_32x32 msa neon sse2 avx2/;
+specialize qw/aom_dc_left_predictor_32x32 neon sse2 avx2/;
specialize qw/aom_dc_left_predictor_32x64 sse2 avx2/;
-specialize qw/aom_dc_left_predictor_64x64 sse2 avx2/;
+specialize qw/aom_dc_left_predictor_64x16 sse2 avx2/;
specialize qw/aom_dc_left_predictor_64x32 sse2 avx2/;
+specialize qw/aom_dc_left_predictor_64x64 sse2 avx2/;
-specialize qw/aom_dc_128_predictor_4x4 msa neon sse2/;
+specialize qw/aom_dc_128_predictor_4x4 neon sse2/;
specialize qw/aom_dc_128_predictor_4x8 sse2/;
+specialize qw/aom_dc_128_predictor_4x16 sse2/;
specialize qw/aom_dc_128_predictor_8x4 sse2/;
-specialize qw/aom_dc_128_predictor_8x8 neon msa sse2/;
+specialize qw/aom_dc_128_predictor_8x8 neon sse2/;
specialize qw/aom_dc_128_predictor_8x16 sse2/;
+specialize qw/aom_dc_128_predictor_8x32 sse2/;
+specialize qw/aom_dc_128_predictor_16x4 sse2/;
specialize qw/aom_dc_128_predictor_16x8 sse2/;
-specialize qw/aom_dc_128_predictor_16x16 neon msa sse2/;
+specialize qw/aom_dc_128_predictor_16x16 neon sse2/;
specialize qw/aom_dc_128_predictor_16x32 sse2/;
+specialize qw/aom_dc_128_predictor_16x64 sse2/;
+specialize qw/aom_dc_128_predictor_32x8 sse2/;
specialize qw/aom_dc_128_predictor_32x16 sse2 avx2/;
-specialize qw/aom_dc_128_predictor_32x32 msa neon sse2 avx2/;
+specialize qw/aom_dc_128_predictor_32x32 neon sse2 avx2/;
specialize qw/aom_dc_128_predictor_32x64 sse2 avx2/;
-specialize qw/aom_dc_128_predictor_64x64 sse2 avx2/;
+specialize qw/aom_dc_128_predictor_64x16 sse2 avx2/;
specialize qw/aom_dc_128_predictor_64x32 sse2 avx2/;
+specialize qw/aom_dc_128_predictor_64x64 sse2 avx2/;
-specialize qw/aom_v_predictor_4x4 neon msa sse2/;
+specialize qw/aom_v_predictor_4x4 neon sse2/;
specialize qw/aom_v_predictor_4x8 sse2/;
+specialize qw/aom_v_predictor_4x16 sse2/;
specialize qw/aom_v_predictor_8x4 sse2/;
-specialize qw/aom_v_predictor_8x8 neon msa sse2/;
+specialize qw/aom_v_predictor_8x8 neon sse2/;
specialize qw/aom_v_predictor_8x16 sse2/;
+specialize qw/aom_v_predictor_8x32 sse2/;
+specialize qw/aom_v_predictor_16x4 sse2/;
specialize qw/aom_v_predictor_16x8 sse2/;
-specialize qw/aom_v_predictor_16x16 neon msa sse2/;
+specialize qw/aom_v_predictor_16x16 neon sse2/;
specialize qw/aom_v_predictor_16x32 sse2/;
+specialize qw/aom_v_predictor_16x64 sse2/;
+specialize qw/aom_v_predictor_32x8 sse2/;
specialize qw/aom_v_predictor_32x16 sse2 avx2/;
-specialize qw/aom_v_predictor_32x32 neon msa sse2 avx2/;
+specialize qw/aom_v_predictor_32x32 neon sse2 avx2/;
specialize qw/aom_v_predictor_32x64 sse2 avx2/;
-specialize qw/aom_v_predictor_64x64 sse2 avx2/;
+specialize qw/aom_v_predictor_64x16 sse2 avx2/;
specialize qw/aom_v_predictor_64x32 sse2 avx2/;
+specialize qw/aom_v_predictor_64x64 sse2 avx2/;
+specialize qw/aom_h_predictor_4x4 neon sse2/;
specialize qw/aom_h_predictor_4x8 sse2/;
-specialize qw/aom_h_predictor_4x4 neon dspr2 msa sse2/;
+specialize qw/aom_h_predictor_4x16 sse2/;
specialize qw/aom_h_predictor_8x4 sse2/;
-specialize qw/aom_h_predictor_8x8 neon dspr2 msa sse2/;
+specialize qw/aom_h_predictor_8x8 neon sse2/;
specialize qw/aom_h_predictor_8x16 sse2/;
+specialize qw/aom_h_predictor_8x32 sse2/;
+specialize qw/aom_h_predictor_16x4 sse2/;
specialize qw/aom_h_predictor_16x8 sse2/;
-specialize qw/aom_h_predictor_16x16 neon dspr2 msa sse2/;
+specialize qw/aom_h_predictor_16x16 neon sse2/;
specialize qw/aom_h_predictor_16x32 sse2/;
+specialize qw/aom_h_predictor_16x64 sse2/;
+specialize qw/aom_h_predictor_32x8 sse2/;
specialize qw/aom_h_predictor_32x16 sse2/;
-specialize qw/aom_h_predictor_32x32 neon msa sse2 avx2/;
+specialize qw/aom_h_predictor_32x32 neon sse2 avx2/;
specialize qw/aom_h_predictor_32x64 sse2/;
-specialize qw/aom_h_predictor_64x64 sse2/;
+specialize qw/aom_h_predictor_64x16 sse2/;
specialize qw/aom_h_predictor_64x32 sse2/;
+specialize qw/aom_h_predictor_64x64 sse2/;
-specialize qw/aom_paeth_predictor_4x4 ssse3/;
-specialize qw/aom_paeth_predictor_4x8 ssse3/;
-specialize qw/aom_paeth_predictor_8x4 ssse3/;
-specialize qw/aom_paeth_predictor_8x8 ssse3/;
-specialize qw/aom_paeth_predictor_8x16 ssse3/;
-specialize qw/aom_paeth_predictor_16x8 ssse3 avx2/;
-specialize qw/aom_paeth_predictor_16x16 ssse3 avx2/;
-specialize qw/aom_paeth_predictor_16x32 ssse3 avx2/;
-specialize qw/aom_paeth_predictor_32x16 ssse3 avx2/;
-specialize qw/aom_paeth_predictor_32x32 ssse3 avx2/;
-specialize qw/aom_paeth_predictor_32x64 ssse3 avx2/;
-specialize qw/aom_paeth_predictor_64x32 ssse3 avx2/;
-specialize qw/aom_paeth_predictor_64x64 ssse3 avx2/;
+specialize qw/aom_paeth_predictor_4x4 ssse3 neon/;
+specialize qw/aom_paeth_predictor_4x8 ssse3 neon/;
+specialize qw/aom_paeth_predictor_4x16 ssse3 neon/;
+specialize qw/aom_paeth_predictor_8x4 ssse3 neon/;
+specialize qw/aom_paeth_predictor_8x8 ssse3 neon/;
+specialize qw/aom_paeth_predictor_8x16 ssse3 neon/;
+specialize qw/aom_paeth_predictor_8x32 ssse3 neon/;
+specialize qw/aom_paeth_predictor_16x4 ssse3 neon/;
+specialize qw/aom_paeth_predictor_16x8 ssse3 avx2 neon/;
+specialize qw/aom_paeth_predictor_16x16 ssse3 avx2 neon/;
+specialize qw/aom_paeth_predictor_16x32 ssse3 avx2 neon/;
+specialize qw/aom_paeth_predictor_16x64 ssse3 avx2 neon/;
+specialize qw/aom_paeth_predictor_32x8 ssse3 neon/;
+specialize qw/aom_paeth_predictor_32x16 ssse3 avx2 neon/;
+specialize qw/aom_paeth_predictor_32x32 ssse3 avx2 neon/;
+specialize qw/aom_paeth_predictor_32x64 ssse3 avx2 neon/;
+specialize qw/aom_paeth_predictor_64x16 ssse3 avx2 neon/;
+specialize qw/aom_paeth_predictor_64x32 ssse3 avx2 neon/;
+specialize qw/aom_paeth_predictor_64x64 ssse3 avx2 neon/;
specialize qw/aom_smooth_predictor_4x4 neon ssse3/;
specialize qw/aom_smooth_predictor_4x8 neon ssse3/;
+specialize qw/aom_smooth_predictor_4x16 neon ssse3/;
specialize qw/aom_smooth_predictor_8x4 neon ssse3/;
specialize qw/aom_smooth_predictor_8x8 neon ssse3/;
specialize qw/aom_smooth_predictor_8x16 neon ssse3/;
+specialize qw/aom_smooth_predictor_8x32 neon ssse3/;
+specialize qw/aom_smooth_predictor_16x4 neon ssse3/;
specialize qw/aom_smooth_predictor_16x8 neon ssse3/;
specialize qw/aom_smooth_predictor_16x16 neon ssse3/;
specialize qw/aom_smooth_predictor_16x32 neon ssse3/;
+specialize qw/aom_smooth_predictor_16x64 neon ssse3/;
+specialize qw/aom_smooth_predictor_32x8 neon ssse3/;
specialize qw/aom_smooth_predictor_32x16 neon ssse3/;
specialize qw/aom_smooth_predictor_32x32 neon ssse3/;
specialize qw/aom_smooth_predictor_32x64 neon ssse3/;
-specialize qw/aom_smooth_predictor_64x64 neon ssse3/;
+specialize qw/aom_smooth_predictor_64x16 neon ssse3/;
specialize qw/aom_smooth_predictor_64x32 neon ssse3/;
+specialize qw/aom_smooth_predictor_64x64 neon ssse3/;
-specialize qw/aom_smooth_v_predictor_4x4 ssse3/;
-specialize qw/aom_smooth_v_predictor_4x8 ssse3/;
-specialize qw/aom_smooth_v_predictor_8x4 ssse3/;
-specialize qw/aom_smooth_v_predictor_8x8 ssse3/;
-specialize qw/aom_smooth_v_predictor_8x16 ssse3/;
-specialize qw/aom_smooth_v_predictor_16x8 ssse3/;
-specialize qw/aom_smooth_v_predictor_16x16 ssse3/;
-specialize qw/aom_smooth_v_predictor_16x32 ssse3/;
-specialize qw/aom_smooth_v_predictor_32x16 ssse3/;
-specialize qw/aom_smooth_v_predictor_32x32 ssse3/;
-specialize qw/aom_smooth_v_predictor_32x64 ssse3/;
-specialize qw/aom_smooth_v_predictor_64x64 ssse3/;
-specialize qw/aom_smooth_v_predictor_64x32 ssse3/;
-
-specialize qw/aom_smooth_h_predictor_4x4 ssse3/;
-specialize qw/aom_smooth_h_predictor_4x8 ssse3/;
-specialize qw/aom_smooth_h_predictor_8x4 ssse3/;
-specialize qw/aom_smooth_h_predictor_8x8 ssse3/;
-specialize qw/aom_smooth_h_predictor_8x16 ssse3/;
-specialize qw/aom_smooth_h_predictor_16x8 ssse3/;
-specialize qw/aom_smooth_h_predictor_16x16 ssse3/;
-specialize qw/aom_smooth_h_predictor_16x32 ssse3/;
-specialize qw/aom_smooth_h_predictor_32x16 ssse3/;
-specialize qw/aom_smooth_h_predictor_32x32 ssse3/;
-specialize qw/aom_smooth_h_predictor_32x64 ssse3/;
-specialize qw/aom_smooth_h_predictor_64x64 ssse3/;
-specialize qw/aom_smooth_h_predictor_64x32 ssse3/;
-
-if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
- specialize qw/aom_dc_top_predictor_4x16 sse2/;
- specialize qw/aom_dc_top_predictor_8x32 sse2/;
- specialize qw/aom_dc_top_predictor_16x4 sse2/;
- specialize qw/aom_dc_top_predictor_16x64 sse2/;
- specialize qw/aom_dc_top_predictor_32x8 sse2/;
- specialize qw/aom_dc_top_predictor_64x16 sse2 avx2/;
-
- specialize qw/aom_dc_left_predictor_4x16 sse2/;
- specialize qw/aom_dc_left_predictor_8x32 sse2/;
- specialize qw/aom_dc_left_predictor_16x4 sse2/;
- specialize qw/aom_dc_left_predictor_16x64 sse2/;
- specialize qw/aom_dc_left_predictor_32x8 sse2/;
- specialize qw/aom_dc_left_predictor_64x16 sse2 avx2/;
-
- specialize qw/aom_dc_128_predictor_4x16 sse2/;
- specialize qw/aom_dc_128_predictor_8x32 sse2/;
- specialize qw/aom_dc_128_predictor_16x4 sse2/;
- specialize qw/aom_dc_128_predictor_16x64 sse2/;
- specialize qw/aom_dc_128_predictor_32x8 sse2/;
- specialize qw/aom_dc_128_predictor_64x16 sse2 avx2/;
-
- specialize qw/aom_v_predictor_4x16 sse2/;
- specialize qw/aom_v_predictor_8x32 sse2/;
- specialize qw/aom_v_predictor_16x4 sse2/;
- specialize qw/aom_v_predictor_16x64 sse2/;
- specialize qw/aom_v_predictor_32x8 sse2/;
- specialize qw/aom_v_predictor_64x16 sse2 avx2/;
-
- specialize qw/aom_h_predictor_4x16 sse2/;
- specialize qw/aom_h_predictor_8x32 sse2/;
- specialize qw/aom_h_predictor_16x4 sse2/;
- specialize qw/aom_h_predictor_16x64 sse2/;
- specialize qw/aom_h_predictor_32x8 sse2/;
- specialize qw/aom_h_predictor_64x16 sse2/;
-
- specialize qw/aom_paeth_predictor_4x16 ssse3/;
- specialize qw/aom_paeth_predictor_8x32 ssse3/;
- specialize qw/aom_paeth_predictor_16x4 ssse3/;
- specialize qw/aom_paeth_predictor_16x64 ssse3 avx2/;
- specialize qw/aom_paeth_predictor_32x8 ssse3/;
- specialize qw/aom_paeth_predictor_64x16 ssse3 avx2/;
-
- specialize qw/aom_smooth_predictor_4x16 neon ssse3/;
- specialize qw/aom_smooth_predictor_8x32 neon ssse3/;
- specialize qw/aom_smooth_predictor_16x4 neon ssse3/;
- specialize qw/aom_smooth_predictor_16x64 neon ssse3/;
- specialize qw/aom_smooth_predictor_32x8 neon ssse3/;
- specialize qw/aom_smooth_predictor_64x16 neon ssse3/;
-
- specialize qw/aom_smooth_v_predictor_4x16 ssse3/;
- specialize qw/aom_smooth_v_predictor_8x32 ssse3/;
- specialize qw/aom_smooth_v_predictor_16x4 ssse3/;
- specialize qw/aom_smooth_v_predictor_16x64 ssse3/;
- specialize qw/aom_smooth_v_predictor_32x8 ssse3/;
- specialize qw/aom_smooth_v_predictor_64x16 ssse3/;
-
- specialize qw/aom_smooth_h_predictor_4x16 ssse3/;
- specialize qw/aom_smooth_h_predictor_8x32 ssse3/;
- specialize qw/aom_smooth_h_predictor_16x4 ssse3/;
- specialize qw/aom_smooth_h_predictor_16x64 ssse3/;
- specialize qw/aom_smooth_h_predictor_32x8 ssse3/;
- specialize qw/aom_smooth_h_predictor_64x16 ssse3/;
-}
+specialize qw/aom_smooth_v_predictor_4x4 neon ssse3/;
+specialize qw/aom_smooth_v_predictor_4x8 neon ssse3/;
+specialize qw/aom_smooth_v_predictor_4x16 neon ssse3/;
+specialize qw/aom_smooth_v_predictor_8x4 neon ssse3/;
+specialize qw/aom_smooth_v_predictor_8x8 neon ssse3/;
+specialize qw/aom_smooth_v_predictor_8x16 neon ssse3/;
+specialize qw/aom_smooth_v_predictor_8x32 neon ssse3/;
+specialize qw/aom_smooth_v_predictor_16x4 neon ssse3/;
+specialize qw/aom_smooth_v_predictor_16x8 neon ssse3/;
+specialize qw/aom_smooth_v_predictor_16x16 neon ssse3/;
+specialize qw/aom_smooth_v_predictor_16x32 neon ssse3/;
+specialize qw/aom_smooth_v_predictor_16x64 neon ssse3/;
+specialize qw/aom_smooth_v_predictor_32x8 neon ssse3/;
+specialize qw/aom_smooth_v_predictor_32x16 neon ssse3/;
+specialize qw/aom_smooth_v_predictor_32x32 neon ssse3/;
+specialize qw/aom_smooth_v_predictor_32x64 neon ssse3/;
+specialize qw/aom_smooth_v_predictor_64x16 neon ssse3/;
+specialize qw/aom_smooth_v_predictor_64x32 neon ssse3/;
+specialize qw/aom_smooth_v_predictor_64x64 neon ssse3/;
+
+specialize qw/aom_smooth_h_predictor_4x4 neon ssse3/;
+specialize qw/aom_smooth_h_predictor_4x8 neon ssse3/;
+specialize qw/aom_smooth_h_predictor_4x16 neon ssse3/;
+specialize qw/aom_smooth_h_predictor_8x4 neon ssse3/;
+specialize qw/aom_smooth_h_predictor_8x8 neon ssse3/;
+specialize qw/aom_smooth_h_predictor_8x16 neon ssse3/;
+specialize qw/aom_smooth_h_predictor_8x32 neon ssse3/;
+specialize qw/aom_smooth_h_predictor_16x4 neon ssse3/;
+specialize qw/aom_smooth_h_predictor_16x8 neon ssse3/;
+specialize qw/aom_smooth_h_predictor_16x16 neon ssse3/;
+specialize qw/aom_smooth_h_predictor_16x32 neon ssse3/;
+specialize qw/aom_smooth_h_predictor_16x64 neon ssse3/;
+specialize qw/aom_smooth_h_predictor_32x8 neon ssse3/;
+specialize qw/aom_smooth_h_predictor_32x16 neon ssse3/;
+specialize qw/aom_smooth_h_predictor_32x32 neon ssse3/;
+specialize qw/aom_smooth_h_predictor_32x64 neon ssse3/;
+specialize qw/aom_smooth_h_predictor_64x16 neon ssse3/;
+specialize qw/aom_smooth_h_predictor_64x32 neon ssse3/;
+specialize qw/aom_smooth_h_predictor_64x64 neon ssse3/;
# TODO(yunqingwang): optimize rectangular DC_PRED to replace division
# by multiply and shift (a scalar sketch of the trick follows this list).
-specialize qw/aom_dc_predictor_4x4 dspr2 msa neon sse2/;
+specialize qw/aom_dc_predictor_4x4 neon sse2/;
specialize qw/aom_dc_predictor_4x8 sse2/;
specialize qw/aom_dc_predictor_4x16 sse2/;
specialize qw/aom_dc_predictor_8x4 sse2/;
-specialize qw/aom_dc_predictor_8x8 dspr2 neon msa sse2/;
+specialize qw/aom_dc_predictor_8x8 neon sse2/;
specialize qw/aom_dc_predictor_8x16 sse2/;
specialize qw/aom_dc_predictor_8x32 sse2/;
specialize qw/aom_dc_predictor_16x4 sse2/;
specialize qw/aom_dc_predictor_16x8 sse2/;
-specialize qw/aom_dc_predictor_16x16 dspr2 neon msa sse2/;
+specialize qw/aom_dc_predictor_16x16 neon sse2/;
specialize qw/aom_dc_predictor_16x32 sse2/;
specialize qw/aom_dc_predictor_16x64 sse2/;
specialize qw/aom_dc_predictor_32x8 sse2/;
specialize qw/aom_dc_predictor_32x16 sse2 avx2/;
-specialize qw/aom_dc_predictor_32x32 msa neon sse2 avx2/;
+specialize qw/aom_dc_predictor_32x32 neon sse2 avx2/;
specialize qw/aom_dc_predictor_32x64 sse2 avx2/;
specialize qw/aom_dc_predictor_64x64 sse2 avx2/;
specialize qw/aom_dc_predictor_64x32 sse2 avx2/;
specialize qw/aom_dc_predictor_64x16 sse2 avx2/;
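
For the TODO above: DC_PRED averages the w top and h left boundary pixels, and for
rectangular blocks w + h is not a power of two, so the average needs a true division.
A minimal scalar sketch of the multiply-and-shift replacement follows; the multipliers
and shifts are illustrative choices, not constants taken from libaom:

#include <stdint.h>

// Divide-by-constant via multiply and shift: for a fixed pixel count,
// sum / count == (sum * M) >> S with M = ceil(2^S / count), valid while
// (M * count - 2^S) * sum_max < 2^S. The constants here are examples.
static uint8_t dc_avg_mul_shift(uint32_t sum, uint32_t count) {
  const uint32_t rounded = sum + (count >> 1);  // round to nearest
  switch (count) {
    case 12:  // 4x8 / 8x4 blocks: 1/12 ~= 43691 / 2^19
      return (uint8_t)(((uint64_t)rounded * 43691) >> 19);
    case 24:  // 8x16 / 16x8 blocks: 1/24 ~= 43691 / 2^20
      return (uint8_t)(((uint64_t)rounded * 43691) >> 20);
    default:  // power-of-two counts already reduce to a plain shift
      return (uint8_t)(rounded / count);
  }
}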
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
- specialize qw/aom_highbd_v_predictor_4x4 sse2/;
- specialize qw/aom_highbd_v_predictor_4x8 sse2/;
- specialize qw/aom_highbd_v_predictor_8x4 sse2/;
- specialize qw/aom_highbd_v_predictor_8x8 sse2/;
- specialize qw/aom_highbd_v_predictor_8x16 sse2/;
- specialize qw/aom_highbd_v_predictor_16x8 sse2/;
- specialize qw/aom_highbd_v_predictor_16x16 sse2/;
- specialize qw/aom_highbd_v_predictor_16x32 sse2/;
- specialize qw/aom_highbd_v_predictor_32x16 sse2/;
- specialize qw/aom_highbd_v_predictor_32x32 sse2/;
+ specialize qw/aom_highbd_v_predictor_4x4 sse2 neon/;
+ specialize qw/aom_highbd_v_predictor_4x8 sse2 neon/;
+ specialize qw/aom_highbd_v_predictor_4x16 neon/;
+ specialize qw/aom_highbd_v_predictor_8x4 sse2 neon/;
+ specialize qw/aom_highbd_v_predictor_8x8 sse2 neon/;
+ specialize qw/aom_highbd_v_predictor_8x16 sse2 neon/;
+ specialize qw/aom_highbd_v_predictor_8x32 neon/;
+ specialize qw/aom_highbd_v_predictor_16x4 neon/;
+ specialize qw/aom_highbd_v_predictor_16x8 sse2 neon/;
+ specialize qw/aom_highbd_v_predictor_16x16 sse2 neon/;
+ specialize qw/aom_highbd_v_predictor_16x32 sse2 neon/;
+ specialize qw/aom_highbd_v_predictor_16x64 neon/;
+ specialize qw/aom_highbd_v_predictor_32x8 neon/;
+ specialize qw/aom_highbd_v_predictor_32x16 sse2 neon/;
+ specialize qw/aom_highbd_v_predictor_32x32 sse2 neon/;
+ specialize qw/aom_highbd_v_predictor_32x64 neon/;
+ specialize qw/aom_highbd_v_predictor_64x16 neon/;
+ specialize qw/aom_highbd_v_predictor_64x32 neon/;
+ specialize qw/aom_highbd_v_predictor_64x64 neon/;
# TODO(yunqingwang): optimize rectangular DC_PRED to replace division
# by multiply and shift.
specialize qw/aom_highbd_dc_predictor_4x4 sse2 neon/;
specialize qw/aom_highbd_dc_predictor_4x8 sse2/;
specialize qw/aom_highbd_dc_predictor_8x4 sse2/;
- specialize qw/aom_highbd_dc_predictor_8x8 sse2 neon/;;
+ specialize qw/aom_highbd_dc_predictor_8x8 sse2 neon/;
specialize qw/aom_highbd_dc_predictor_8x16 sse2/;
specialize qw/aom_highbd_dc_predictor_16x8 sse2/;
specialize qw/aom_highbd_dc_predictor_16x16 sse2 neon/;
@@ -365,6 +361,86 @@ if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
specialize qw/aom_highbd_dc_left_predictor_32x32 sse2/;
specialize qw/aom_highbd_dc_top_predictor_32x32 sse2/;
specialize qw/aom_highbd_dc_128_predictor_32x32 sse2/;
+
+ specialize qw/aom_highbd_paeth_predictor_4x4 neon/;
+ specialize qw/aom_highbd_paeth_predictor_4x8 neon/;
+ specialize qw/aom_highbd_paeth_predictor_4x16 neon/;
+ specialize qw/aom_highbd_paeth_predictor_8x4 neon/;
+ specialize qw/aom_highbd_paeth_predictor_8x8 neon/;
+ specialize qw/aom_highbd_paeth_predictor_8x16 neon/;
+ specialize qw/aom_highbd_paeth_predictor_8x32 neon/;
+ specialize qw/aom_highbd_paeth_predictor_16x4 neon/;
+ specialize qw/aom_highbd_paeth_predictor_16x8 neon/;
+ specialize qw/aom_highbd_paeth_predictor_16x16 neon/;
+ specialize qw/aom_highbd_paeth_predictor_16x32 neon/;
+ specialize qw/aom_highbd_paeth_predictor_16x64 neon/;
+ specialize qw/aom_highbd_paeth_predictor_32x8 neon/;
+ specialize qw/aom_highbd_paeth_predictor_32x16 neon/;
+ specialize qw/aom_highbd_paeth_predictor_32x32 neon/;
+ specialize qw/aom_highbd_paeth_predictor_32x64 neon/;
+ specialize qw/aom_highbd_paeth_predictor_64x16 neon/;
+ specialize qw/aom_highbd_paeth_predictor_64x32 neon/;
+ specialize qw/aom_highbd_paeth_predictor_64x64 neon/;
+
+ specialize qw/aom_highbd_smooth_predictor_4x4 neon/;
+ specialize qw/aom_highbd_smooth_predictor_4x8 neon/;
+ specialize qw/aom_highbd_smooth_predictor_4x16 neon/;
+ specialize qw/aom_highbd_smooth_predictor_8x4 neon/;
+ specialize qw/aom_highbd_smooth_predictor_8x8 neon/;
+ specialize qw/aom_highbd_smooth_predictor_8x16 neon/;
+ specialize qw/aom_highbd_smooth_predictor_8x32 neon/;
+ specialize qw/aom_highbd_smooth_predictor_16x4 neon/;
+ specialize qw/aom_highbd_smooth_predictor_16x8 neon/;
+ specialize qw/aom_highbd_smooth_predictor_16x16 neon/;
+ specialize qw/aom_highbd_smooth_predictor_16x32 neon/;
+ specialize qw/aom_highbd_smooth_predictor_16x64 neon/;
+ specialize qw/aom_highbd_smooth_predictor_32x8 neon/;
+ specialize qw/aom_highbd_smooth_predictor_32x16 neon/;
+ specialize qw/aom_highbd_smooth_predictor_32x32 neon/;
+ specialize qw/aom_highbd_smooth_predictor_32x64 neon/;
+ specialize qw/aom_highbd_smooth_predictor_64x16 neon/;
+ specialize qw/aom_highbd_smooth_predictor_64x32 neon/;
+ specialize qw/aom_highbd_smooth_predictor_64x64 neon/;
+
+ specialize qw/aom_highbd_smooth_v_predictor_4x4 neon/;
+ specialize qw/aom_highbd_smooth_v_predictor_4x8 neon/;
+ specialize qw/aom_highbd_smooth_v_predictor_4x16 neon/;
+ specialize qw/aom_highbd_smooth_v_predictor_8x4 neon/;
+ specialize qw/aom_highbd_smooth_v_predictor_8x8 neon/;
+ specialize qw/aom_highbd_smooth_v_predictor_8x16 neon/;
+ specialize qw/aom_highbd_smooth_v_predictor_8x32 neon/;
+ specialize qw/aom_highbd_smooth_v_predictor_16x4 neon/;
+ specialize qw/aom_highbd_smooth_v_predictor_16x8 neon/;
+ specialize qw/aom_highbd_smooth_v_predictor_16x16 neon/;
+ specialize qw/aom_highbd_smooth_v_predictor_16x32 neon/;
+ specialize qw/aom_highbd_smooth_v_predictor_16x64 neon/;
+ specialize qw/aom_highbd_smooth_v_predictor_32x8 neon/;
+ specialize qw/aom_highbd_smooth_v_predictor_32x16 neon/;
+ specialize qw/aom_highbd_smooth_v_predictor_32x32 neon/;
+ specialize qw/aom_highbd_smooth_v_predictor_32x64 neon/;
+ specialize qw/aom_highbd_smooth_v_predictor_64x16 neon/;
+ specialize qw/aom_highbd_smooth_v_predictor_64x32 neon/;
+ specialize qw/aom_highbd_smooth_v_predictor_64x64 neon/;
+
+ specialize qw/aom_highbd_smooth_h_predictor_4x4 neon/;
+ specialize qw/aom_highbd_smooth_h_predictor_4x8 neon/;
+ specialize qw/aom_highbd_smooth_h_predictor_4x16 neon/;
+ specialize qw/aom_highbd_smooth_h_predictor_8x4 neon/;
+ specialize qw/aom_highbd_smooth_h_predictor_8x8 neon/;
+ specialize qw/aom_highbd_smooth_h_predictor_8x16 neon/;
+ specialize qw/aom_highbd_smooth_h_predictor_8x32 neon/;
+ specialize qw/aom_highbd_smooth_h_predictor_16x4 neon/;
+ specialize qw/aom_highbd_smooth_h_predictor_16x8 neon/;
+ specialize qw/aom_highbd_smooth_h_predictor_16x16 neon/;
+ specialize qw/aom_highbd_smooth_h_predictor_16x32 neon/;
+ specialize qw/aom_highbd_smooth_h_predictor_16x64 neon/;
+ specialize qw/aom_highbd_smooth_h_predictor_32x8 neon/;
+ specialize qw/aom_highbd_smooth_h_predictor_32x16 neon/;
+ specialize qw/aom_highbd_smooth_h_predictor_32x32 neon/;
+ specialize qw/aom_highbd_smooth_h_predictor_32x64 neon/;
+ specialize qw/aom_highbd_smooth_h_predictor_64x16 neon/;
+ specialize qw/aom_highbd_smooth_h_predictor_64x32 neon/;
+ specialize qw/aom_highbd_smooth_h_predictor_64x64 neon/;
}
#
# Sub Pixel Filters
@@ -374,7 +450,7 @@ add_proto qw/void aom_convolve_copy/, "const uint8_t *src, ptrdiff_t
add_proto qw/void aom_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
add_proto qw/void aom_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/aom_convolve_copy neon dspr2 msa sse2 avx2/;
+specialize qw/aom_convolve_copy neon sse2 avx2/;
specialize qw/aom_convolve8_horiz sse2 ssse3/, "$avx2_ssse3";
specialize qw/aom_convolve8_vert sse2 ssse3/, "$avx2_ssse3";
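
The convolve8 protos above describe 8-tap subpel filters: each output pixel is a
rounded, clamped weighted sum of eight source pixels at a phase selected via
x_step_q4/y_step_q4. A scalar sketch at one fixed horizontal phase (illustrative;
not libaom's kernel, which also steps the filter phase):

#include <stdint.h>

// 8-tap horizontal filter at a fixed phase. libaom's filter taps sum
// to 128, so FILTER_BITS is 7 and the result is rounded then clamped.
static void convolve8_horiz_sketch(const uint8_t *src, int src_stride,
                                   uint8_t *dst, int dst_stride,
                                   const int16_t filter[8], int w, int h) {
  src -= 3;  // taps are centered on the output pixel
  for (int y = 0; y < h; ++y) {
    for (int x = 0; x < w; ++x) {
      int sum = 0;
      for (int k = 0; k < 8; ++k) sum += filter[k] * src[x + k];
      sum = (sum + 64) >> 7;  // round at FILTER_BITS = 7
      dst[x] = (uint8_t)(sum < 0 ? 0 : (sum > 255 ? 255 : sum));
    }
    src += src_stride;
    dst += dst_stride;
  }
}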
@@ -402,7 +478,7 @@ add_proto qw/void aom_lpf_vertical_14_dual/, "uint8_t *s, int pitch, const uint8
specialize qw/aom_lpf_vertical_14_dual sse2 neon/;
add_proto qw/void aom_lpf_vertical_14_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0";
-specialize qw/aom_lpf_vertical_14_quad sse2/;
+specialize qw/aom_lpf_vertical_14_quad sse2 neon/;
add_proto qw/void aom_lpf_vertical_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/aom_lpf_vertical_6 sse2 neon/;
@@ -414,7 +490,7 @@ add_proto qw/void aom_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_
specialize qw/aom_lpf_vertical_8_dual sse2 neon/;
add_proto qw/void aom_lpf_vertical_8_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0";
-specialize qw/aom_lpf_vertical_8_quad sse2/;
+specialize qw/aom_lpf_vertical_8_quad sse2 neon/;
add_proto qw/void aom_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/aom_lpf_vertical_4 sse2 neon/;
@@ -423,7 +499,7 @@ add_proto qw/void aom_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_
specialize qw/aom_lpf_vertical_4_dual sse2 neon/;
add_proto qw/void aom_lpf_vertical_4_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0";
-specialize qw/aom_lpf_vertical_4_quad sse2/;
+specialize qw/aom_lpf_vertical_4_quad sse2 neon/;
add_proto qw/void aom_lpf_horizontal_14/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/aom_lpf_horizontal_14 sse2 neon/;
@@ -432,7 +508,7 @@ add_proto qw/void aom_lpf_horizontal_14_dual/, "uint8_t *s, int pitch, const uin
specialize qw/aom_lpf_horizontal_14_dual sse2 neon/;
add_proto qw/void aom_lpf_horizontal_14_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0";
-specialize qw/aom_lpf_horizontal_14_quad sse2 avx2/;
+specialize qw/aom_lpf_horizontal_14_quad sse2 avx2 neon/;
add_proto qw/void aom_lpf_horizontal_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/aom_lpf_horizontal_6 sse2 neon/;
@@ -441,7 +517,7 @@ add_proto qw/void aom_lpf_horizontal_6_dual/, "uint8_t *s, int pitch, const uint
specialize qw/aom_lpf_horizontal_6_dual sse2 neon/;
add_proto qw/void aom_lpf_horizontal_6_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0";
-specialize qw/aom_lpf_horizontal_6_quad sse2 avx2/;
+specialize qw/aom_lpf_horizontal_6_quad sse2 avx2 neon/;
add_proto qw/void aom_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/aom_lpf_horizontal_8 sse2 neon/;
@@ -450,7 +526,7 @@ add_proto qw/void aom_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint
specialize qw/aom_lpf_horizontal_8_dual sse2 neon/;
add_proto qw/void aom_lpf_horizontal_8_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0";
-specialize qw/aom_lpf_horizontal_8_quad sse2 avx2/;
+specialize qw/aom_lpf_horizontal_8_quad sse2 avx2 neon/;
add_proto qw/void aom_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/aom_lpf_horizontal_4 sse2 neon/;
@@ -459,13 +535,13 @@ add_proto qw/void aom_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint
specialize qw/aom_lpf_horizontal_4_dual sse2 neon/;
add_proto qw/void aom_lpf_horizontal_4_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0";
-specialize qw/aom_lpf_horizontal_4_quad sse2/;
+specialize qw/aom_lpf_horizontal_4_quad sse2 neon/;
add_proto qw/void aom_lpf_vertical_6_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/aom_lpf_vertical_6_dual sse2 neon/;
add_proto qw/void aom_lpf_vertical_6_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0";
-specialize qw/aom_lpf_vertical_6_quad sse2/;
+specialize qw/aom_lpf_vertical_6_quad sse2 neon/;
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_highbd_lpf_vertical_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
@@ -573,42 +649,46 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes"){
#
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_quantize_b sse2 neon avx/, "$ssse3_x86_64";
-
- add_proto qw/void aom_quantize_b_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_quantize_b_adaptive sse2 avx2/;
+ specialize qw/aom_quantize_b sse2 neon avx avx2/, "$ssse3_x86_64";
add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_quantize_b_32x32 neon avx/, "$ssse3_x86_64";
-
- add_proto qw/void aom_quantize_b_32x32_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_quantize_b_32x32_adaptive sse2/;
+ specialize qw/aom_quantize_b_32x32 neon avx avx2/, "$ssse3_x86_64";
add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_quantize_b_64x64 neon ssse3/;
+ specialize qw/aom_quantize_b_64x64 neon ssse3 avx2/;
+
+ if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
+ add_proto qw/void aom_quantize_b_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/aom_quantize_b_adaptive sse2 avx2/;
- add_proto qw/void aom_quantize_b_64x64_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_quantize_b_64x64_adaptive sse2/;
+ add_proto qw/void aom_quantize_b_32x32_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/aom_quantize_b_32x32_adaptive sse2/;
+
+ add_proto qw/void aom_quantize_b_64x64_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/aom_quantize_b_64x64_adaptive sse2/;
+ }
} # CONFIG_AV1_ENCODER
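
For orientation, the quantize_b protos take zbin (dead-zone), round, quant and
dequant tables. A simplified single-coefficient sketch of the core step; libaom's
aom_quantize_b_c adds a second quant_shift stage, coefficient clamping and eob
tracking on top of this:

#include <stdint.h>
#include <stdlib.h>

typedef int32_t tran_low_t;

// One coefficient through a dead-zone quantizer: values inside the
// zero bin quantize to zero, everything else is scaled by quant in
// Q16 fixed point and reconstructed with dequant. Simplified sketch.
static void quantize_coeff_sketch(tran_low_t coeff, int16_t zbin,
                                  int16_t round, int16_t quant,
                                  int16_t dequant, tran_low_t *qcoeff,
                                  tran_low_t *dqcoeff) {
  const int sign = coeff < 0 ? -1 : 1;
  const int abs_coeff = abs(coeff);
  if (abs_coeff < zbin) {
    *qcoeff = 0;
    *dqcoeff = 0;
  } else {
    const int q = (int)(((int64_t)(abs_coeff + round) * quant) >> 16);
    *qcoeff = sign * q;
    *dqcoeff = *qcoeff * dequant;
  }
}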
if (aom_config("CONFIG_AV1_ENCODER") eq "yes" && aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_highbd_quantize_b sse2 avx2/;
-
- add_proto qw/void aom_highbd_quantize_b_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_highbd_quantize_b_adaptive sse2 avx2/;
+ specialize qw/aom_highbd_quantize_b sse2 avx2 neon/;
add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_highbd_quantize_b_32x32 sse2/;
-
- add_proto qw/void aom_highbd_quantize_b_32x32_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_highbd_quantize_b_32x32_adaptive sse2 avx2/;
+ specialize qw/aom_highbd_quantize_b_32x32 sse2 avx2 neon/;
add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_highbd_quantize_b_64x64 sse2/;
+ specialize qw/aom_highbd_quantize_b_64x64 sse2 avx2 neon/;
+
+ if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
+ add_proto qw/void aom_highbd_quantize_b_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/aom_highbd_quantize_b_adaptive sse2 avx2 neon/;
- add_proto qw/void aom_highbd_quantize_b_64x64_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/aom_highbd_quantize_b_64x64_adaptive sse2/;
+ add_proto qw/void aom_highbd_quantize_b_32x32_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/aom_highbd_quantize_b_32x32_adaptive sse2 avx2 neon/;
+
+ add_proto qw/void aom_highbd_quantize_b_64x64_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/aom_highbd_quantize_b_64x64_adaptive sse2 neon/;
+ }
} # CONFIG_AV1_ENCODER
#
@@ -639,7 +719,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
# Block subtraction
#
add_proto qw/void aom_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
- specialize qw/aom_subtract_block neon msa sse2 avx2/;
+ specialize qw/aom_subtract_block neon sse2 avx2/;
add_proto qw/int64_t/, "aom_sse", "const uint8_t *a, int a_stride, const uint8_t *b,int b_stride, int width, int height";
specialize qw/aom_sse sse4_1 avx2 neon/;
@@ -675,7 +755,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
#
# Single block SAD / Single block Avg SAD
#
- foreach (@block_sizes) {
+ foreach (@encoder_block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "aom_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
add_proto qw/unsigned int/, "aom_sad_skip_${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
@@ -684,30 +764,30 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
}
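
Every specialize line below registers SIMD implementations of the same scalar
contract; for reference, a SAD kernel is just the sum of absolute pixel differences
over the block (a sketch, not libaom's source):

#include <stdint.h>
#include <stdlib.h>

// Reference SAD over a w x h block. The neon/sse2/avx2 versions named
// below must return exactly this value for their fixed block size.
static unsigned int sad_wxh_sketch(const uint8_t *src, int src_stride,
                                   const uint8_t *ref, int ref_stride,
                                   int w, int h) {
  unsigned int sad = 0;
  for (int y = 0; y < h; ++y) {
    for (int x = 0; x < w; ++x) sad += abs(src[x] - ref[x]);
    src += src_stride;
    ref += ref_stride;
  }
  return sad;
}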
add_proto qw/uint64_t aom_sum_sse_2d_i16/, "const int16_t *src, int src_stride, int width, int height, int *sum";
- specialize qw/aom_sum_sse_2d_i16 sse2 avx2/;
- specialize qw/aom_sad128x128 avx2 neon sse2/;
- specialize qw/aom_sad128x64 avx2 sse2/;
- specialize qw/aom_sad64x128 avx2 sse2/;
- specialize qw/aom_sad64x64 avx2 neon msa sse2/;
- specialize qw/aom_sad64x32 avx2 msa sse2/;
- specialize qw/aom_sad32x64 avx2 msa sse2/;
- specialize qw/aom_sad32x32 avx2 neon msa sse2/;
- specialize qw/aom_sad32x16 avx2 msa sse2/;
- specialize qw/aom_sad16x32 msa sse2/;
- specialize qw/aom_sad16x16 neon msa sse2/;
- specialize qw/aom_sad16x8 neon msa sse2/;
- specialize qw/aom_sad8x16 neon msa sse2/;
- specialize qw/aom_sad8x8 neon msa sse2/;
- specialize qw/aom_sad8x4 msa sse2/;
- specialize qw/aom_sad4x8 msa sse2/;
- specialize qw/aom_sad4x4 neon msa sse2/;
-
- specialize qw/aom_sad4x16 sse2/;
- specialize qw/aom_sad16x4 sse2/;
- specialize qw/aom_sad8x32 sse2/;
- specialize qw/aom_sad32x8 sse2/;
- specialize qw/aom_sad16x64 sse2/;
- specialize qw/aom_sad64x16 sse2/;
+ specialize qw/aom_sum_sse_2d_i16 avx2 neon sse2/;
+ specialize qw/aom_sad128x128 avx2 neon sse2/;
+ specialize qw/aom_sad128x64 avx2 neon sse2/;
+ specialize qw/aom_sad64x128 avx2 neon sse2/;
+ specialize qw/aom_sad64x64 avx2 neon sse2/;
+ specialize qw/aom_sad64x32 avx2 neon sse2/;
+ specialize qw/aom_sad32x64 avx2 neon sse2/;
+ specialize qw/aom_sad32x32 avx2 neon sse2/;
+ specialize qw/aom_sad32x16 avx2 neon sse2/;
+ specialize qw/aom_sad16x32 neon sse2/;
+ specialize qw/aom_sad16x16 neon sse2/;
+ specialize qw/aom_sad16x8 neon sse2/;
+ specialize qw/aom_sad8x16 neon sse2/;
+ specialize qw/aom_sad8x8 neon sse2/;
+ specialize qw/aom_sad8x4 neon sse2/;
+ specialize qw/aom_sad4x8 neon sse2/;
+ specialize qw/aom_sad4x4 neon sse2/;
+
+ specialize qw/aom_sad4x16 neon sse2/;
+ specialize qw/aom_sad16x4 neon sse2/;
+ specialize qw/aom_sad8x32 neon sse2/;
+ specialize qw/aom_sad32x8 neon sse2/;
+ specialize qw/aom_sad16x64 neon sse2/;
+ specialize qw/aom_sad64x16 neon sse2/;
specialize qw/aom_sad_skip_128x128 avx2 sse2 neon/;
specialize qw/aom_sad_skip_128x64 avx2 sse2 neon/;
@@ -730,29 +810,29 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
specialize qw/aom_sad_skip_16x64 sse2 neon/;
specialize qw/aom_sad_skip_64x16 sse2 neon/;
- specialize qw/aom_sad128x128_avg avx2 sse2/;
- specialize qw/aom_sad128x64_avg avx2 sse2/;
- specialize qw/aom_sad64x128_avg avx2 sse2/;
- specialize qw/aom_sad64x64_avg avx2 msa sse2/;
- specialize qw/aom_sad64x32_avg avx2 msa sse2/;
- specialize qw/aom_sad32x64_avg avx2 msa sse2/;
- specialize qw/aom_sad32x32_avg avx2 msa sse2/;
- specialize qw/aom_sad32x16_avg avx2 msa sse2/;
- specialize qw/aom_sad16x32_avg msa sse2/;
- specialize qw/aom_sad16x16_avg msa sse2/;
- specialize qw/aom_sad16x8_avg msa sse2/;
- specialize qw/aom_sad8x16_avg msa sse2/;
- specialize qw/aom_sad8x8_avg msa sse2/;
- specialize qw/aom_sad8x4_avg msa sse2/;
- specialize qw/aom_sad4x8_avg msa sse2/;
- specialize qw/aom_sad4x4_avg msa sse2/;
-
- specialize qw/aom_sad4x16_avg sse2/;
- specialize qw/aom_sad16x4_avg sse2/;
- specialize qw/aom_sad8x32_avg sse2/;
- specialize qw/aom_sad32x8_avg sse2/;
- specialize qw/aom_sad16x64_avg sse2/;
- specialize qw/aom_sad64x16_avg sse2/;
+ specialize qw/aom_sad128x128_avg avx2 sse2/;
+ specialize qw/aom_sad128x64_avg avx2 sse2/;
+ specialize qw/aom_sad64x128_avg avx2 sse2/;
+ specialize qw/aom_sad64x64_avg avx2 sse2/;
+ specialize qw/aom_sad64x32_avg avx2 sse2/;
+ specialize qw/aom_sad32x64_avg avx2 sse2/;
+ specialize qw/aom_sad32x32_avg avx2 sse2/;
+ specialize qw/aom_sad32x16_avg avx2 sse2/;
+ specialize qw/aom_sad16x32_avg sse2/;
+ specialize qw/aom_sad16x16_avg sse2/;
+ specialize qw/aom_sad16x8_avg sse2/;
+ specialize qw/aom_sad8x16_avg sse2/;
+ specialize qw/aom_sad8x8_avg sse2/;
+ specialize qw/aom_sad8x4_avg sse2/;
+ specialize qw/aom_sad4x8_avg sse2/;
+ specialize qw/aom_sad4x4_avg sse2/;
+
+ specialize qw/aom_sad4x16_avg sse2/;
+ specialize qw/aom_sad16x4_avg sse2/;
+ specialize qw/aom_sad8x32_avg sse2/;
+ specialize qw/aom_sad32x8_avg sse2/;
+ specialize qw/aom_sad16x64_avg sse2/;
+ specialize qw/aom_sad64x16_avg sse2/;
specialize qw/aom_dist_wtd_sad128x128_avg ssse3/;
specialize qw/aom_dist_wtd_sad128x64_avg ssse3/;
@@ -793,7 +873,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
specialize qw/aom_sad128xh sse2/;
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
- foreach (@block_sizes) {
+ foreach (@encoder_block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
add_proto qw/unsigned int/, "aom_highbd_sad_skip_${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
@@ -874,14 +954,14 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
#
# Masked SAD
#
- foreach (@block_sizes) {
+ foreach (@encoder_block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "aom_masked_sad${w}x${h}", "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask";
specialize "aom_masked_sad${w}x${h}", qw/ssse3 avx2/;
}
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
- foreach (@block_sizes) {
+ foreach (@encoder_block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "aom_highbd_masked_sad${w}x${h}", "const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask";
specialize "aom_highbd_masked_sad${w}x${h}", qw/ssse3 avx2/;
@@ -892,7 +972,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
# OBMC SAD
#
if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
- foreach (@block_sizes) {
+ foreach (@encoder_block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "aom_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
@@ -901,7 +981,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
}
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
- foreach (@block_sizes) {
+ foreach (@encoder_block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "aom_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
@@ -914,7 +994,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
#
# Multi-block SAD, comparing a reference to N independent blocks
#
- foreach (@block_sizes) {
+ foreach (@encoder_block_sizes) {
($w, $h) = @$_;
add_proto qw/void/, "aom_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
add_proto qw/void/, "aom_sad${w}x${h}x4d_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, const uint8_t *second_pred, uint32_t sad_array[4]";
@@ -922,34 +1002,31 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
add_proto qw/void/, "aom_masked_sad${w}x${h}x4d", "const uint8_t *src, int src_stride, const uint8_t *ref[4], int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned sads[4]";
}
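
The x4d variants score one source block against four candidate references per call,
so SIMD code can reuse each source load four times. A scalar sketch of the contract
(illustrative only):

#include <stdint.h>
#include <stdlib.h>

// One source block against four reference blocks in a single call.
static void sad_wxh_x4d_sketch(const uint8_t *src, int src_stride,
                               const uint8_t *const ref[4], int ref_stride,
                               int w, int h, uint32_t sad[4]) {
  for (int i = 0; i < 4; ++i) {
    const uint8_t *s = src;
    const uint8_t *r = ref[i];
    uint32_t acc = 0;
    for (int y = 0; y < h; ++y) {
      for (int x = 0; x < w; ++x) acc += abs(s[x] - r[x]);
      s += src_stride;
      r += ref_stride;
    }
    sad[i] = acc;
  }
}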
- specialize qw/aom_sad128x128x4d avx2 sse2/;
- specialize qw/aom_sad128x64x4d avx2 sse2/;
- specialize qw/aom_sad64x128x4d avx2 sse2/;
- specialize qw/aom_sad64x64x4d avx2 neon msa sse2/;
- specialize qw/aom_sad64x32x4d avx2 msa sse2/;
- specialize qw/aom_sad64x16x4d avx2 sse2/;
- specialize qw/aom_sad32x64x4d avx2 msa sse2/;
- specialize qw/aom_sad32x32x4d avx2 neon msa sse2/;
- specialize qw/aom_sad32x16x4d avx2 msa sse2/;
- specialize qw/aom_sad32x8x4d avx2 sse2/;
- specialize qw/aom_sad16x64x4d sse2/;
- specialize qw/aom_sad16x32x4d msa sse2/;
- specialize qw/aom_sad16x16x4d neon msa sse2/;
- specialize qw/aom_sad16x8x4d msa sse2/;
-
- specialize qw/aom_sad8x16x4d msa sse2/;
- specialize qw/aom_sad8x8x4d msa sse2/;
- specialize qw/aom_sad8x4x4d msa sse2/;
- specialize qw/aom_sad4x16x4d msa sse2/;
- specialize qw/aom_sad4x8x4d msa sse2/;
- specialize qw/aom_sad4x4x4d msa sse2/;
-
- specialize qw/aom_sad4x32x4d sse2/;
- specialize qw/aom_sad4x16x4d sse2/;
- specialize qw/aom_sad16x4x4d sse2/;
- specialize qw/aom_sad8x32x4d sse2/;
- specialize qw/aom_sad32x8x4d sse2/;
- specialize qw/aom_sad64x16x4d sse2/;
+ specialize qw/aom_sad128x128x4d avx2 neon sse2/;
+ specialize qw/aom_sad128x64x4d avx2 neon sse2/;
+ specialize qw/aom_sad64x128x4d avx2 neon sse2/;
+ specialize qw/aom_sad64x64x4d avx2 neon sse2/;
+ specialize qw/aom_sad64x32x4d avx2 neon sse2/;
+ specialize qw/aom_sad32x64x4d avx2 neon sse2/;
+ specialize qw/aom_sad32x32x4d avx2 neon sse2/;
+ specialize qw/aom_sad32x16x4d avx2 neon sse2/;
+ specialize qw/aom_sad16x32x4d neon sse2/;
+ specialize qw/aom_sad16x16x4d neon sse2/;
+ specialize qw/aom_sad16x8x4d neon sse2/;
+
+ specialize qw/aom_sad8x16x4d neon sse2/;
+ specialize qw/aom_sad8x8x4d neon sse2/;
+ specialize qw/aom_sad8x4x4d neon sse2/;
+ specialize qw/aom_sad4x32x4d neon sse2/;
+ specialize qw/aom_sad4x8x4d neon sse2/;
+ specialize qw/aom_sad4x4x4d neon sse2/;
+
+ specialize qw/aom_sad64x16x4d avx2 neon sse2/;
+ specialize qw/aom_sad32x8x4d avx2 neon sse2/;
+ specialize qw/aom_sad16x64x4d neon sse2/;
+ specialize qw/aom_sad16x4x4d neon sse2/;
+ specialize qw/aom_sad8x32x4d neon sse2/;
+ specialize qw/aom_sad4x16x4d neon sse2/;
specialize qw/aom_sad_skip_128x128x4d avx2 sse2 neon/;
specialize qw/aom_sad_skip_128x64x4d avx2 sse2 neon/;
@@ -966,15 +1043,12 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
specialize qw/aom_sad_skip_16x32x4d sse2 neon/;
specialize qw/aom_sad_skip_16x16x4d sse2 neon/;
specialize qw/aom_sad_skip_16x8x4d sse2 neon/;
+ specialize qw/aom_sad_skip_8x32x4d sse2 neon/;
specialize qw/aom_sad_skip_8x16x4d sse2 neon/;
specialize qw/aom_sad_skip_8x8x4d sse2 neon/;
- specialize qw/aom_sad_skip_4x16x4d sse2 neon/;
- specialize qw/aom_sad_skip_4x8x4d sse2 neon/;
specialize qw/aom_sad_skip_4x32x4d sse2 neon/;
specialize qw/aom_sad_skip_4x16x4d sse2 neon/;
- specialize qw/aom_sad_skip_8x32x4d sse2 neon/;
- specialize qw/aom_sad_skip_32x8x4d sse2 neon/;
- specialize qw/aom_sad_skip_64x16x4d sse2 neon/;
+ specialize qw/aom_sad_skip_4x8x4d sse2 neon/;
if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
specialize qw/aom_sad128x128x4d_avg sse2/;
@@ -1039,7 +1113,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
# Multi-block SAD, comparing a reference to N independent blocks
#
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
- foreach (@block_sizes) {
+ foreach (@encoder_block_sizes) {
($w, $h) = @$_;
add_proto qw/void/, "aom_highbd_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
add_proto qw/void/, "aom_highbd_sad_skip_${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[], int ref_stride, uint32_t *sad_array";
@@ -1102,7 +1176,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
specialize qw/aom_avg_4x4 sse2 neon/;
add_proto qw/void aom_avg_8x8_quad/, "const uint8_t *s, int p, int x16_idx, int y16_idx, int *avg";
- specialize qw/aom_avg_8x8_quad avx2 sse2/;
+ specialize qw/aom_avg_8x8_quad avx2 sse2 neon/;
add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
specialize qw/aom_minmax_8x8 sse2/;
@@ -1114,14 +1188,14 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
add_proto qw/void aom_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
}
- add_proto qw/void aom_int_pro_row/, "int16_t hbuf[16], const uint8_t *ref, const int ref_stride, const int height";
- specialize qw/aom_int_pro_row sse2 neon/;
+ add_proto qw/void aom_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, const int ref_stride, const int width, const int height, int norm_factor";
+ specialize qw/aom_int_pro_row avx2 sse2 neon/;
- add_proto qw/int16_t aom_int_pro_col/, "const uint8_t *ref, const int width";
- specialize qw/aom_int_pro_col sse2 neon/;
+ add_proto qw/void aom_int_pro_col/, "int16_t *vbuf, const uint8_t *ref, const int ref_stride, const int width, const int height, int norm_factor";
+ specialize qw/aom_int_pro_col avx2 sse2 neon/;
- add_proto qw/int aom_vector_var/, "const int16_t *ref, const int16_t *src, const int bwl";
- specialize qw/aom_vector_var neon/;
+ add_proto qw/int aom_vector_var/, "const int16_t *ref, const int16_t *src, int bwl";
+ specialize qw/aom_vector_var avx2 sse4_1 neon/;
# TODO(kyslov@) bring back SSE2 by extending it to 128 block size
#specialize qw/aom_vector_var neon sse2/;
@@ -1146,8 +1220,8 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
add_proto qw/void aom_hadamard_lp_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
specialize qw/aom_hadamard_lp_16x16 sse2 avx2 neon/;
- add_proto qw/void aom_hadamard_8x8_dual/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
- specialize qw/aom_hadamard_8x8_dual sse2 avx2/;
+ add_proto qw/void aom_hadamard_lp_8x8_dual/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
+ specialize qw/aom_hadamard_lp_8x8_dual sse2 avx2 neon/;
add_proto qw/void aom_pixel_scale/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff, int log_scale, int h8, int w8";
specialize qw/aom_pixel_scale sse2/;
@@ -1193,8 +1267,8 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
add_proto qw/void aom_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
add_proto qw/void aom_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/aom_get16x16var neon msa/;
- specialize qw/aom_get8x8var sse2 neon msa/;
+ specialize qw/aom_get16x16var neon/;
+ specialize qw/aom_get8x8var sse2 neon/;
add_proto qw/void aom_get_sse_sum_8x8_quad/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
specialize qw/aom_get_sse_sum_8x8_quad avx2 sse2 neon/;
@@ -1204,10 +1278,10 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
add_proto qw/unsigned int aom_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
add_proto qw/unsigned int aom_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/aom_mse16x16 sse2 avx2 neon msa/;
- specialize qw/aom_mse16x8 sse2 msa/;
- specialize qw/aom_mse8x16 sse2 msa/;
- specialize qw/aom_mse8x8 sse2 msa/;
+ specialize qw/aom_mse16x16 sse2 avx2 neon/;
+ specialize qw/aom_mse16x8 sse2 neon/;
+ specialize qw/aom_mse8x16 sse2 neon/;
+ specialize qw/aom_mse8x8 sse2 neon/;
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
foreach $bd (8, 10, 12) {
@@ -1230,8 +1304,8 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
add_proto qw/unsigned int aom_get_mb_ss/, "const int16_t *";
add_proto qw/unsigned int aom_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride";
- specialize qw/aom_get_mb_ss sse2 msa/;
- specialize qw/aom_get4x4sse_cs neon msa/;
+ specialize qw/aom_get_mb_ss sse2/;
+ specialize qw/aom_get4x4sse_cs neon/;
#
# Variance / Subpixel Variance / Subpixel Avg Variance
@@ -1245,71 +1319,74 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
add_proto qw/uint64_t/, "aom_mse_wxh_16bit", "uint8_t *dst, int dstride,uint16_t *src, int sstride, int w, int h";
specialize qw/aom_mse_wxh_16bit sse2 avx2/;
- foreach (@block_sizes) {
+ add_proto qw/uint64_t/, "aom_mse_16xh_16bit", "uint8_t *dst, int dstride,uint16_t *src, int w, int h";
+ specialize qw/aom_mse_16xh_16bit sse2 avx2/;
+
+ foreach (@encoder_block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "aom_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
add_proto qw/uint32_t/, "aom_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
add_proto qw/uint32_t/, "aom_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
add_proto qw/uint32_t/, "aom_dist_wtd_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS *jcp_param";
}
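
All of the variance kernels below share one identity: accumulate the signed sum of
differences and the sum of squared differences, then return the SSE minus the mean
correction, var = sse - sum^2 / (w * h). A scalar sketch (not libaom's code):

#include <stdint.h>

// Variance of src vs. ref over a w x h block; also returns raw SSE.
static unsigned int variance_wxh_sketch(const uint8_t *src, int src_stride,
                                        const uint8_t *ref, int ref_stride,
                                        int w, int h, unsigned int *sse) {
  int64_t sum = 0;
  uint64_t sq = 0;
  for (int y = 0; y < h; ++y) {
    for (int x = 0; x < w; ++x) {
      const int d = src[x] - ref[x];
      sum += d;
      sq += (uint64_t)(d * d);
    }
    src += src_stride;
    ref += ref_stride;
  }
  *sse = (unsigned int)sq;
  return (unsigned int)(sq - (uint64_t)((sum * sum) / (w * h)));
}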
- specialize qw/aom_variance128x128 sse2 avx2 neon /;
- specialize qw/aom_variance128x64 sse2 avx2 neon /;
- specialize qw/aom_variance64x128 sse2 avx2 neon /;
- specialize qw/aom_variance64x64 sse2 avx2 neon msa/;
- specialize qw/aom_variance64x32 sse2 avx2 neon msa/;
- specialize qw/aom_variance32x64 sse2 avx2 neon msa/;
- specialize qw/aom_variance32x32 sse2 avx2 neon msa/;
- specialize qw/aom_variance32x16 sse2 avx2 neon msa/;
- specialize qw/aom_variance16x32 sse2 avx2 neon msa/;
- specialize qw/aom_variance16x16 sse2 avx2 neon msa/;
- specialize qw/aom_variance16x8 sse2 avx2 neon msa/;
- specialize qw/aom_variance8x16 sse2 neon msa/;
- specialize qw/aom_variance8x8 sse2 neon msa/;
- specialize qw/aom_variance8x4 sse2 neon msa/;
- specialize qw/aom_variance4x8 sse2 neon msa/;
- specialize qw/aom_variance4x4 sse2 neon msa/;
-
- specialize qw/aom_sub_pixel_variance128x128 avx2 neon msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance128x64 avx2 neon msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance64x128 avx2 neon msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance64x64 avx2 neon msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance64x32 avx2 neon msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance32x64 avx2 neon msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance32x32 avx2 neon msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance32x16 avx2 neon msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance16x32 avx2 neon msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance16x16 avx2 neon msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance16x8 avx2 neon msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance8x16 neon msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance8x8 neon msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance8x4 neon msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance4x8 neon msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_variance4x4 neon msa sse2 ssse3/;
-
- specialize qw/aom_sub_pixel_avg_variance128x128 avx2 sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance128x64 avx2 sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance64x128 avx2 sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance64x64 avx2 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance64x32 avx2 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance32x64 avx2 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance32x32 avx2 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance32x16 avx2 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance16x32 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance16x16 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance16x8 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance8x16 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance8x8 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance8x4 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance4x8 msa sse2 ssse3/;
- specialize qw/aom_sub_pixel_avg_variance4x4 msa sse2 ssse3/;
+ specialize qw/aom_variance128x128 sse2 avx2 neon/;
+ specialize qw/aom_variance128x64 sse2 avx2 neon/;
+ specialize qw/aom_variance64x128 sse2 avx2 neon/;
+ specialize qw/aom_variance64x64 sse2 avx2 neon/;
+ specialize qw/aom_variance64x32 sse2 avx2 neon/;
+ specialize qw/aom_variance32x64 sse2 avx2 neon/;
+ specialize qw/aom_variance32x32 sse2 avx2 neon/;
+ specialize qw/aom_variance32x16 sse2 avx2 neon/;
+ specialize qw/aom_variance16x32 sse2 avx2 neon/;
+ specialize qw/aom_variance16x16 sse2 avx2 neon/;
+ specialize qw/aom_variance16x8 sse2 avx2 neon/;
+ specialize qw/aom_variance8x16 sse2 neon/;
+ specialize qw/aom_variance8x8 sse2 neon/;
+ specialize qw/aom_variance8x4 sse2 neon/;
+ specialize qw/aom_variance4x8 sse2 neon/;
+ specialize qw/aom_variance4x4 sse2 neon/;
+
+ specialize qw/aom_sub_pixel_variance128x128 avx2 neon sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance128x64 avx2 neon sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance64x128 avx2 neon sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance64x64 avx2 neon sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance64x32 avx2 neon sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance32x64 avx2 neon sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance32x32 avx2 neon sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance32x16 avx2 neon sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance16x32 avx2 neon sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance16x16 avx2 neon sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance16x8 avx2 neon sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance8x16 neon sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance8x8 neon sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance8x4 neon sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance4x8 neon sse2 ssse3/;
+ specialize qw/aom_sub_pixel_variance4x4 neon sse2 ssse3/;
+
+ specialize qw/aom_sub_pixel_avg_variance128x128 avx2 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance128x64 avx2 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance64x128 avx2 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance64x64 avx2 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance64x32 avx2 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance32x64 avx2 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance32x32 avx2 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance32x16 avx2 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance16x32 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance16x16 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance16x8 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance8x16 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance8x8 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance8x4 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance4x8 sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance4x4 sse2 ssse3/;
if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
- specialize qw/aom_variance4x16 sse2/;
- specialize qw/aom_variance16x4 sse2 avx2/;
- specialize qw/aom_variance8x32 sse2/;
- specialize qw/aom_variance32x8 sse2 avx2/;
- specialize qw/aom_variance16x64 sse2 avx2/;
- specialize qw/aom_variance64x16 sse2 avx2/;
+ specialize qw/aom_variance4x16 neon sse2/;
+ specialize qw/aom_variance16x4 neon sse2 avx2/;
+ specialize qw/aom_variance8x32 neon sse2/;
+ specialize qw/aom_variance32x8 neon sse2 avx2/;
+ specialize qw/aom_variance16x64 neon sse2 avx2/;
+ specialize qw/aom_variance64x16 neon sse2 avx2/;
specialize qw/aom_sub_pixel_variance4x16 neon sse2 ssse3/;
specialize qw/aom_sub_pixel_variance16x4 neon avx2 sse2 ssse3/;
@@ -1358,7 +1435,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
add_proto qw/unsigned int/, "aom_highbd_${bd}_variance4x2", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- foreach (@block_sizes) {
+ foreach (@encoder_block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "aom_highbd_${bd}_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
@@ -1404,7 +1481,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
#
# Masked Variance / Masked Subpixel Variance
#
- foreach (@block_sizes) {
+ foreach (@encoder_block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "aom_masked_sub_pixel_variance${w}x${h}", "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse";
specialize "aom_masked_sub_pixel_variance${w}x${h}", qw/ssse3/;
@@ -1412,7 +1489,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
foreach $bd ("_8_", "_10_", "_12_") {
- foreach (@block_sizes) {
+ foreach (@encoder_block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "aom_highbd${bd}masked_sub_pixel_variance${w}x${h}", "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse";
specialize "aom_highbd${bd}masked_sub_pixel_variance${w}x${h}", qw/ssse3/;
@@ -1424,7 +1501,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
# OBMC Variance / OBMC Subpixel Variance
#
if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
- foreach (@block_sizes) {
+ foreach (@encoder_block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "aom_obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
add_proto qw/unsigned int/, "aom_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
@@ -1434,7 +1511,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
foreach $bd ("_", "_10_", "_12_") {
- foreach (@block_sizes) {
+ foreach (@encoder_block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "aom_highbd${bd}obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
add_proto qw/unsigned int/, "aom_highbd${bd}obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
@@ -1445,43 +1522,43 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
}
add_proto qw/uint32_t aom_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance64x64 avx2 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance64x64 avx2 sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance64x32 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance64x32 sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance32x64 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance32x64 sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance32x32 avx2 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance32x32 avx2 sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance32x16 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance32x16 sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance16x32 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance16x32 sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance16x16 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance16x16 sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance16x8 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance16x8 sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance8x16 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance8x16 sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance8x8 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance8x8 sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance8x4 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance8x4 sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance4x8 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance4x8 sse2 ssse3/;
add_proto qw/uint32_t aom_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
- specialize qw/aom_sub_pixel_avg_variance4x4 msa sse2 ssse3/;
+ specialize qw/aom_sub_pixel_avg_variance4x4 sse2 ssse3/;
#
# Comp Avg
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/aom_simd_inline.h b/chromium/third_party/libaom/source/libaom/aom_dsp/aom_simd_inline.h
index eb333f6f66f..b4b1b35637a 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/aom_simd_inline.h
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/aom_simd_inline.h
@@ -18,4 +18,7 @@
#define SIMD_INLINE static AOM_FORCE_INLINE
#endif
+#define SIMD_CLAMP(value, min, max) \
+ ((value) > (max) ? (max) : (value) < (min) ? (min) : (value))
+
#endif // AOM_AOM_DSP_AOM_SIMD_INLINE_H_
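The new SIMD_CLAMP macro evaluates each argument more than once, so callers should pass side-effect-free expressions. A minimal usage sketch (the function name and the 10-bit range are illustrative, not from the patch):

    #include <stdint.h>

    // Clamp a widened intermediate back into the valid 10-bit pixel range.
    static int16_t clamp_pixel_10bit(int32_t v) {
      return (int16_t)SIMD_CLAMP(v, 0, 1023);
    }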
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/avg_neon.c b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/avg_neon.c
index ffb9daef7ca..991fd3f3b84 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/avg_neon.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/avg_neon.c
@@ -17,6 +17,15 @@
#include "aom_dsp/arm/transpose_neon.h"
#include "aom_ports/mem.h"
+#if !defined(__aarch64__)
+static INLINE uint32x2_t horizontal_add_u16x8_v(const uint16x8_t a) {
+ const uint32x4_t b = vpaddlq_u16(a);
+ const uint64x2_t c = vpaddlq_u32(b);
+ return vadd_u32(vreinterpret_u32_u64(vget_low_u64(c)),
+ vreinterpret_u32_u64(vget_high_u64(c)));
+}
+#endif
+
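On AArch64 the reductions below use vaddvq_u16 directly; this armv7 fallback widens pairwise instead. As a scalar sketch, lane 0 of its result holds the sum of all eight 16-bit lanes (lane 1 is always zero here):

    // Scalar meaning of horizontal_add_u16x8_v (sketch).
    static uint32_t horizontal_add_u16x8_scalar(const uint16_t a[8]) {
      uint32_t sum = 0;
      for (int i = 0; i < 8; ++i) sum += a[i];
      return sum;
    }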
unsigned int aom_avg_4x4_neon(const uint8_t *a, int a_stride) {
const uint8x16_t b = load_unaligned_u8q(a, a_stride);
const uint16x8_t c = vaddl_u8(vget_low_u8(b), vget_high_u8(b));
@@ -24,14 +33,13 @@ unsigned int aom_avg_4x4_neon(const uint8_t *a, int a_stride) {
const uint32_t d = vaddlvq_u16(c);
return (d + 8) >> 4;
#else
- const uint32x2_t d = horizontal_add_u16x8(c);
+ const uint32x2_t d = horizontal_add_u16x8_v(c);
return vget_lane_u32(vrshr_n_u32(d, 4), 0);
#endif
}
unsigned int aom_avg_8x8_neon(const uint8_t *a, int a_stride) {
uint16x8_t sum;
- uint32x2_t d;
uint8x8_t b = vld1_u8(a);
a += a_stride;
uint8x8_t c = vld1_u8(a);
@@ -44,9 +52,23 @@ unsigned int aom_avg_8x8_neon(const uint8_t *a, int a_stride) {
sum = vaddw_u8(sum, e);
}
- d = horizontal_add_u16x8(sum);
-
+#if defined(__aarch64__)
+ const uint32_t d = vaddlvq_u16(sum);
+ return (d + 32) >> 6;
+#else
+ const uint32x2_t d = horizontal_add_u16x8_v(sum);
return vget_lane_u32(vrshr_n_u32(d, 6), 0);
+#endif
+}
+
+void aom_avg_8x8_quad_neon(const uint8_t *s, int p, int x16_idx, int y16_idx,
+ int *avg) {
+ for (int k = 0; k < 4; k++) {
+ const int x8_idx = x16_idx + ((k & 1) << 3);
+ const int y8_idx = y16_idx + ((k >> 1) << 3);
+ const uint8_t *s_tmp = s + y8_idx * p + x8_idx;
+ avg[k] = aom_avg_8x8_neon(s_tmp, p);
+ }
}
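The new aom_avg_8x8_quad_neon averages the four 8x8 quadrants of the 16x16 block at (x16_idx, y16_idx): bit 0 of k selects the left/right half and bit 1 the top/bottom half. A usage sketch (src, p, x16 and y16 are illustrative names):

    int avg[4];
    // avg[0..3] receive the averages of the (0,0), (8,0), (0,8) and (8,8)
    // 8x8 sub-blocks of the 16x16 block at (x16, y16) in src with stride p.
    aom_avg_8x8_quad_neon(src, p, x16, y16, avg);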
int aom_satd_lp_neon(const int16_t *coeff, int length) {
@@ -64,68 +86,58 @@ int aom_satd_lp_neon(const int16_t *coeff, int length) {
coeff += 16;
} while (length != 0);
- {
- // satd: 26 bits, dynamic range [-32640 * 1024, 32640 * 1024]
- const int64x2_t s0 = vpaddlq_s32(accum); // cascading summation of 'accum'.
- const int32x2_t s1 = vadd_s32(vreinterpret_s32_s64(vget_low_s64(s0)),
- vreinterpret_s32_s64(vget_high_s64(s0)));
- const int satd = vget_lane_s32(s1, 0);
- return satd;
- }
+ return horizontal_add_s32x4(accum);
}
-void aom_int_pro_row_neon(int16_t hbuf[16], const uint8_t *ref,
- const int ref_stride, const int height) {
- int i;
+void aom_int_pro_row_neon(int16_t *hbuf, const uint8_t *ref,
+ const int ref_stride, const int width,
+ const int height, int norm_factor) {
const uint8_t *idx = ref;
- uint16x8_t vec0 = vdupq_n_u16(0);
- uint16x8_t vec1 = vec0;
- uint8x16_t tmp;
-
- for (i = 0; i < height; ++i) {
- tmp = vld1q_u8(idx);
- idx += ref_stride;
- vec0 = vaddw_u8(vec0, vget_low_u8(tmp));
- vec1 = vaddw_u8(vec1, vget_high_u8(tmp));
- }
-
- if (128 == height) {
- vec0 = vshrq_n_u16(vec0, 6);
- vec1 = vshrq_n_u16(vec1, 6);
- } else if (64 == height) {
- vec0 = vshrq_n_u16(vec0, 5);
- vec1 = vshrq_n_u16(vec1, 5);
- } else if (32 == height) {
- vec0 = vshrq_n_u16(vec0, 4);
- vec1 = vshrq_n_u16(vec1, 4);
- } else if (16 == height) {
- vec0 = vshrq_n_u16(vec0, 3);
- vec1 = vshrq_n_u16(vec1, 3);
+ const uint16x8_t zero = vdupq_n_u16(0);
+ const int16x8_t neg_norm_factor = vdupq_n_s16(-norm_factor);
+
+ for (int wd = 0; wd < width; wd += 16) {
+ uint16x8_t vec0 = zero;
+ uint16x8_t vec1 = zero;
+ idx = ref + wd;
+ for (int ht = 0; ht < height; ++ht) {
+ const uint8x16_t tmp = vld1q_u8(idx);
+ idx += ref_stride;
+ vec0 = vaddw_u8(vec0, vget_low_u8(tmp));
+ vec1 = vaddw_u8(vec1, vget_high_u8(tmp));
+ }
+
+ const int16x8_t result0 =
+ vshlq_s16(vreinterpretq_s16_u16(vec0), neg_norm_factor);
+ const int16x8_t result1 =
+ vshlq_s16(vreinterpretq_s16_u16(vec1), neg_norm_factor);
+
+ vst1q_s16(hbuf + wd, result0);
+ vst1q_s16(hbuf + wd + 8, result1);
}
-
- vst1q_s16(hbuf, vreinterpretq_s16_u16(vec0));
- hbuf += 8;
- vst1q_s16(hbuf, vreinterpretq_s16_u16(vec1));
}
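The reworked aom_int_pro_row now takes the width and a caller-supplied normalization shift instead of deriving the shift from a fixed set of heights. Its scalar meaning, as a sketch:

    // hbuf[c] = (column sum of ref over `height` rows) >> norm_factor.
    static void int_pro_row_scalar(int16_t *hbuf, const uint8_t *ref,
                                   int ref_stride, int width, int height,
                                   int norm_factor) {
      for (int c = 0; c < width; ++c) {
        int sum = 0;
        for (int r = 0; r < height; ++r) sum += ref[r * ref_stride + c];
        hbuf[c] = (int16_t)(sum >> norm_factor);
      }
    }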
-int16_t aom_int_pro_col_neon(const uint8_t *ref, const int width) {
- const uint8_t *idx;
- uint16x8_t sum = vdupq_n_u16(0);
-
- for (idx = ref; idx < (ref + width); idx += 16) {
- uint8x16_t vec = vld1q_u8(idx);
- sum = vaddq_u16(sum, vpaddlq_u8(vec));
- }
+void aom_int_pro_col_neon(int16_t *vbuf, const uint8_t *ref,
+ const int ref_stride, const int width,
+ const int height, int norm_factor) {
+ for (int ht = 0; ht < height; ++ht) {
+ uint16x8_t sum = vdupq_n_u16(0);
+ for (int wd = 0; wd < width; wd += 16) {
+ const uint8x16_t vec = vld1q_u8(ref + wd);
+ sum = vaddq_u16(sum, vpaddlq_u8(vec));
+ }
#if defined(__aarch64__)
- return (int16_t)vaddvq_u16(sum);
+ vbuf[ht] = ((int16_t)vaddvq_u16(sum)) >> norm_factor;
#else
- const uint32x4_t a = vpaddlq_u16(sum);
- const uint64x2_t b = vpaddlq_u32(a);
- const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
- vreinterpret_u32_u64(vget_high_u64(b)));
- return (int16_t)vget_lane_u32(c, 0);
+ const uint32x4_t a = vpaddlq_u16(sum);
+ const uint64x2_t b = vpaddlq_u32(a);
+ const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
+ vreinterpret_u32_u64(vget_high_u64(b)));
+ vbuf[ht] = ((int16_t)vget_lane_u32(c, 0)) >> norm_factor;
#endif
+ ref += ref_stride;
+ }
}
// coeff: 16 bits, dynamic range [-32640, 32640].
@@ -147,14 +159,10 @@ int aom_satd_neon(const tran_low_t *coeff, int length) {
} while (length != 0);
// satd: 26 bits, dynamic range [-32640 * 1024, 32640 * 1024]
-#ifdef __aarch64__
- return vaddvq_s32(accum);
-#else
return horizontal_add_s32x4(accum);
-#endif // __aarch64__
}
-int aom_vector_var_neon(const int16_t *ref, const int16_t *src, const int bwl) {
+int aom_vector_var_neon(const int16_t *ref, const int16_t *src, int bwl) {
int32x4_t v_mean = vdupq_n_s32(0);
int32x4_t v_sse = v_mean;
int16x8_t v_ref, v_src;
@@ -176,15 +184,11 @@ int aom_vector_var_neon(const int16_t *ref, const int16_t *src, const int bwl) {
v_sse = vmlal_s16(v_sse, v_high, v_high);
#endif
}
-#if defined(__aarch64__)
- int mean = vaddvq_s32(v_mean);
- int sse = (int)vaddvq_s32(v_sse);
-#else
- int mean = horizontal_add_s32x4(v_mean);
- int sse = horizontal_add_s32x4(v_sse);
-#endif
+ const int mean = horizontal_add_s32x4(v_mean);
+ const int sse = horizontal_add_s32x4(v_sse);
+ const unsigned int mean_abs = mean >= 0 ? mean : -mean;
// (mean * mean): dynamic range 31 bits.
- int var = sse - ((mean * mean) >> (bwl + 2));
+ const int var = sse - ((mean_abs * mean_abs) >> (bwl + 2));
return var;
}
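The rewritten aom_vector_var_neon also squares the absolute mean in unsigned arithmetic, avoiding signed-overflow undefined behavior when the mean is large. A scalar reference of what it computes (sketch; in libaom the vector length is 4 << bwl):

    static int vector_var_scalar(const int16_t *ref, const int16_t *src,
                                 int bwl) {
      const int width = 4 << bwl;
      int mean = 0, sse = 0;
      for (int i = 0; i < width; ++i) {
        const int diff = ref[i] - src[i];
        mean += diff;
        sse += diff * diff;
      }
      const unsigned int mean_abs = mean >= 0 ? mean : -mean;
      // sse - mean^2 / width, with the division done as a shift.
      return sse - (int)((mean_abs * mean_abs) >> (bwl + 2));
    }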
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/blend_a64_mask_neon.c b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/blend_a64_mask_neon.c
index 8709e38b804..f11d57e44c7 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/blend_a64_mask_neon.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/blend_a64_mask_neon.c
@@ -117,14 +117,10 @@ static INLINE void blend_4x4(uint8_t *dst, uint32_t dst_stride,
uint8x8_t res_0 = vqmovun_s16(src0_0);
uint8x8_t res_1 = vqmovun_s16(src0_1);
- vst1_lane_u32((uint32_t *)(dst + 0 * dst_stride), vreinterpret_u32_u8(res_0),
- 0);
- vst1_lane_u32((uint32_t *)(dst + 1 * dst_stride), vreinterpret_u32_u8(res_0),
- 1);
- vst1_lane_u32((uint32_t *)(dst + 2 * dst_stride), vreinterpret_u32_u8(res_1),
- 0);
- vst1_lane_u32((uint32_t *)(dst + 3 * dst_stride), vreinterpret_u32_u8(res_1),
- 1);
+ store_unaligned_u8_4x1(dst + 0 * dst_stride, res_0, 0);
+ store_unaligned_u8_4x1(dst + 1 * dst_stride, res_0, 1);
+ store_unaligned_u8_4x1(dst + 2 * dst_stride, res_1, 0);
+ store_unaligned_u8_4x1(dst + 3 * dst_stride, res_1, 1);
}
void aom_lowbd_blend_a64_d16_mask_neon(
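The replaced vst1_lane_u32 calls cast dst to uint32_t *, which is not alignment-safe for 4-byte lane stores through byte pointers. store_unaligned_u8_4x1 comes from mem_neon.h; a plausible shape for such a helper, assuming a memcpy-based store (a sketch, not the patch's definition):

    #include <string.h>
    #include <arm_neon.h>

    static inline void store_unaligned_u8_4x1_sketch(uint8_t *buf,
                                                     const uint8x8_t a,
                                                     const int lane) {
      uint32_t v;
      // Extract the requested 32-bit lane, then byte-copy it out so no
      // alignment is assumed on buf.
      if (lane == 0)
        v = vget_lane_u32(vreinterpret_u32_u8(a), 0);
      else
        v = vget_lane_u32(vreinterpret_u32_u8(a), 1);
      memcpy(buf, &v, 4);
    }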
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/hadamard_neon.c b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/hadamard_neon.c
index 7897155e242..939c9a6f2f2 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/hadamard_neon.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/hadamard_neon.c
@@ -104,6 +104,13 @@ void aom_hadamard_lp_8x8_neon(const int16_t *src_diff, ptrdiff_t src_stride,
vst1q_s16(coeff + 56, a7);
}
+void aom_hadamard_lp_8x8_dual_neon(const int16_t *src_diff,
+ ptrdiff_t src_stride, int16_t *coeff) {
+ for (int i = 0; i < 2; i++) {
+ aom_hadamard_lp_8x8_neon(src_diff + (i * 8), src_stride, coeff + (i * 64));
+ }
+}
+
void aom_hadamard_lp_16x16_neon(const int16_t *src_diff, ptrdiff_t src_stride,
int16_t *coeff) {
/* Rearrange 16x16 to 8x32 and remove stride.
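The new dual helper runs the low-precision 8x8 Hadamard on two horizontally adjacent blocks in one call. Usage sketch (buffer names illustrative):

    int16_t coeff[128];
    // Left 8x8 block at src_diff, right block 8 columns over; the two
    // transforms land at coeff and coeff + 64 respectively.
    aom_hadamard_lp_8x8_dual_neon(src_diff, src_stride, coeff);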
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/highbd_intrapred_neon.c b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/highbd_intrapred_neon.c
new file mode 100644
index 00000000000..fa2f11e4b74
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/highbd_intrapred_neon.c
@@ -0,0 +1,835 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <arm_neon.h>
+
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
+
+#include "aom/aom_integer.h"
+#include "aom_dsp/intrapred_common.h"
+
+// -----------------------------------------------------------------------------
+// DC
+
+static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
+ const uint16_t *above,
+ const uint16_t *left) {
+ assert(bw >= 4);
+ assert(IS_POWER_OF_TWO(bw));
+ int expected_dc, sum = 0;
+ const int count = bw * 2;
+ uint32x4_t sum_q = vdupq_n_u32(0);
+ uint32x2_t sum_d;
+ uint16_t *dst_1;
+ if (bw >= 8) {
+ for (int i = 0; i < bw; i += 8) {
+ sum_q = vpadalq_u16(sum_q, vld1q_u16(above));
+ sum_q = vpadalq_u16(sum_q, vld1q_u16(left));
+ above += 8;
+ left += 8;
+ }
+ sum_d = vadd_u32(vget_low_u32(sum_q), vget_high_u32(sum_q));
+ sum = vget_lane_s32(vreinterpret_s32_u64(vpaddl_u32(sum_d)), 0);
+ expected_dc = (sum + (count >> 1)) / count;
+ const uint16x8_t dc = vdupq_n_u16((uint16_t)expected_dc);
+ for (int r = 0; r < bw; r++) {
+ dst_1 = dst;
+ for (int i = 0; i < bw; i += 8) {
+ vst1q_u16(dst_1, dc);
+ dst_1 += 8;
+ }
+ dst += stride;
+ }
+ } else { // 4x4
+ sum_q = vaddl_u16(vld1_u16(above), vld1_u16(left));
+ sum_d = vadd_u32(vget_low_u32(sum_q), vget_high_u32(sum_q));
+ sum = vget_lane_s32(vreinterpret_s32_u64(vpaddl_u32(sum_d)), 0);
+ expected_dc = (sum + (count >> 1)) / count;
+ const uint16x4_t dc = vdup_n_u16((uint16_t)expected_dc);
+ for (int r = 0; r < bw; r++) {
+ vst1_u16(dst, dc);
+ dst += stride;
+ }
+ }
+}
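highbd_dc_predictor matches this scalar definition (a sketch of the same computation):

    // Fill the bw x bw block with the rounded average of the bw samples
    // above and the bw samples to the left.
    static void dc_predictor_scalar(uint16_t *dst, ptrdiff_t stride, int bw,
                                    const uint16_t *above,
                                    const uint16_t *left) {
      int sum = 0;
      for (int i = 0; i < bw; ++i) sum += above[i] + left[i];
      const uint16_t dc = (uint16_t)((sum + bw) / (2 * bw));
      for (int r = 0; r < bw; ++r) {
        for (int c = 0; c < bw; ++c) dst[c] = dc;
        dst += stride;
      }
    }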
+
+#define INTRA_PRED_HIGHBD_SIZED_NEON(type, width) \
+ void aom_highbd_##type##_predictor_##width##x##width##_neon( \
+ uint16_t *dst, ptrdiff_t stride, const uint16_t *above, \
+ const uint16_t *left, int bd) { \
+ (void)bd; \
+ highbd_##type##_predictor(dst, stride, width, above, left); \
+ }
+
+#define INTRA_PRED_SQUARE(type) \
+ INTRA_PRED_HIGHBD_SIZED_NEON(type, 4) \
+ INTRA_PRED_HIGHBD_SIZED_NEON(type, 8) \
+ INTRA_PRED_HIGHBD_SIZED_NEON(type, 16) \
+ INTRA_PRED_HIGHBD_SIZED_NEON(type, 32) \
+ INTRA_PRED_HIGHBD_SIZED_NEON(type, 64)
+
+INTRA_PRED_SQUARE(dc)
+
+#undef INTRA_PRED_SQUARE
+
+// -----------------------------------------------------------------------------
+// V_PRED
+
+#define HIGHBD_V_NXM(W, H) \
+ void aom_highbd_v_predictor_##W##x##H##_neon( \
+ uint16_t *dst, ptrdiff_t stride, const uint16_t *above, \
+ const uint16_t *left, int bd) { \
+ (void)left; \
+ (void)bd; \
+ vertical##W##xh_neon(dst, stride, above, H); \
+ }
+
+static INLINE uint16x8x2_t load_uint16x8x2(uint16_t const *ptr) {
+ uint16x8x2_t x;
+ // Clang/gcc uses ldp here.
+ x.val[0] = vld1q_u16(ptr);
+ x.val[1] = vld1q_u16(ptr + 8);
+ return x;
+}
+
+static INLINE void store_uint16x8x2(uint16_t *ptr, uint16x8x2_t x) {
+ vst1q_u16(ptr, x.val[0]);
+ vst1q_u16(ptr + 8, x.val[1]);
+}
+
+static INLINE void vertical4xh_neon(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *const above, int height) {
+ const uint16x4_t row = vld1_u16(above);
+ int y = height;
+ do {
+ vst1_u16(dst, row);
+ vst1_u16(dst + stride, row);
+ dst += stride << 1;
+ y -= 2;
+ } while (y != 0);
+}
+
+static INLINE void vertical8xh_neon(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *const above, int height) {
+ const uint16x8_t row = vld1q_u16(above);
+ int y = height;
+ do {
+ vst1q_u16(dst, row);
+ vst1q_u16(dst + stride, row);
+ dst += stride << 1;
+ y -= 2;
+ } while (y != 0);
+}
+
+static INLINE void vertical16xh_neon(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *const above, int height) {
+ const uint16x8x2_t row = load_uint16x8x2(above);
+ int y = height;
+ do {
+ store_uint16x8x2(dst, row);
+ store_uint16x8x2(dst + stride, row);
+ dst += stride << 1;
+ y -= 2;
+ } while (y != 0);
+}
+
+static INLINE uint16x8x4_t load_uint16x8x4(uint16_t const *ptr) {
+ uint16x8x4_t x;
+ // Clang/gcc uses ldp here.
+ x.val[0] = vld1q_u16(ptr);
+ x.val[1] = vld1q_u16(ptr + 8);
+ x.val[2] = vld1q_u16(ptr + 16);
+ x.val[3] = vld1q_u16(ptr + 24);
+ return x;
+}
+
+static INLINE void store_uint16x8x4(uint16_t *ptr, uint16x8x4_t x) {
+ vst1q_u16(ptr, x.val[0]);
+ vst1q_u16(ptr + 8, x.val[1]);
+ vst1q_u16(ptr + 16, x.val[2]);
+ vst1q_u16(ptr + 24, x.val[3]);
+}
+
+static INLINE void vertical32xh_neon(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *const above, int height) {
+ const uint16x8x4_t row = load_uint16x8x4(above);
+ int y = height;
+ do {
+ store_uint16x8x4(dst, row);
+ store_uint16x8x4(dst + stride, row);
+ dst += stride << 1;
+ y -= 2;
+ } while (y != 0);
+}
+
+static INLINE void vertical64xh_neon(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *const above, int height) {
+ uint16_t *dst32 = dst + 32;
+ const uint16x8x4_t row = load_uint16x8x4(above);
+ const uint16x8x4_t row32 = load_uint16x8x4(above + 32);
+ int y = height;
+ do {
+ store_uint16x8x4(dst, row);
+ store_uint16x8x4(dst32, row32);
+ store_uint16x8x4(dst + stride, row);
+ store_uint16x8x4(dst32 + stride, row32);
+ dst += stride << 1;
+ dst32 += stride << 1;
+ y -= 2;
+ } while (y != 0);
+}
+
+HIGHBD_V_NXM(4, 4)
+HIGHBD_V_NXM(4, 8)
+HIGHBD_V_NXM(4, 16)
+
+HIGHBD_V_NXM(8, 4)
+HIGHBD_V_NXM(8, 8)
+HIGHBD_V_NXM(8, 16)
+HIGHBD_V_NXM(8, 32)
+
+HIGHBD_V_NXM(16, 4)
+HIGHBD_V_NXM(16, 8)
+HIGHBD_V_NXM(16, 16)
+HIGHBD_V_NXM(16, 32)
+HIGHBD_V_NXM(16, 64)
+
+HIGHBD_V_NXM(32, 8)
+HIGHBD_V_NXM(32, 16)
+HIGHBD_V_NXM(32, 32)
+HIGHBD_V_NXM(32, 64)
+
+HIGHBD_V_NXM(64, 16)
+HIGHBD_V_NXM(64, 32)
+HIGHBD_V_NXM(64, 64)
+
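Each vertical predictor simply replicates the `above` row down the block; the scalar equivalent is (sketch):

    #include <string.h>

    static void v_predictor_scalar(uint16_t *dst, ptrdiff_t stride, int bw,
                                   int bh, const uint16_t *above) {
      for (int r = 0; r < bh; ++r) {
        memcpy(dst, above, bw * sizeof(*above));
        dst += stride;
      }
    }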
+// -----------------------------------------------------------------------------
+// PAETH
+
+static INLINE void highbd_paeth_4or8_x_h_neon(uint16_t *dest, ptrdiff_t stride,
+ const uint16_t *const top_row,
+ const uint16_t *const left_column,
+ int width, int height) {
+ const uint16x8_t top_left = vdupq_n_u16(top_row[-1]);
+ const uint16x8_t top_left_x2 = vdupq_n_u16(top_row[-1] + top_row[-1]);
+ uint16x8_t top;
+ if (width == 4) {
+ top = vcombine_u16(vld1_u16(top_row), vdup_n_u16(0));
+ } else { // width == 8
+ top = vld1q_u16(top_row);
+ }
+
+ for (int y = 0; y < height; ++y) {
+ const uint16x8_t left = vdupq_n_u16(left_column[y]);
+
+ const uint16x8_t left_dist = vabdq_u16(top, top_left);
+ const uint16x8_t top_dist = vabdq_u16(left, top_left);
+ const uint16x8_t top_left_dist =
+ vabdq_u16(vaddq_u16(top, left), top_left_x2);
+
+ const uint16x8_t left_le_top = vcleq_u16(left_dist, top_dist);
+ const uint16x8_t left_le_top_left = vcleq_u16(left_dist, top_left_dist);
+ const uint16x8_t top_le_top_left = vcleq_u16(top_dist, top_left_dist);
+
+ // if (left_dist <= top_dist && left_dist <= top_left_dist)
+ const uint16x8_t left_mask = vandq_u16(left_le_top, left_le_top_left);
+ // dest[x] = left_column[y];
+ // Fill all the unused spaces with 'top'. They will be overwritten when
+ // the positions for top_left are known.
+ uint16x8_t result = vbslq_u16(left_mask, left, top);
+ // else if (top_dist <= top_left_dist)
+ // dest[x] = top_row[x];
+ // Add these values to the mask. They were already set.
+ const uint16x8_t left_or_top_mask = vorrq_u16(left_mask, top_le_top_left);
+ // else
+ // dest[x] = top_left;
+ result = vbslq_u16(left_or_top_mask, result, top_left);
+
+ if (width == 4) {
+ vst1_u16(dest, vget_low_u16(result));
+ } else { // width == 8
+ vst1q_u16(dest, result);
+ }
+ dest += stride;
+ }
+}
+
+#define HIGHBD_PAETH_NXM(W, H) \
+ void aom_highbd_paeth_predictor_##W##x##H##_neon( \
+ uint16_t *dst, ptrdiff_t stride, const uint16_t *above, \
+ const uint16_t *left, int bd) { \
+ (void)bd; \
+ highbd_paeth_4or8_x_h_neon(dst, stride, above, left, W, H); \
+ }
+
+HIGHBD_PAETH_NXM(4, 4)
+HIGHBD_PAETH_NXM(4, 8)
+HIGHBD_PAETH_NXM(4, 16)
+HIGHBD_PAETH_NXM(8, 4)
+HIGHBD_PAETH_NXM(8, 8)
+HIGHBD_PAETH_NXM(8, 16)
+HIGHBD_PAETH_NXM(8, 32)
+
+// Select the closest values and collect them.
+static INLINE uint16x8_t select_paeth(const uint16x8_t top,
+ const uint16x8_t left,
+ const uint16x8_t top_left,
+ const uint16x8_t left_le_top,
+ const uint16x8_t left_le_top_left,
+ const uint16x8_t top_le_top_left) {
+ // if (left_dist <= top_dist && left_dist <= top_left_dist)
+ const uint16x8_t left_mask = vandq_u16(left_le_top, left_le_top_left);
+ // dest[x] = left_column[y];
+ // Fill all the unused spaces with 'top'. They will be overwritten when
+ // the positions for top_left are known.
+ const uint16x8_t result = vbslq_u16(left_mask, left, top);
+ // else if (top_dist <= top_left_dist)
+ // dest[x] = top_row[x];
+ // Add these values to the mask. They were already set.
+ const uint16x8_t left_or_top_mask = vorrq_u16(left_mask, top_le_top_left);
+ // else
+ // dest[x] = top_left;
+ return vbslq_u16(left_or_top_mask, result, top_left);
+}
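select_paeth applies the standard Paeth rule per lane; the scalar form it vectorizes is (sketch):

    #include <stdlib.h>  // abs()

    static uint16_t paeth_scalar(uint16_t top, uint16_t left,
                                 uint16_t top_left) {
      const int left_dist = abs((int)top - (int)top_left);
      const int top_dist = abs((int)left - (int)top_left);
      const int top_left_dist = abs((int)top + (int)left - 2 * (int)top_left);
      if (left_dist <= top_dist && left_dist <= top_left_dist) return left;
      if (top_dist <= top_left_dist) return top;
      return top_left;
    }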
+
+#define PAETH_PREDICTOR(num) \
+ do { \
+ const uint16x8_t left_dist = vabdq_u16(top[num], top_left); \
+ const uint16x8_t top_left_dist = \
+ vabdq_u16(vaddq_u16(top[num], left), top_left_x2); \
+ const uint16x8_t left_le_top = vcleq_u16(left_dist, top_dist); \
+ const uint16x8_t left_le_top_left = vcleq_u16(left_dist, top_left_dist); \
+ const uint16x8_t top_le_top_left = vcleq_u16(top_dist, top_left_dist); \
+ const uint16x8_t result = \
+ select_paeth(top[num], left, top_left, left_le_top, left_le_top_left, \
+ top_le_top_left); \
+ vst1q_u16(dest + (num * 8), result); \
+ } while (0)
+
+#define LOAD_TOP_ROW(num) vld1q_u16(top_row + (num * 8))
+
+static INLINE void highbd_paeth16_plus_x_h_neon(
+ uint16_t *dest, ptrdiff_t stride, const uint16_t *const top_row,
+ const uint16_t *const left_column, int width, int height) {
+ const uint16x8_t top_left = vdupq_n_u16(top_row[-1]);
+ const uint16x8_t top_left_x2 = vdupq_n_u16(top_row[-1] + top_row[-1]);
+ uint16x8_t top[8];
+ top[0] = LOAD_TOP_ROW(0);
+ top[1] = LOAD_TOP_ROW(1);
+ if (width > 16) {
+ top[2] = LOAD_TOP_ROW(2);
+ top[3] = LOAD_TOP_ROW(3);
+ if (width == 64) {
+ top[4] = LOAD_TOP_ROW(4);
+ top[5] = LOAD_TOP_ROW(5);
+ top[6] = LOAD_TOP_ROW(6);
+ top[7] = LOAD_TOP_ROW(7);
+ }
+ }
+
+ for (int y = 0; y < height; ++y) {
+ const uint16x8_t left = vdupq_n_u16(left_column[y]);
+ const uint16x8_t top_dist = vabdq_u16(left, top_left);
+ PAETH_PREDICTOR(0);
+ PAETH_PREDICTOR(1);
+ if (width > 16) {
+ PAETH_PREDICTOR(2);
+ PAETH_PREDICTOR(3);
+ if (width == 64) {
+ PAETH_PREDICTOR(4);
+ PAETH_PREDICTOR(5);
+ PAETH_PREDICTOR(6);
+ PAETH_PREDICTOR(7);
+ }
+ }
+ dest += stride;
+ }
+}
+
+#define HIGHBD_PAETH_NXM_WIDE(W, H) \
+ void aom_highbd_paeth_predictor_##W##x##H##_neon( \
+ uint16_t *dst, ptrdiff_t stride, const uint16_t *above, \
+ const uint16_t *left, int bd) { \
+ (void)bd; \
+ highbd_paeth16_plus_x_h_neon(dst, stride, above, left, W, H); \
+ }
+
+HIGHBD_PAETH_NXM_WIDE(16, 4)
+HIGHBD_PAETH_NXM_WIDE(16, 8)
+HIGHBD_PAETH_NXM_WIDE(16, 16)
+HIGHBD_PAETH_NXM_WIDE(16, 32)
+HIGHBD_PAETH_NXM_WIDE(16, 64)
+HIGHBD_PAETH_NXM_WIDE(32, 8)
+HIGHBD_PAETH_NXM_WIDE(32, 16)
+HIGHBD_PAETH_NXM_WIDE(32, 32)
+HIGHBD_PAETH_NXM_WIDE(32, 64)
+HIGHBD_PAETH_NXM_WIDE(64, 16)
+HIGHBD_PAETH_NXM_WIDE(64, 32)
+HIGHBD_PAETH_NXM_WIDE(64, 64)
+
+// -----------------------------------------------------------------------------
+// SMOOTH
+
+// Computes 256 - v in each u16 lane (valid for v in [1, 255]): negating
+// the bytes as s8 leaves the zero high byte alone and turns the low byte
+// into (256 - v) mod 256.
+static INLINE uint16x4_t negate_s8(const uint16x4_t v) {
+ return vreinterpret_u16_s8(vneg_s8(vreinterpret_s8_u16(v)));
+}
+
+static INLINE void highbd_smooth_4xh_neon(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *const top_row,
+ const uint16_t *const left_column,
+ const int height) {
+ const uint16_t top_right = top_row[3];
+ const uint16_t bottom_left = left_column[height - 1];
+ const uint16_t *const weights_y = smooth_weights_u16 + height - 4;
+
+ const uint16x4_t top_v = vld1_u16(top_row);
+ const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left);
+ const uint16x4_t weights_x_v = vld1_u16(smooth_weights_u16);
+ const uint16x4_t scaled_weights_x = negate_s8(weights_x_v);
+ const uint32x4_t weighted_tr = vmull_n_u16(scaled_weights_x, top_right);
+
+ for (int y = 0; y < height; ++y) {
+ // Each variable in the running summation is named for the last item to be
+ // accumulated.
+ const uint32x4_t weighted_top =
+ vmlal_n_u16(weighted_tr, top_v, weights_y[y]);
+ const uint32x4_t weighted_left =
+ vmlal_n_u16(weighted_top, weights_x_v, left_column[y]);
+ const uint32x4_t weighted_bl =
+ vmlal_n_u16(weighted_left, bottom_left_v, 256 - weights_y[y]);
+
+ const uint16x4_t pred =
+ vrshrn_n_u32(weighted_bl, SMOOTH_WEIGHT_LOG2_SCALE + 1);
+ vst1_u16(dst, pred);
+ dst += stride;
+ }
+}
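With SMOOTH_WEIGHT_LOG2_SCALE equal to 8, each output pixel of the kernel above reduces to the following per-pixel formula (sketch; w_x and w_y are the x and y smooth weights, both in [0, 256]):

    pred[y][x] = (w_y * top[x] + (256 - w_y) * bottom_left +
                  w_x * left[y] + (256 - w_x) * top_right + 256) >> 9;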
+
+// Common code between 8xH and [16|32|64]xH.
+static INLINE void highbd_calculate_pred8(
+ uint16_t *dst, const uint32x4_t weighted_corners_low,
+ const uint32x4_t weighted_corners_high, const uint16x4x2_t top_vals,
+ const uint16x4x2_t weights_x, const uint16_t left_y,
+ const uint16_t weight_y) {
+ // Each variable in the running summation is named for the last item to be
+ // accumulated.
+ const uint32x4_t weighted_top_low =
+ vmlal_n_u16(weighted_corners_low, top_vals.val[0], weight_y);
+ const uint32x4_t weighted_edges_low =
+ vmlal_n_u16(weighted_top_low, weights_x.val[0], left_y);
+
+ const uint16x4_t pred_low =
+ vrshrn_n_u32(weighted_edges_low, SMOOTH_WEIGHT_LOG2_SCALE + 1);
+ vst1_u16(dst, pred_low);
+
+ const uint32x4_t weighted_top_high =
+ vmlal_n_u16(weighted_corners_high, top_vals.val[1], weight_y);
+ const uint32x4_t weighted_edges_high =
+ vmlal_n_u16(weighted_top_high, weights_x.val[1], left_y);
+
+ const uint16x4_t pred_high =
+ vrshrn_n_u32(weighted_edges_high, SMOOTH_WEIGHT_LOG2_SCALE + 1);
+ vst1_u16(dst + 4, pred_high);
+}
+
+static void highbd_smooth_8xh_neon(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *const top_row,
+ const uint16_t *const left_column,
+ const int height) {
+ const uint16_t top_right = top_row[7];
+ const uint16_t bottom_left = left_column[height - 1];
+ const uint16_t *const weights_y = smooth_weights_u16 + height - 4;
+
+ const uint16x4x2_t top_vals = { { vld1_u16(top_row),
+ vld1_u16(top_row + 4) } };
+ const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left);
+ const uint16x4x2_t weights_x = { { vld1_u16(smooth_weights_u16 + 4),
+ vld1_u16(smooth_weights_u16 + 8) } };
+ const uint32x4_t weighted_tr_low =
+ vmull_n_u16(negate_s8(weights_x.val[0]), top_right);
+ const uint32x4_t weighted_tr_high =
+ vmull_n_u16(negate_s8(weights_x.val[1]), top_right);
+
+ for (int y = 0; y < height; ++y) {
+ const uint32x4_t weighted_bl =
+ vmull_n_u16(bottom_left_v, 256 - weights_y[y]);
+ const uint32x4_t weighted_corners_low =
+ vaddq_u32(weighted_bl, weighted_tr_low);
+ const uint32x4_t weighted_corners_high =
+ vaddq_u32(weighted_bl, weighted_tr_high);
+ highbd_calculate_pred8(dst, weighted_corners_low, weighted_corners_high,
+ top_vals, weights_x, left_column[y], weights_y[y]);
+ dst += stride;
+ }
+}
+
+#define HIGHBD_SMOOTH_NXM(W, H) \
+ void aom_highbd_smooth_predictor_##W##x##H##_neon( \
+ uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, \
+ const uint16_t *left, int bd) { \
+ (void)bd; \
+ highbd_smooth_##W##xh_neon(dst, y_stride, above, left, H); \
+ }
+
+HIGHBD_SMOOTH_NXM(4, 4)
+HIGHBD_SMOOTH_NXM(4, 8)
+HIGHBD_SMOOTH_NXM(8, 4)
+HIGHBD_SMOOTH_NXM(8, 8)
+HIGHBD_SMOOTH_NXM(4, 16)
+HIGHBD_SMOOTH_NXM(8, 16)
+HIGHBD_SMOOTH_NXM(8, 32)
+
+#undef HIGHBD_SMOOTH_NXM
+
+// For width 16 and above.
+#define HIGHBD_SMOOTH_PREDICTOR(W) \
+ static void highbd_smooth_##W##xh_neon( \
+ uint16_t *dst, ptrdiff_t stride, const uint16_t *const top_row, \
+ const uint16_t *const left_column, const int height) { \
+ const uint16_t top_right = top_row[(W)-1]; \
+ const uint16_t bottom_left = left_column[height - 1]; \
+ const uint16_t *const weights_y = smooth_weights_u16 + height - 4; \
+ \
+ /* Precompute weighted values that don't vary with |y|. */ \
+ uint32x4_t weighted_tr_low[(W) >> 3]; \
+ uint32x4_t weighted_tr_high[(W) >> 3]; \
+ for (int i = 0; i < (W) >> 3; ++i) { \
+ const int x = i << 3; \
+ const uint16x4_t weights_x_low = \
+ vld1_u16(smooth_weights_u16 + (W)-4 + x); \
+ weighted_tr_low[i] = vmull_n_u16(negate_s8(weights_x_low), top_right); \
+ const uint16x4_t weights_x_high = \
+ vld1_u16(smooth_weights_u16 + (W) + x); \
+ weighted_tr_high[i] = vmull_n_u16(negate_s8(weights_x_high), top_right); \
+ } \
+ \
+ const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left); \
+ for (int y = 0; y < height; ++y) { \
+ const uint32x4_t weighted_bl = \
+ vmull_n_u16(bottom_left_v, 256 - weights_y[y]); \
+ uint16_t *dst_x = dst; \
+ for (int i = 0; i < (W) >> 3; ++i) { \
+ const int x = i << 3; \
+ const uint16x4x2_t top_vals = { { vld1_u16(top_row + x), \
+ vld1_u16(top_row + x + 4) } }; \
+ const uint32x4_t weighted_corners_low = \
+ vaddq_u32(weighted_bl, weighted_tr_low[i]); \
+ const uint32x4_t weighted_corners_high = \
+ vaddq_u32(weighted_bl, weighted_tr_high[i]); \
+ /* Accumulate weighted edge values and store. */ \
+ const uint16x4x2_t weights_x = { \
+ { vld1_u16(smooth_weights_u16 + (W)-4 + x), \
+ vld1_u16(smooth_weights_u16 + (W) + x) } \
+ }; \
+ highbd_calculate_pred8(dst_x, weighted_corners_low, \
+ weighted_corners_high, top_vals, weights_x, \
+ left_column[y], weights_y[y]); \
+ dst_x += 8; \
+ } \
+ dst += stride; \
+ } \
+ }
+
+HIGHBD_SMOOTH_PREDICTOR(16)
+HIGHBD_SMOOTH_PREDICTOR(32)
+HIGHBD_SMOOTH_PREDICTOR(64)
+
+#undef HIGHBD_SMOOTH_PREDICTOR
+
+#define HIGHBD_SMOOTH_NXM_WIDE(W, H) \
+ void aom_highbd_smooth_predictor_##W##x##H##_neon( \
+ uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, \
+ const uint16_t *left, int bd) { \
+ (void)bd; \
+ highbd_smooth_##W##xh_neon(dst, y_stride, above, left, H); \
+ }
+
+HIGHBD_SMOOTH_NXM_WIDE(16, 4)
+HIGHBD_SMOOTH_NXM_WIDE(16, 8)
+HIGHBD_SMOOTH_NXM_WIDE(16, 16)
+HIGHBD_SMOOTH_NXM_WIDE(16, 32)
+HIGHBD_SMOOTH_NXM_WIDE(16, 64)
+HIGHBD_SMOOTH_NXM_WIDE(32, 8)
+HIGHBD_SMOOTH_NXM_WIDE(32, 16)
+HIGHBD_SMOOTH_NXM_WIDE(32, 32)
+HIGHBD_SMOOTH_NXM_WIDE(32, 64)
+HIGHBD_SMOOTH_NXM_WIDE(64, 16)
+HIGHBD_SMOOTH_NXM_WIDE(64, 32)
+HIGHBD_SMOOTH_NXM_WIDE(64, 64)
+
+#undef HIGHBD_SMOOTH_NXM_WIDE
+
+static void highbd_smooth_v_4xh_neon(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *const top_row,
+ const uint16_t *const left_column,
+ const int height) {
+ const uint16_t bottom_left = left_column[height - 1];
+ const uint16_t *const weights_y = smooth_weights_u16 + height - 4;
+
+ const uint16x4_t top_v = vld1_u16(top_row);
+ const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left);
+
+ for (int y = 0; y < height; ++y) {
+ const uint32x4_t weighted_bl =
+ vmull_n_u16(bottom_left_v, 256 - weights_y[y]);
+ const uint32x4_t weighted_top =
+ vmlal_n_u16(weighted_bl, top_v, weights_y[y]);
+ vst1_u16(dst, vrshrn_n_u32(weighted_top, SMOOTH_WEIGHT_LOG2_SCALE));
+
+ dst += stride;
+ }
+}
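SMOOTH_V drops the horizontal pair of terms, so the per-pixel formula becomes (sketch):

    pred[y][x] = (w_y * top[x] + (256 - w_y) * bottom_left + 128) >> 8;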
+
+static void highbd_smooth_v_8xh_neon(uint16_t *dst, const ptrdiff_t stride,
+ const uint16_t *const top_row,
+ const uint16_t *const left_column,
+ const int height) {
+ const uint16_t bottom_left = left_column[height - 1];
+ const uint16_t *const weights_y = smooth_weights_u16 + height - 4;
+
+ const uint16x4_t top_low = vld1_u16(top_row);
+ const uint16x4_t top_high = vld1_u16(top_row + 4);
+ const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left);
+
+ for (int y = 0; y < height; ++y) {
+ const uint32x4_t weighted_bl =
+ vmull_n_u16(bottom_left_v, 256 - weights_y[y]);
+
+ const uint32x4_t weighted_top_low =
+ vmlal_n_u16(weighted_bl, top_low, weights_y[y]);
+ vst1_u16(dst, vrshrn_n_u32(weighted_top_low, SMOOTH_WEIGHT_LOG2_SCALE));
+
+ const uint32x4_t weighted_top_high =
+ vmlal_n_u16(weighted_bl, top_high, weights_y[y]);
+ vst1_u16(dst + 4,
+ vrshrn_n_u32(weighted_top_high, SMOOTH_WEIGHT_LOG2_SCALE));
+ dst += stride;
+ }
+}
+
+#define HIGHBD_SMOOTH_V_NXM(W, H) \
+ void aom_highbd_smooth_v_predictor_##W##x##H##_neon( \
+ uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, \
+ const uint16_t *left, int bd) { \
+ (void)bd; \
+ highbd_smooth_v_##W##xh_neon(dst, y_stride, above, left, H); \
+ }
+
+HIGHBD_SMOOTH_V_NXM(4, 4)
+HIGHBD_SMOOTH_V_NXM(4, 8)
+HIGHBD_SMOOTH_V_NXM(4, 16)
+HIGHBD_SMOOTH_V_NXM(8, 4)
+HIGHBD_SMOOTH_V_NXM(8, 8)
+HIGHBD_SMOOTH_V_NXM(8, 16)
+HIGHBD_SMOOTH_V_NXM(8, 32)
+
+#undef HIGHBD_SMOOTH_V_NXM
+
+// For width 16 and above.
+#define HIGHBD_SMOOTH_V_PREDICTOR(W) \
+ static void highbd_smooth_v_##W##xh_neon( \
+ uint16_t *dst, const ptrdiff_t stride, const uint16_t *const top_row, \
+ const uint16_t *const left_column, const int height) { \
+ const uint16_t bottom_left = left_column[height - 1]; \
+ const uint16_t *const weights_y = smooth_weights_u16 + height - 4; \
+ \
+ uint16x4x2_t top_vals[(W) >> 3]; \
+ for (int i = 0; i < (W) >> 3; ++i) { \
+ const int x = i << 3; \
+ top_vals[i].val[0] = vld1_u16(top_row + x); \
+ top_vals[i].val[1] = vld1_u16(top_row + x + 4); \
+ } \
+ \
+ const uint16x4_t bottom_left_v = vdup_n_u16(bottom_left); \
+ for (int y = 0; y < height; ++y) { \
+ const uint32x4_t weighted_bl = \
+ vmull_n_u16(bottom_left_v, 256 - weights_y[y]); \
+ \
+ uint16_t *dst_x = dst; \
+ for (int i = 0; i < (W) >> 3; ++i) { \
+ const uint32x4_t weighted_top_low = \
+ vmlal_n_u16(weighted_bl, top_vals[i].val[0], weights_y[y]); \
+ vst1_u16(dst_x, \
+ vrshrn_n_u32(weighted_top_low, SMOOTH_WEIGHT_LOG2_SCALE)); \
+ \
+ const uint32x4_t weighted_top_high = \
+ vmlal_n_u16(weighted_bl, top_vals[i].val[1], weights_y[y]); \
+ vst1_u16(dst_x + 4, \
+ vrshrn_n_u32(weighted_top_high, SMOOTH_WEIGHT_LOG2_SCALE)); \
+ dst_x += 8; \
+ } \
+ dst += stride; \
+ } \
+ }
+
+HIGHBD_SMOOTH_V_PREDICTOR(16)
+HIGHBD_SMOOTH_V_PREDICTOR(32)
+HIGHBD_SMOOTH_V_PREDICTOR(64)
+
+#undef HIGHBD_SMOOTH_V_PREDICTOR
+
+#define HIGHBD_SMOOTH_V_NXM_WIDE(W, H) \
+ void aom_highbd_smooth_v_predictor_##W##x##H##_neon( \
+ uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, \
+ const uint16_t *left, int bd) { \
+ (void)bd; \
+ highbd_smooth_v_##W##xh_neon(dst, y_stride, above, left, H); \
+ }
+
+HIGHBD_SMOOTH_V_NXM_WIDE(16, 4)
+HIGHBD_SMOOTH_V_NXM_WIDE(16, 8)
+HIGHBD_SMOOTH_V_NXM_WIDE(16, 16)
+HIGHBD_SMOOTH_V_NXM_WIDE(16, 32)
+HIGHBD_SMOOTH_V_NXM_WIDE(16, 64)
+HIGHBD_SMOOTH_V_NXM_WIDE(32, 8)
+HIGHBD_SMOOTH_V_NXM_WIDE(32, 16)
+HIGHBD_SMOOTH_V_NXM_WIDE(32, 32)
+HIGHBD_SMOOTH_V_NXM_WIDE(32, 64)
+HIGHBD_SMOOTH_V_NXM_WIDE(64, 16)
+HIGHBD_SMOOTH_V_NXM_WIDE(64, 32)
+HIGHBD_SMOOTH_V_NXM_WIDE(64, 64)
+
+#undef HIGHBD_SMOOTH_V_NXM_WIDE
+
+static INLINE void highbd_smooth_h_4xh_neon(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *const top_row,
+ const uint16_t *const left_column,
+ const int height) {
+ const uint16_t top_right = top_row[3];
+
+ const uint16x4_t weights_x = vld1_u16(smooth_weights_u16);
+ const uint16x4_t scaled_weights_x = negate_s8(weights_x);
+
+ const uint32x4_t weighted_tr = vmull_n_u16(scaled_weights_x, top_right);
+ for (int y = 0; y < height; ++y) {
+ const uint32x4_t weighted_left =
+ vmlal_n_u16(weighted_tr, weights_x, left_column[y]);
+ vst1_u16(dst, vrshrn_n_u32(weighted_left, SMOOTH_WEIGHT_LOG2_SCALE));
+ dst += stride;
+ }
+}
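SMOOTH_H is the mirror case, blending only along the row (sketch):

    pred[y][x] = (w_x * left[y] + (256 - w_x) * top_right + 128) >> 8;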
+
+static INLINE void highbd_smooth_h_8xh_neon(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *const top_row,
+ const uint16_t *const left_column,
+ const int height) {
+ const uint16_t top_right = top_row[7];
+
+ const uint16x4x2_t weights_x = { { vld1_u16(smooth_weights_u16 + 4),
+ vld1_u16(smooth_weights_u16 + 8) } };
+
+ const uint32x4_t weighted_tr_low =
+ vmull_n_u16(negate_s8(weights_x.val[0]), top_right);
+ const uint32x4_t weighted_tr_high =
+ vmull_n_u16(negate_s8(weights_x.val[1]), top_right);
+
+ for (int y = 0; y < height; ++y) {
+ const uint16_t left_y = left_column[y];
+ const uint32x4_t weighted_left_low =
+ vmlal_n_u16(weighted_tr_low, weights_x.val[0], left_y);
+ vst1_u16(dst, vrshrn_n_u32(weighted_left_low, SMOOTH_WEIGHT_LOG2_SCALE));
+
+ const uint32x4_t weighted_left_high =
+ vmlal_n_u16(weighted_tr_high, weights_x.val[1], left_y);
+ vst1_u16(dst + 4,
+ vrshrn_n_u32(weighted_left_high, SMOOTH_WEIGHT_LOG2_SCALE));
+ dst += stride;
+ }
+}
+
+#define HIGHBD_SMOOTH_H_NXM(W, H) \
+ void aom_highbd_smooth_h_predictor_##W##x##H##_neon( \
+ uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, \
+ const uint16_t *left, int bd) { \
+ (void)bd; \
+ highbd_smooth_h_##W##xh_neon(dst, y_stride, above, left, H); \
+ }
+
+HIGHBD_SMOOTH_H_NXM(4, 4)
+HIGHBD_SMOOTH_H_NXM(4, 8)
+HIGHBD_SMOOTH_H_NXM(4, 16)
+HIGHBD_SMOOTH_H_NXM(8, 4)
+HIGHBD_SMOOTH_H_NXM(8, 8)
+HIGHBD_SMOOTH_H_NXM(8, 16)
+HIGHBD_SMOOTH_H_NXM(8, 32)
+
+#undef HIGHBD_SMOOTH_H_NXM
+
+// For width 16 and above.
+#define HIGHBD_SMOOTH_H_PREDICTOR(W) \
+ static void highbd_smooth_h_##W##xh_neon( \
+ uint16_t *dst, ptrdiff_t stride, const uint16_t *const top_row, \
+ const uint16_t *const left_column, const int height) { \
+ const uint16_t top_right = top_row[(W)-1]; \
+ \
+ uint16x4_t weights_x_low[(W) >> 3]; \
+ uint16x4_t weights_x_high[(W) >> 3]; \
+ uint32x4_t weighted_tr_low[(W) >> 3]; \
+ uint32x4_t weighted_tr_high[(W) >> 3]; \
+ for (int i = 0; i < (W) >> 3; ++i) { \
+ const int x = i << 3; \
+ weights_x_low[i] = vld1_u16(smooth_weights_u16 + (W)-4 + x); \
+ weighted_tr_low[i] = \
+ vmull_n_u16(negate_s8(weights_x_low[i]), top_right); \
+ weights_x_high[i] = vld1_u16(smooth_weights_u16 + (W) + x); \
+ weighted_tr_high[i] = \
+ vmull_n_u16(negate_s8(weights_x_high[i]), top_right); \
+ } \
+ \
+ for (int y = 0; y < height; ++y) { \
+ uint16_t *dst_x = dst; \
+ const uint16_t left_y = left_column[y]; \
+ for (int i = 0; i < (W) >> 3; ++i) { \
+ const uint32x4_t weighted_left_low = \
+ vmlal_n_u16(weighted_tr_low[i], weights_x_low[i], left_y); \
+ vst1_u16(dst_x, \
+ vrshrn_n_u32(weighted_left_low, SMOOTH_WEIGHT_LOG2_SCALE)); \
+ \
+ const uint32x4_t weighted_left_high = \
+ vmlal_n_u16(weighted_tr_high[i], weights_x_high[i], left_y); \
+ vst1_u16(dst_x + 4, \
+ vrshrn_n_u32(weighted_left_high, SMOOTH_WEIGHT_LOG2_SCALE)); \
+ dst_x += 8; \
+ } \
+ dst += stride; \
+ } \
+ }
+
+HIGHBD_SMOOTH_H_PREDICTOR(16)
+HIGHBD_SMOOTH_H_PREDICTOR(32)
+HIGHBD_SMOOTH_H_PREDICTOR(64)
+
+#undef HIGHBD_SMOOTH_H_PREDICTOR
+
+#define HIGHBD_SMOOTH_H_NXM_WIDE(W, H) \
+ void aom_highbd_smooth_h_predictor_##W##x##H##_neon( \
+ uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, \
+ const uint16_t *left, int bd) { \
+ (void)bd; \
+ highbd_smooth_h_##W##xh_neon(dst, y_stride, above, left, H); \
+ }
+
+HIGHBD_SMOOTH_H_NXM_WIDE(16, 4)
+HIGHBD_SMOOTH_H_NXM_WIDE(16, 8)
+HIGHBD_SMOOTH_H_NXM_WIDE(16, 16)
+HIGHBD_SMOOTH_H_NXM_WIDE(16, 32)
+HIGHBD_SMOOTH_H_NXM_WIDE(16, 64)
+HIGHBD_SMOOTH_H_NXM_WIDE(32, 8)
+HIGHBD_SMOOTH_H_NXM_WIDE(32, 16)
+HIGHBD_SMOOTH_H_NXM_WIDE(32, 32)
+HIGHBD_SMOOTH_H_NXM_WIDE(32, 64)
+HIGHBD_SMOOTH_H_NXM_WIDE(64, 16)
+HIGHBD_SMOOTH_H_NXM_WIDE(64, 32)
+HIGHBD_SMOOTH_H_NXM_WIDE(64, 64)
+
+#undef HIGHBD_SMOOTH_H_NXM_WIDE
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/highbd_loopfilter_neon.c b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/highbd_loopfilter_neon.c
index 453751328b7..0b720ce9c7f 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/highbd_loopfilter_neon.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/highbd_loopfilter_neon.c
@@ -90,7 +90,7 @@ static INLINE uint16x4_t needs_filter8(const uint16x8_t abd_p0p1_q0q1,
}
// -----------------------------------------------------------------------------
-// FilterNMasks functions.
+// filterN_masks functions.
static INLINE void filter4_masks(const uint16x8_t p0q0, const uint16x8_t p1q1,
const uint16_t hev_thresh,
@@ -99,13 +99,13 @@ static INLINE void filter4_masks(const uint16x8_t p0q0, const uint16x8_t p1q1,
uint16x4_t *const hev_mask,
uint16x4_t *const needs_filter4_mask) {
const uint16x8_t p0p1_q0q1 = vabdq_u16(p0q0, p1q1);
- // This includes cases where needs_filter4() is not true and so Filter2() will
+ // This includes cases where needs_filter4() is not true and so filter2() will
// not be applied.
const uint16x4_t hev_tmp_mask = hev(p0p1_q0q1, hev_thresh);
*needs_filter4_mask = needs_filter4(p0p1_q0q1, inner_thresh, outer_mask);
- // Filter2() will only be applied if both needs_filter4() and hev() are true.
+ // filter2() will only be applied if both needs_filter4() and hev() are true.
*hev_mask = vand_u16(hev_tmp_mask, *needs_filter4_mask);
}
@@ -172,9 +172,9 @@ static INLINE void filter8_masks(
}
// -----------------------------------------------------------------------------
-// FilterN functions.
+// filterN functions.
-// Calculate filter4() or Filter2() based on |hev_mask|.
+// Calculate filter4() or filter2() based on |hev_mask|.
static INLINE void filter4(const uint16x8_t p0q0, const uint16x8_t p0q1,
const uint16x8_t p1q1, const uint16x4_t hev_mask,
int bitdepth, uint16x8_t *const p1q1_result,
@@ -185,7 +185,7 @@ static INLINE void filter4(const uint16x8_t p0q0, const uint16x8_t p0q1,
const int16x8_t q0mp0_p1mq1 = vreinterpretq_s16_u16(vsubq_u16(q0p1, p0q1));
const int16x4_t q0mp0_3 = vmul_n_s16(vget_low_s16(q0mp0_p1mq1), 3);
- // If this is for Filter2() then include |p1mq1|. Otherwise zero it.
+ // If this is for filter2() then include |p1mq1|. Otherwise zero it.
const int16x4_t min_signed_pixel = vdup_n_s16(-(1 << (bitdepth - 1)));
const int16x4_t max_signed_pixel = vdup_n_s16((1 << (bitdepth - 1)) - 1);
const int16x4_t p1mq1 = vget_high_s16(q0mp0_p1mq1);
@@ -252,13 +252,6 @@ void aom_highbd_lpf_horizontal_4_neon(uint16_t *s, int pitch,
// None of the values will be filtered.
return;
}
-#else // !defined(__aarch64__)
- const uint64x1_t needs_filter4_mask64 =
- vreinterpret_u64_u16(needs_filter4_mask);
- if (vget_lane_u64(needs_filter4_mask64, 0) == 0) {
- // None of the values will be filtered.
- return;
- }
#endif // defined(__aarch64__)
// Copy the masks to the high bits for packed comparisons later.
@@ -325,13 +318,6 @@ void aom_highbd_lpf_vertical_4_neon(uint16_t *s, int pitch,
// None of the values will be filtered.
return;
}
-#else // !defined(__aarch64__)
- const uint64x1_t needs_filter4_mask64 =
- vreinterpret_u64_u16(needs_filter4_mask);
- if (vget_lane_u64(needs_filter4_mask64, 0) == 0) {
- // None of the values will be filtered.
- return;
- }
#endif // defined(__aarch64__)
// Copy the masks to the high bits for packed comparisons later.
@@ -456,15 +442,6 @@ void aom_highbd_lpf_horizontal_6_neon(uint16_t *s, int pitch,
// None of the values will be filtered.
return;
}
-#else // !defined(__aarch64__)
- // This might be faster than vaddv (latency 3) because mov to general register
- // has latency 2.
- const uint64x1_t needs_filter_mask64 =
- vreinterpret_u64_u16(needs_filter_mask);
- if (vget_lane_u64(needs_filter_mask64, 0) == 0) {
- // None of the values will be filtered.
- return;
- }
#endif // defined(__aarch64__)
// Copy the masks to the high bits for packed comparisons later.
@@ -556,15 +533,6 @@ void aom_highbd_lpf_vertical_6_neon(uint16_t *s, int pitch,
// None of the values will be filtered.
return;
}
-#else // !defined(__aarch64__)
- // This might be faster than vaddv (latency 3) because mov to general register
- // has latency 2.
- const uint64x1_t needs_filter_mask64 =
- vreinterpret_u64_u16(needs_filter_mask);
- if (vget_lane_u64(needs_filter_mask64, 0) == 0) {
- // None of the values will be filtered.
- return;
- }
#endif // defined(__aarch64__)
// Copy the masks to the high bits for packed comparisons later.
@@ -721,15 +689,6 @@ void aom_highbd_lpf_horizontal_8_neon(uint16_t *s, int pitch,
// None of the values will be filtered.
return;
}
-#else // !defined(__aarch64__)
- // This might be faster than vaddv (latency 3) because mov to general register
- // has latency 2.
- const uint64x1_t needs_filter_mask64 =
- vreinterpret_u64_u16(needs_filter_mask);
- if (vget_lane_u64(needs_filter_mask64, 0) == 0) {
- // None of the values will be filtered.
- return;
- }
#endif // defined(__aarch64__)
// Copy the masks to the high bits for packed comparisons later.
@@ -829,15 +788,6 @@ void aom_highbd_lpf_vertical_8_neon(uint16_t *s, int pitch,
// None of the values will be filtered.
return;
}
-#else // !defined(__aarch64__)
- // This might be faster than vaddv (latency 3) because mov to general register
- // has latency 2.
- const uint64x1_t needs_filter_mask64 =
- vreinterpret_u64_u16(needs_filter_mask);
- if (vget_lane_u64(needs_filter_mask64, 0) == 0) {
- // None of the values will be filtered.
- return;
- }
#endif // defined(__aarch64__)
// Copy the masks to the high bits for packed comparisons later.
@@ -1031,15 +981,6 @@ void aom_highbd_lpf_horizontal_14_neon(uint16_t *s, int pitch,
// None of the values will be filtered.
return;
}
-#else // !defined(__aarch64__)
- // This might be faster than vaddv (latency 3) because mov to general register
- // has latency 2.
- const uint64x1_t needs_filter_mask64 =
- vreinterpret_u64_u16(needs_filter_mask);
- if (vget_lane_u64(needs_filter_mask64, 0) == 0) {
- // None of the values will be filtered.
- return;
- }
#endif // defined(__aarch64__)
const uint16x8_t p4q4 = vcombine_u16(src[2], src[11]);
const uint16x8_t p5q5 = vcombine_u16(src[1], src[12]);
@@ -1208,15 +1149,6 @@ void aom_highbd_lpf_vertical_14_neon(uint16_t *s, int pitch,
// None of the values will be filtered.
return;
}
-#else // !defined(__aarch64__)
- // This might be faster than vaddv (latency 3) because mov to general register
- // has latency 2.
- const uint64x1_t needs_filter_mask64 =
- vreinterpret_u64_u16(needs_filter_mask);
- if (vget_lane_u64(needs_filter_mask64, 0) == 0) {
- // None of the values will be filtered.
- return;
- }
#endif // defined(__aarch64__)
const uint16x8_t p4q4 =
vcombine_u16(vget_low_u16(src_p[3]), vget_high_u16(src_q[0]));
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/highbd_quantize_neon.c b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/highbd_quantize_neon.c
new file mode 100644
index 00000000000..927e13c9a22
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/highbd_quantize_neon.c
@@ -0,0 +1,428 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <arm_neon.h>
+#include <assert.h>
+
+#include "aom_dsp/quantize.h"
+#include "aom_dsp/arm/mem_neon.h"
+
+#include "av1/common/quant_common.h"
+#include "av1/encoder/av1_quantize.h"
+
+static INLINE uint32_t sum_abs_coeff(const uint32x4_t a) {
+#if defined(__aarch64__)
+ return vaddvq_u32(a);
+#else
+ const uint64x2_t b = vpaddlq_u32(a);
+ const uint64x1_t c = vadd_u64(vget_low_u64(b), vget_high_u64(b));
+ return (uint32_t)vget_lane_u64(c, 0);
+#endif
+}
+
+static INLINE uint16x4_t
+quantize_4(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, int32x4_t v_quant_s32,
+ int32x4_t v_dequant_s32, int32x4_t v_round_s32, int32x4_t v_zbin_s32,
+ int32x4_t v_quant_shift_s32, int log_scale) {
+ const int32x4_t v_coeff = vld1q_s32(coeff_ptr);
+ const int32x4_t v_coeff_sign =
+ vreinterpretq_s32_u32(vcltq_s32(v_coeff, vdupq_n_s32(0)));
+ const int32x4_t v_abs_coeff = vabsq_s32(v_coeff);
+ // if (abs_coeff < zbins[rc != 0]),
+ const uint32x4_t v_zbin_mask = vcgeq_s32(v_abs_coeff, v_zbin_s32);
+ const int32x4_t v_log_scale = vdupq_n_s32(log_scale);
+ // const int64_t tmp = (int64_t)abs_coeff + log_scaled_round;
+ const int32x4_t v_tmp = vaddq_s32(v_abs_coeff, v_round_s32);
+ // const int32_t tmpw32 = tmp * wt;
+ const int32x4_t v_tmpw32 = vmulq_s32(v_tmp, vdupq_n_s32((1 << AOM_QM_BITS)));
+ // const int32_t tmp2 = (int32_t)((tmpw32 * quant64) >> 16);
+ const int32x4_t v_tmp2 = vqdmulhq_s32(v_tmpw32, v_quant_s32);
+ // const int32_t tmp3 =
+ // ((((tmp2 + tmpw32)<< log_scale) * (int64_t)(quant_shift << 15)) >> 32);
+ const int32x4_t v_tmp3 = vqdmulhq_s32(
+ vshlq_s32(vaddq_s32(v_tmp2, v_tmpw32), v_log_scale), v_quant_shift_s32);
+ // const int abs_qcoeff = vmask ? (int)tmp3 >> AOM_QM_BITS : 0;
+ const int32x4_t v_abs_qcoeff = vandq_s32(vreinterpretq_s32_u32(v_zbin_mask),
+ vshrq_n_s32(v_tmp3, AOM_QM_BITS));
+ // const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant_iwt) >> log_scale;
+ // vshlq_s32 will shift right if shift value is negative.
+ const int32x4_t v_abs_dqcoeff =
+ vshlq_s32(vmulq_s32(v_abs_qcoeff, v_dequant_s32), vnegq_s32(v_log_scale));
+ // qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+ const int32x4_t v_qcoeff =
+ vsubq_s32(veorq_s32(v_abs_qcoeff, v_coeff_sign), v_coeff_sign);
+ // dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
+ const int32x4_t v_dqcoeff =
+ vsubq_s32(veorq_s32(v_abs_dqcoeff, v_coeff_sign), v_coeff_sign);
+
+ vst1q_s32(qcoeff_ptr, v_qcoeff);
+ vst1q_s32(dqcoeff_ptr, v_dqcoeff);
+
+ // Used to find eob.
+ const uint32x4_t nz_qcoeff_mask = vcgtq_s32(v_abs_qcoeff, vdupq_n_s32(0));
+ return vmovn_u32(nz_qcoeff_mask);
+}
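quantize_4 mirrors the scalar highbd quantize_b path with identity quant matrices, i.e. wt = iwt = 1 << AOM_QM_BITS. Per coefficient it computes, ignoring the saturation that vqdmulhq_s32 can introduce (a sketch):

    static void quantize_coeff_sketch(int32_t coeff, int32_t zbin,
                                      int32_t rounding, int32_t quant,
                                      int32_t quant_shift, int32_t dequant,
                                      int log_scale, int32_t *qcoeff,
                                      int32_t *dqcoeff) {
      const int32_t abs_coeff = coeff < 0 ? -coeff : coeff;
      int64_t abs_q = 0;
      if (abs_coeff >= zbin) {
        const int64_t tmpw = (int64_t)(abs_coeff + rounding) << AOM_QM_BITS;
        const int64_t tmp2 = (tmpw * quant) >> 16;
        const int64_t tmp3 =
            (((tmp2 + tmpw) << log_scale) * quant_shift) >> 16;
        abs_q = tmp3 >> AOM_QM_BITS;
      }
      *qcoeff = (int32_t)(coeff < 0 ? -abs_q : abs_q);
      const int64_t abs_dq = (abs_q * dequant) >> log_scale;
      *dqcoeff = (int32_t)(coeff < 0 ? -abs_dq : abs_dq);
    }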
+
+static INLINE int16x8_t get_max_lane_eob(const int16_t *iscan,
+ int16x8_t v_eobmax,
+ uint16x8_t v_mask) {
+ const int16x8_t v_iscan = vld1q_s16(&iscan[0]);
+ const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, vdupq_n_s16(1));
+ const int16x8_t v_nz_iscan = vbslq_s16(v_mask, v_iscan_plus1, vdupq_n_s16(0));
+ return vmaxq_s16(v_eobmax, v_nz_iscan);
+}
+
+static INLINE void get_min_max_lane_eob(const int16_t *iscan,
+ int16x8_t *v_eobmin,
+ int16x8_t *v_eobmax, uint16x8_t v_mask,
+ intptr_t n_coeffs) {
+ const int16x8_t v_iscan = vld1q_s16(&iscan[0]);
+ const int16x8_t v_nz_iscan_max = vbslq_s16(v_mask, v_iscan, vdupq_n_s16(-1));
+#if SKIP_EOB_FACTOR_ADJUST
+ const int16x8_t v_nz_iscan_min =
+ vbslq_s16(v_mask, v_iscan, vdupq_n_s16(n_coeffs));
+ *v_eobmin = vminq_s16(*v_eobmin, v_nz_iscan_min);
+#else
+ (void)v_eobmin;
+#endif
+ *v_eobmax = vmaxq_s16(*v_eobmax, v_nz_iscan_max);
+}
+
+static INLINE uint16_t get_max_eob(int16x8_t v_eobmax) {
+#ifdef __aarch64__
+ return (uint16_t)vmaxvq_s16(v_eobmax);
+#else
+ const int16x4_t v_eobmax_3210 =
+ vmax_s16(vget_low_s16(v_eobmax), vget_high_s16(v_eobmax));
+ const int64x1_t v_eobmax_xx32 =
+ vshr_n_s64(vreinterpret_s64_s16(v_eobmax_3210), 32);
+ const int16x4_t v_eobmax_tmp =
+ vmax_s16(v_eobmax_3210, vreinterpret_s16_s64(v_eobmax_xx32));
+ const int64x1_t v_eobmax_xxx3 =
+ vshr_n_s64(vreinterpret_s64_s16(v_eobmax_tmp), 16);
+ const int16x4_t v_eobmax_final =
+ vmax_s16(v_eobmax_tmp, vreinterpret_s16_s64(v_eobmax_xxx3));
+ return (uint16_t)vget_lane_s16(v_eobmax_final, 0);
+#endif
+}
+
+static INLINE uint16_t get_min_eob(int16x8_t v_eobmin) {
+#ifdef __aarch64__
+ return (uint16_t)vminvq_s16(v_eobmin);
+#else
+ const int16x4_t v_eobmin_3210 =
+ vmin_s16(vget_low_s16(v_eobmin), vget_high_s16(v_eobmin));
+ const int64x1_t v_eobmin_xx32 =
+ vshr_n_s64(vreinterpret_s64_s16(v_eobmin_3210), 32);
+ const int16x4_t v_eobmin_tmp =
+ vmin_s16(v_eobmin_3210, vreinterpret_s16_s64(v_eobmin_xx32));
+ const int64x1_t v_eobmin_xxx3 =
+ vshr_n_s64(vreinterpret_s64_s16(v_eobmin_tmp), 16);
+ const int16x4_t v_eobmin_final =
+ vmin_s16(v_eobmin_tmp, vreinterpret_s16_s64(v_eobmin_xxx3));
+ return (uint16_t)vget_lane_s16(v_eobmin_final, 0);
+#endif
+}
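get_max_lane_eob keeps iscan + 1 for nonzero lanes and reduces with max, so the final eob is one past the largest inverse-scan index holding a nonzero qcoeff, or 0 when everything quantized to zero. Scalar equivalent (sketch):

    static uint16_t compute_eob_sketch(const tran_low_t *qcoeff,
                                       const int16_t *iscan, intptr_t n) {
      int eob = -1;
      for (intptr_t i = 0; i < n; ++i)
        if (qcoeff[i] && iscan[i] > eob) eob = iscan[i];
      return (uint16_t)(eob + 1);
    }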
+
+static void highbd_quantize_b_neon(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan, const int log_scale) {
+ (void)scan;
+ const int16x4_t v_quant = vld1_s16(quant_ptr);
+ const int16x4_t v_dequant = vld1_s16(dequant_ptr);
+ const int16x4_t v_zero = vdup_n_s16(0);
+ const uint16x4_t v_round_select = vcgt_s16(vdup_n_s16(log_scale), v_zero);
+ const int16x4_t v_round_no_scale = vld1_s16(round_ptr);
+ const int16x4_t v_round_log_scale =
+ vqrdmulh_n_s16(v_round_no_scale, (int16_t)(1 << (15 - log_scale)));
+ const int16x4_t v_round =
+ vbsl_s16(v_round_select, v_round_log_scale, v_round_no_scale);
+ const int16x4_t v_quant_shift = vld1_s16(quant_shift_ptr);
+ const int16x4_t v_zbin_no_scale = vld1_s16(zbin_ptr);
+ const int16x4_t v_zbin_log_scale =
+ vqrdmulh_n_s16(v_zbin_no_scale, (int16_t)(1 << (15 - log_scale)));
+ const int16x4_t v_zbin =
+ vbsl_s16(v_round_select, v_zbin_log_scale, v_zbin_no_scale);
+ int32x4_t v_round_s32 = vmovl_s16(v_round);
+ int32x4_t v_quant_s32 = vshlq_n_s32(vmovl_s16(v_quant), 15);
+ int32x4_t v_dequant_s32 = vmovl_s16(v_dequant);
+ int32x4_t v_quant_shift_s32 = vshlq_n_s32(vmovl_s16(v_quant_shift), 15);
+ int32x4_t v_zbin_s32 = vmovl_s16(v_zbin);
+ uint16x4_t v_mask_lo, v_mask_hi;
+ int16x8_t v_eobmax = vdupq_n_s16(-1);
+
+ intptr_t non_zero_count = n_coeffs;
+
+ assert(n_coeffs > 8);
+ // Pre-scan pass
+ const int32x4_t v_zbin_s32x = vdupq_lane_s32(vget_low_s32(v_zbin_s32), 1);
+ intptr_t i = n_coeffs;
+ do {
+ const int32x4_t v_coeff_a = vld1q_s32(coeff_ptr + i - 4);
+ const int32x4_t v_coeff_b = vld1q_s32(coeff_ptr + i - 8);
+ const int32x4_t v_abs_coeff_a = vabsq_s32(v_coeff_a);
+ const int32x4_t v_abs_coeff_b = vabsq_s32(v_coeff_b);
+ const uint32x4_t v_mask_a = vcgeq_s32(v_abs_coeff_a, v_zbin_s32x);
+ const uint32x4_t v_mask_b = vcgeq_s32(v_abs_coeff_b, v_zbin_s32x);
+ // If the coefficient is in the base ZBIN range, then discard.
+ if (sum_abs_coeff(v_mask_a) + sum_abs_coeff(v_mask_b) == 0) {
+ non_zero_count -= 8;
+ } else {
+ break;
+ }
+ i -= 8;
+ } while (i > 0);
+
+ const intptr_t remaining_zcoeffs = n_coeffs - non_zero_count;
+ memset(qcoeff_ptr + non_zero_count, 0,
+ remaining_zcoeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr + non_zero_count, 0,
+ remaining_zcoeffs * sizeof(*dqcoeff_ptr));
+
+ // DC and first 3 AC
+ v_mask_lo =
+ quantize_4(coeff_ptr, qcoeff_ptr, dqcoeff_ptr, v_quant_s32, v_dequant_s32,
+ v_round_s32, v_zbin_s32, v_quant_shift_s32, log_scale);
+
+ // overwrite the DC constants with AC constants
+ v_round_s32 = vdupq_lane_s32(vget_low_s32(v_round_s32), 1);
+ v_quant_s32 = vdupq_lane_s32(vget_low_s32(v_quant_s32), 1);
+ v_dequant_s32 = vdupq_lane_s32(vget_low_s32(v_dequant_s32), 1);
+ v_quant_shift_s32 = vdupq_lane_s32(vget_low_s32(v_quant_shift_s32), 1);
+ v_zbin_s32 = vdupq_lane_s32(vget_low_s32(v_zbin_s32), 1);
+
+ // 4 more AC
+ v_mask_hi = quantize_4(coeff_ptr + 4, qcoeff_ptr + 4, dqcoeff_ptr + 4,
+ v_quant_s32, v_dequant_s32, v_round_s32, v_zbin_s32,
+ v_quant_shift_s32, log_scale);
+
+ v_eobmax =
+ get_max_lane_eob(iscan, v_eobmax, vcombine_u16(v_mask_lo, v_mask_hi));
+
+ intptr_t count = non_zero_count - 8;
+ for (; count > 0; count -= 8) {
+ coeff_ptr += 8;
+ qcoeff_ptr += 8;
+ dqcoeff_ptr += 8;
+ iscan += 8;
+ v_mask_lo = quantize_4(coeff_ptr, qcoeff_ptr, dqcoeff_ptr, v_quant_s32,
+ v_dequant_s32, v_round_s32, v_zbin_s32,
+ v_quant_shift_s32, log_scale);
+ v_mask_hi = quantize_4(coeff_ptr + 4, qcoeff_ptr + 4, dqcoeff_ptr + 4,
+ v_quant_s32, v_dequant_s32, v_round_s32, v_zbin_s32,
+ v_quant_shift_s32, log_scale);
+ // Find the max lane eob for 8 coeffs.
+ v_eobmax =
+ get_max_lane_eob(iscan, v_eobmax, vcombine_u16(v_mask_lo, v_mask_hi));
+ }
+
+ *eob_ptr = get_max_eob(v_eobmax);
+}
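In outline, highbd_quantize_b_neon first scans backward in groups of eight and drops any tail group whose coefficients are all below the AC zbin, zeroes those tails, quantizes the first group with the DC constants in lane 0, broadcasts the AC constants, and loops over the remainder while tracking the eob. A call-site sketch for a 16x16 block (the array variables stand for the encoder's per-plane quant tables and are illustrative):

    uint16_t eob;
    aom_highbd_quantize_b_neon(coeff, 256, zbin, round, quant, quant_shift,
                               qcoeff, dqcoeff, dequant, &eob, scan, iscan);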
+
+void aom_highbd_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const int16_t *zbin_ptr,
+ const int16_t *round_ptr,
+ const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ highbd_quantize_b_neon(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
+ quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
+ eob_ptr, scan, iscan, 0);
+}
+
+void aom_highbd_quantize_b_32x32_neon(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ highbd_quantize_b_neon(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
+ quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
+ eob_ptr, scan, iscan, 1);
+}
+
+void aom_highbd_quantize_b_64x64_neon(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ highbd_quantize_b_neon(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr,
+ quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr,
+ eob_ptr, scan, iscan, 2);
+}
+
+#if !CONFIG_REALTIME_ONLY
+static void highbd_quantize_b_adaptive_neon(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan, const int log_scale) {
+ (void)scan;
+ const int16x4_t v_quant = vld1_s16(quant_ptr);
+ const int16x4_t v_dequant = vld1_s16(dequant_ptr);
+ const int16x4_t v_zero = vdup_n_s16(0);
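+  // For log_scale > 0 (32x32/64x64 blocks), round and zbin are pre-scaled:
+  // vqrdmulh by 1 << (15 - log_scale) computes ROUND_POWER_OF_TWO(x,
+  // log_scale), and v_round_select picks the scaled variant.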
+ const uint16x4_t v_round_select = vcgt_s16(vdup_n_s16(log_scale), v_zero);
+ const int16x4_t v_round_no_scale = vld1_s16(round_ptr);
+ const int16x4_t v_round_log_scale =
+ vqrdmulh_n_s16(v_round_no_scale, (int16_t)(1 << (15 - log_scale)));
+ const int16x4_t v_round =
+ vbsl_s16(v_round_select, v_round_log_scale, v_round_no_scale);
+ const int16x4_t v_quant_shift = vld1_s16(quant_shift_ptr);
+ const int16x4_t v_zbin_no_scale = vld1_s16(zbin_ptr);
+ const int16x4_t v_zbin_log_scale =
+ vqrdmulh_n_s16(v_zbin_no_scale, (int16_t)(1 << (15 - log_scale)));
+ const int16x4_t v_zbin =
+ vbsl_s16(v_round_select, v_zbin_log_scale, v_zbin_no_scale);
+ int32x4_t v_round_s32 = vmovl_s16(v_round);
+ int32x4_t v_quant_s32 = vshlq_n_s32(vmovl_s16(v_quant), 15);
+ int32x4_t v_dequant_s32 = vmovl_s16(v_dequant);
+ int32x4_t v_quant_shift_s32 = vshlq_n_s32(vmovl_s16(v_quant_shift), 15);
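+  // quant and quant_shift are widened and pre-shifted into the high 16 bits,
+  // presumably so quantize_4 (defined earlier in this file, not part of this
+  // hunk) can apply them with a single high-half multiply.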
+ int32x4_t v_zbin_s32 = vmovl_s16(v_zbin);
+ uint16x4_t v_mask_lo, v_mask_hi;
+ int16x8_t v_eobmax = vdupq_n_s16(-1);
+ int16x8_t v_eobmin = vdupq_n_s16(n_coeffs);
+
+ assert(n_coeffs > 8);
+ // Pre-scan pass
+ const int32x4_t v_zbin_s32x = vdupq_lane_s32(vget_low_s32(v_zbin_s32), 1);
+ const int prescan_add_1 =
+ ROUND_POWER_OF_TWO(dequant_ptr[1] * EOB_FACTOR, 7 + AOM_QM_BITS);
+ const int32x4_t v_zbin_prescan =
+ vaddq_s32(v_zbin_s32x, vdupq_n_s32(prescan_add_1));
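+  // The pre-scan threshold is the AC zbin widened by an EOB_FACTOR-based
+  // margin, so coefficients near the zero-bin edge still reach the
+  // quantization loop instead of being trimmed here.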
+ intptr_t non_zero_count = n_coeffs;
+ intptr_t i = n_coeffs;
+ do {
+ const int32x4_t v_coeff_a = vld1q_s32(coeff_ptr + i - 4);
+ const int32x4_t v_coeff_b = vld1q_s32(coeff_ptr + i - 8);
+ const int32x4_t v_abs_coeff_a = vabsq_s32(v_coeff_a);
+ const int32x4_t v_abs_coeff_b = vabsq_s32(v_coeff_b);
+ const uint32x4_t v_mask_a = vcgeq_s32(v_abs_coeff_a, v_zbin_prescan);
+ const uint32x4_t v_mask_b = vcgeq_s32(v_abs_coeff_b, v_zbin_prescan);
+    // If all 8 coefficients lie inside the base ZBIN range, discard the group.
+ if (sum_abs_coeff(v_mask_a) + sum_abs_coeff(v_mask_b) == 0) {
+ non_zero_count -= 8;
+ } else {
+ break;
+ }
+ i -= 8;
+ } while (i > 0);
+
+ const intptr_t remaining_zcoeffs = n_coeffs - non_zero_count;
+ memset(qcoeff_ptr + non_zero_count, 0,
+ remaining_zcoeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr + non_zero_count, 0,
+ remaining_zcoeffs * sizeof(*dqcoeff_ptr));
+
+ // DC and first 3 AC
+ v_mask_lo =
+ quantize_4(coeff_ptr, qcoeff_ptr, dqcoeff_ptr, v_quant_s32, v_dequant_s32,
+ v_round_s32, v_zbin_s32, v_quant_shift_s32, log_scale);
+
+ // overwrite the DC constants with AC constants
+ v_round_s32 = vdupq_lane_s32(vget_low_s32(v_round_s32), 1);
+ v_quant_s32 = vdupq_lane_s32(vget_low_s32(v_quant_s32), 1);
+ v_dequant_s32 = vdupq_lane_s32(vget_low_s32(v_dequant_s32), 1);
+ v_quant_shift_s32 = vdupq_lane_s32(vget_low_s32(v_quant_shift_s32), 1);
+ v_zbin_s32 = vdupq_lane_s32(vget_low_s32(v_zbin_s32), 1);
+
+ // 4 more AC
+ v_mask_hi = quantize_4(coeff_ptr + 4, qcoeff_ptr + 4, dqcoeff_ptr + 4,
+ v_quant_s32, v_dequant_s32, v_round_s32, v_zbin_s32,
+ v_quant_shift_s32, log_scale);
+
+ get_min_max_lane_eob(iscan, &v_eobmin, &v_eobmax,
+ vcombine_u16(v_mask_lo, v_mask_hi), n_coeffs);
+
+ intptr_t count = non_zero_count - 8;
+ for (; count > 0; count -= 8) {
+ coeff_ptr += 8;
+ qcoeff_ptr += 8;
+ dqcoeff_ptr += 8;
+ iscan += 8;
+ v_mask_lo = quantize_4(coeff_ptr, qcoeff_ptr, dqcoeff_ptr, v_quant_s32,
+ v_dequant_s32, v_round_s32, v_zbin_s32,
+ v_quant_shift_s32, log_scale);
+ v_mask_hi = quantize_4(coeff_ptr + 4, qcoeff_ptr + 4, dqcoeff_ptr + 4,
+ v_quant_s32, v_dequant_s32, v_round_s32, v_zbin_s32,
+ v_quant_shift_s32, log_scale);
+
+ get_min_max_lane_eob(iscan, &v_eobmin, &v_eobmax,
+ vcombine_u16(v_mask_lo, v_mask_hi), n_coeffs);
+ }
+
+ int eob = get_max_eob(v_eobmax);
+
+#if SKIP_EOB_FACTOR_ADJUST
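+  // If the only surviving coefficient is the final one in scan order and it
+  // quantized to +/-1, re-test it against a widened zero-bin and zero it out
+  // if it falls inside, leaving an all-zero block (eob = -1).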
+ const int first = get_min_eob(v_eobmin);
+ if (eob >= 0 && first == eob) {
+ const int rc = scan[eob];
+ if (qcoeff_ptr[rc] == 1 || qcoeff_ptr[rc] == -1) {
+ const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale),
+ ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale) };
+ const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 };
+ const qm_val_t wt = (1 << AOM_QM_BITS);
+ const int coeff = coeff_ptr[rc] * wt;
+ const int factor = EOB_FACTOR + SKIP_EOB_FACTOR_ADJUST;
+ const int prescan_add_val =
+ ROUND_POWER_OF_TWO(dequant_ptr[rc != 0] * factor, 7);
+ if (coeff < (zbins[rc != 0] * (1 << AOM_QM_BITS) + prescan_add_val) &&
+ coeff > (nzbins[rc != 0] * (1 << AOM_QM_BITS) - prescan_add_val)) {
+ qcoeff_ptr[rc] = 0;
+ dqcoeff_ptr[rc] = 0;
+ eob = -1;
+ }
+ }
+ }
+#endif // SKIP_EOB_FACTOR_ADJUST
+ *eob_ptr = eob + 1;
+}
+
+void aom_highbd_quantize_b_adaptive_neon(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ highbd_quantize_b_adaptive_neon(
+ coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
+ qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, 0);
+}
+
+void aom_highbd_quantize_b_32x32_adaptive_neon(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ highbd_quantize_b_adaptive_neon(
+ coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
+ qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, 1);
+}
+
+void aom_highbd_quantize_b_64x64_adaptive_neon(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ highbd_quantize_b_adaptive_neon(
+ coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr,
+ qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, 2);
+}
+#endif // !CONFIG_REALTIME_ONLY
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/intrapred_neon.c b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/intrapred_neon.c
index 05d0316f419..8e6dc120032 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/intrapred_neon.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/intrapred_neon.c
@@ -16,6 +16,8 @@
#include "config/aom_dsp_rtcd.h"
#include "aom/aom_integer.h"
+#include "aom_dsp/arm/mem_neon.h"
+#include "aom_dsp/intrapred_common.h"
//------------------------------------------------------------------------------
// DC 4x4
@@ -530,67 +532,6 @@ void aom_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
}
}
-static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
- const uint16_t *above,
- const uint16_t *left) {
- assert(bw >= 4);
- assert(IS_POWER_OF_TWO(bw));
- int expected_dc, sum = 0;
- const int count = bw * 2;
- uint32x4_t sum_q = vdupq_n_u32(0);
- uint32x2_t sum_d;
- uint16_t *dst_1;
- if (bw >= 8) {
- for (int i = 0; i < bw; i += 8) {
- sum_q = vpadalq_u16(sum_q, vld1q_u16(above));
- sum_q = vpadalq_u16(sum_q, vld1q_u16(left));
- above += 8;
- left += 8;
- }
- sum_d = vadd_u32(vget_low_u32(sum_q), vget_high_u32(sum_q));
- sum = vget_lane_s32(vreinterpret_s32_u64(vpaddl_u32(sum_d)), 0);
- expected_dc = (sum + (count >> 1)) / count;
- const uint16x8_t dc = vdupq_n_u16((uint16_t)expected_dc);
- for (int r = 0; r < bw; r++) {
- dst_1 = dst;
- for (int i = 0; i < bw; i += 8) {
- vst1q_u16(dst_1, dc);
- dst_1 += 8;
- }
- dst += stride;
- }
- } else { // 4x4
- sum_q = vaddl_u16(vld1_u16(above), vld1_u16(left));
- sum_d = vadd_u32(vget_low_u32(sum_q), vget_high_u32(sum_q));
- sum = vget_lane_s32(vreinterpret_s32_u64(vpaddl_u32(sum_d)), 0);
- expected_dc = (sum + (count >> 1)) / count;
- const uint16x4_t dc = vdup_n_u16((uint16_t)expected_dc);
- for (int r = 0; r < bw; r++) {
- vst1_u16(dst, dc);
- dst += stride;
- }
- }
-}
-
-#define INTRA_PRED_HIGHBD_SIZED_NEON(type, width) \
- void aom_highbd_##type##_predictor_##width##x##width##_neon( \
- uint16_t *dst, ptrdiff_t stride, const uint16_t *above, \
- const uint16_t *left, int bd) { \
- (void)bd; \
- highbd_##type##_predictor(dst, stride, width, above, left); \
- }
-
-#define INTRA_PRED_SQUARE(type) \
- INTRA_PRED_HIGHBD_SIZED_NEON(type, 4) \
- INTRA_PRED_HIGHBD_SIZED_NEON(type, 8) \
- INTRA_PRED_HIGHBD_SIZED_NEON(type, 16) \
- INTRA_PRED_HIGHBD_SIZED_NEON(type, 32) \
- INTRA_PRED_HIGHBD_SIZED_NEON(type, 64)
-
-INTRA_PRED_SQUARE(dc)
-
-#undef INTRA_PRED_SQUARE
-
/* ---------------------P R E D I C T I O N Z 1--------------------------- */
static DECLARE_ALIGNED(16, uint8_t, EvenOddMaskx[8][16]) = {
@@ -2705,524 +2646,776 @@ void av1_dr_prediction_z3_neon(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
}
}
}
-static const int sm_weight_log2_scale = 8;
-
-// max(block_size_wide[BLOCK_LARGEST], block_size_high[BLOCK_LARGEST])
-#define MAX_BLOCK_DIM 64
-
-/* clang-format off */
-static const uint8_t sm_weight_arrays[2 * MAX_BLOCK_DIM] = {
- // Unused, because we always offset by bs, which is at least 2.
- 0, 0,
- // bs = 2
- 255, 128,
- // bs = 4
- 255, 149, 85, 64,
- // bs = 8
- 255, 197, 146, 105, 73, 50, 37, 32,
- // bs = 16
- 255, 225, 196, 170, 145, 123, 102, 84, 68, 54, 43, 33, 26, 20, 17, 16,
- // bs = 32
- 255, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122, 111, 101, 92, 83, 74,
- 66, 59, 52, 45, 39, 34, 29, 25, 21, 17, 14, 12, 10, 9, 8, 8,
- // bs = 64
- 255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169, 163, 156,
- 150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96, 91, 86, 82, 77, 73,
- 69, 65, 61, 57, 54, 50, 47, 44, 41, 38, 35, 32, 29, 27, 25, 22, 20, 18, 16,
- 15, 13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4,
-};
-/* clang-format on */
// -----------------------------------------------------------------------------
// SMOOTH_PRED
-// pixels[0]: above and below_pred interleave vector
-// pixels[1]: left vector
-// pixels[2]: right_pred vector
-static INLINE void load_pixel_w4(const uint8_t *above, const uint8_t *left,
- int height, uint8x16_t *pixels) {
- uint32x4_t zero = vdupq_n_u32(0);
- const uint8x8_t d = vcreate_u8(((const uint32_t *)above)[0]);
- if (height == 4)
- pixels[1] =
- vreinterpretq_u8_u32(vld1q_lane_u32((const uint32_t *)left, zero, 0));
- else if (height == 8) {
- pixels[1] = vreinterpretq_u8_u64(vsetq_lane_u64(
- ((const uint64_t *)left)[0], vreinterpretq_u64_u32(zero), 0));
- } else {
- pixels[1] = vld1q_u8(left);
- }
-
- pixels[2] = vreinterpretq_u8_u16(vdupq_n_u16(above[3]));
-
- const uint16x8_t bp = vdupq_n_u16(left[height - 1]);
-#if defined(__aarch64__)
- pixels[0] = vreinterpretq_u8_u16(vzip1q_u16(vmovl_u8(d), bp));
-#else
- pixels[0] = vreinterpretq_u8_u16(vzipq_u16(vmovl_u8(d), bp).val[0]);
-#endif // (__aarch64__)
-}
-
-// weight_h[0]: weight_h vector
-// weight_h[1]: scale - weight_h vector
-// weight_h[2]: same as [0], second half for height = 16 only
-// weight_h[3]: same as [1], second half for height = 16 only
-// weight_w[0]: weights_w and scale - weights_w interleave vector
-static INLINE void load_weight_w4(const uint8_t *weight_array, int height,
- uint16x8_t *weight_h, uint16x8_t *weight_w) {
- const uint16x8_t d = vdupq_n_u16((uint16_t)(1 << sm_weight_log2_scale));
- const uint8x8_t t = vcreate_u8(((const uint32_t *)(weight_array))[1]);
- weight_h[0] = vmovl_u8(t);
- weight_h[1] = vsubw_u8(d, t);
-#if defined(__aarch64__)
- weight_w[0] = vzip1q_u16(weight_h[0], weight_h[1]);
-#else
- weight_w[0] = vzipq_u16(weight_h[0], weight_h[1]).val[0];
-#endif // (__aarch64__)
-
- if (height == 8) {
- const uint8x8_t weight = vld1_u8(&weight_array[8]);
- weight_h[0] = vmovl_u8(weight);
- weight_h[1] = vsubw_u8(d, weight);
- } else if (height == 16) {
- const uint8x16_t zero = vdupq_n_u8(0);
- const uint8x16_t weight = vld1q_u8(&weight_array[16]);
- const uint8x16x2_t weight_h_02 = vzipq_u8(weight, zero);
- weight_h[0] = vreinterpretq_u16_u8(weight_h_02.val[0]);
- weight_h[1] = vsubq_u16(d, vreinterpretq_u16_u8(weight_h_02.val[0]));
- weight_h[2] = vreinterpretq_u16_u8(weight_h_02.val[1]);
- weight_h[3] = vsubq_u16(d, vreinterpretq_u16_u8(weight_h_02.val[1]));
- }
-}
-
-static INLINE void smooth_pred_4xh(const uint8x16_t *pixel,
- const uint16x8_t *wh, const uint16x8_t *ww,
- int h, uint8_t *dst, ptrdiff_t stride,
- int second_half) {
- const uint16x4_t one = vdup_n_u16(1);
- const uint16x4_t inc = vdup_n_u16(0x202);
- uint16x4_t rep =
- second_half ? vdup_n_u16((uint16_t)0x8008) : vdup_n_u16((uint16_t)0x8000);
- uint16x4_t d = vdup_n_u16(0x100);
- const uint16x4_t v_pixel_0_lo = vmovn_u32(vreinterpretq_u32_u8(pixel[0]));
- const uint16x4_t v_pixel_0_hi =
- vmovn_u32(vreinterpretq_u32_u8(vextq_u8(pixel[0], pixel[0], 2)));
- const uint16x4_t v_pixel_2 = vget_low_u16(vreinterpretq_u16_u8(pixel[2]));
- const uint16x4_t ww_0_lo = vmovn_u32(vreinterpretq_u32_u16(ww[0]));
- const uint16x4_t ww_0_hi =
- vmovn_u32(vreinterpretq_u32_u16(vextq_u16(ww[0], ww[0], 1)));
- const uint8x8_t save_mask = vcreate_u8(0 + (2 << 8) + (4 << 16) + (6 << 24));
-
-#if !defined(__aarch64__)
- const uint8x8x2_t v_split1 = { { vget_low_u8(vreinterpretq_u8_u16(wh[0])),
- vget_high_u8(
- vreinterpretq_u8_u16(wh[0])) } };
- const uint8x8x2_t v_split2 = { { vget_low_u8(vreinterpretq_u8_u16(wh[1])),
- vget_high_u8(
- vreinterpretq_u8_u16(wh[1])) } };
- const uint8x8x2_t v_split3 = { { vget_low_u8(pixel[1]),
- vget_high_u8(pixel[1]) } };
-#endif // (__aarch64__)
-
- for (int i = 0; i < h; ++i) {
-#if defined(__aarch64__)
- const uint8x8_t wg =
- vqtbl1_u8(vreinterpretq_u8_u16(wh[0]), vreinterpret_u8_u16(d));
- const uint8x8_t sc =
- vqtbl1_u8(vreinterpretq_u8_u16(wh[1]), vreinterpret_u8_u16(d));
-#else
- const uint8x8_t wg = vtbl2_u8(v_split1, vreinterpret_u8_u16(d));
- const uint8x8_t sc = vtbl2_u8(v_split2, vreinterpret_u8_u16(d));
-#endif // (__aarch64__)
-
- uint32x4_t sum = vmull_u16(v_pixel_0_lo, vreinterpret_u16_u8(wg));
- sum = vmlal_u16(sum, v_pixel_0_hi, vreinterpret_u16_u8(sc));
-
-#if defined(__aarch64__)
- uint8x8_t b = vqtbl1_u8(pixel[1], vreinterpret_u8_u16(rep));
-#else
- uint8x8_t b = vtbl2_u8(v_split3, vreinterpret_u8_u16(rep));
-#endif // (__aarch64__)
-
- sum = vmlal_u16(sum, vreinterpret_u16_u8(b), ww_0_lo);
- sum = vmlal_u16(sum, v_pixel_2, ww_0_hi);
- uint8x8_t sum_l = vreinterpret_u8_u16(vqrshrn_n_u32(sum, 9));
- uint32x2_t predsh = vreinterpret_u32_u8(vtbl1_u8(sum_l, save_mask));
- vst1_lane_u32((uint32_t *)dst, predsh, 0);
-
+// 256 - v = vneg_s8(v), since negation is taken mod 256 when the result is
+// reinterpreted as unsigned.
+static INLINE uint8x8_t negate_s8(const uint8x8_t v) {
+ return vreinterpret_u8_s8(vneg_s8(vreinterpret_s8_u8(v)));
+}
+
+static void smooth_4xh_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *const top_row,
+ const uint8_t *const left_column,
+ const int height) {
+ const uint8_t top_right = top_row[3];
+ const uint8_t bottom_left = left_column[height - 1];
+ const uint8_t *const weights_y = smooth_weights + height - 4;
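+  // smooth_weights packs the weight vectors for block sizes 4, 8, 16, 32 and
+  // 64 back to back, so the weights for size n start at offset n - 4.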
+
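+  // load_u8_4x1 fills only the low four bytes; UNINITIALIZED_IS_SAFE is
+  // assumed to silence the resulting may-be-used-uninitialized warnings.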
+ uint8x8_t UNINITIALIZED_IS_SAFE(top_v);
+ load_u8_4x1(top_row, &top_v, 0);
+ const uint8x8_t top_right_v = vdup_n_u8(top_right);
+ const uint8x8_t bottom_left_v = vdup_n_u8(bottom_left);
+ uint8x8_t UNINITIALIZED_IS_SAFE(weights_x_v);
+ load_u8_4x1(smooth_weights, &weights_x_v, 0);
+ const uint8x8_t scaled_weights_x = negate_s8(weights_x_v);
+ const uint16x8_t weighted_tr = vmull_u8(scaled_weights_x, top_right_v);
+
+ assert(height > 0);
+ int y = 0;
+ do {
+ const uint8x8_t left_v = vdup_n_u8(left_column[y]);
+ const uint8x8_t weights_y_v = vdup_n_u8(weights_y[y]);
+ const uint8x8_t scaled_weights_y = negate_s8(weights_y_v);
+ const uint16x8_t weighted_bl = vmull_u8(scaled_weights_y, bottom_left_v);
+ const uint16x8_t weighted_top_bl =
+ vmlal_u8(weighted_bl, weights_y_v, top_v);
+ const uint16x8_t weighted_left_tr =
+ vmlal_u8(weighted_tr, weights_x_v, left_v);
+ // Maximum value of each parameter: 0xFF00
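+  // vhaddq_u16 halves the 17-bit sum so it fits in 16 bits; the rounding
+  // narrow by SMOOTH_WEIGHT_LOG2_SCALE then completes the division by the
+  // combined weight scale.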
+ const uint16x8_t avg = vhaddq_u16(weighted_top_bl, weighted_left_tr);
+ const uint8x8_t result = vrshrn_n_u16(avg, SMOOTH_WEIGHT_LOG2_SCALE);
+
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(result), 0);
dst += stride;
-
- rep = vadd_u16(rep, one);
- d = vadd_u16(d, inc);
- }
-}
-
-void aom_smooth_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- uint8x16_t pixels[3];
- load_pixel_w4(above, left, 4, pixels);
-
- uint16x8_t wh[4], ww[2];
- load_weight_w4(sm_weight_arrays, 4, wh, ww);
-
- smooth_pred_4xh(pixels, wh, ww, 4, dst, stride, 0);
-}
-
-void aom_smooth_predictor_4x8_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- uint8x16_t pixels[3];
- load_pixel_w4(above, left, 8, pixels);
-
- uint16x8_t wh[4], ww[2];
- load_weight_w4(sm_weight_arrays, 8, wh, ww);
-
- smooth_pred_4xh(pixels, wh, ww, 8, dst, stride, 0);
+ } while (++y != height);
+}
+
+static INLINE uint8x8_t calculate_pred(const uint16x8_t weighted_top_bl,
+ const uint16x8_t weighted_left_tr) {
+ // Maximum value of each parameter: 0xFF00
+ const uint16x8_t avg = vhaddq_u16(weighted_top_bl, weighted_left_tr);
+ return vrshrn_n_u16(avg, SMOOTH_WEIGHT_LOG2_SCALE);
+}
+
+static INLINE uint8x8_t calculate_weights_and_pred(
+ const uint8x8_t top, const uint8x8_t left, const uint16x8_t weighted_tr,
+ const uint8x8_t bottom_left, const uint8x8_t weights_x,
+ const uint8x8_t scaled_weights_y, const uint8x8_t weights_y) {
+ const uint16x8_t weighted_top = vmull_u8(weights_y, top);
+ const uint16x8_t weighted_top_bl =
+ vmlal_u8(weighted_top, scaled_weights_y, bottom_left);
+ const uint16x8_t weighted_left_tr = vmlal_u8(weighted_tr, weights_x, left);
+ return calculate_pred(weighted_top_bl, weighted_left_tr);
+}
+
+static void smooth_8xh_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *const top_row,
+ const uint8_t *const left_column,
+ const int height) {
+ const uint8_t top_right = top_row[7];
+ const uint8_t bottom_left = left_column[height - 1];
+ const uint8_t *const weights_y = smooth_weights + height - 4;
+
+ const uint8x8_t top_v = vld1_u8(top_row);
+ const uint8x8_t top_right_v = vdup_n_u8(top_right);
+ const uint8x8_t bottom_left_v = vdup_n_u8(bottom_left);
+ const uint8x8_t weights_x_v = vld1_u8(smooth_weights + 4);
+ const uint8x8_t scaled_weights_x = negate_s8(weights_x_v);
+ const uint16x8_t weighted_tr = vmull_u8(scaled_weights_x, top_right_v);
+
+ assert(height > 0);
+ int y = 0;
+ do {
+ const uint8x8_t left_v = vdup_n_u8(left_column[y]);
+ const uint8x8_t weights_y_v = vdup_n_u8(weights_y[y]);
+ const uint8x8_t scaled_weights_y = negate_s8(weights_y_v);
+ const uint8x8_t result =
+ calculate_weights_and_pred(top_v, left_v, weighted_tr, bottom_left_v,
+ weights_x_v, scaled_weights_y, weights_y_v);
+
+ vst1_u8(dst, result);
+ dst += stride;
+ } while (++y != height);
}
-void aom_smooth_predictor_4x16_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- uint8x16_t pixels[3];
- load_pixel_w4(above, left, 16, pixels);
-
- uint16x8_t wh[4], ww[2];
- load_weight_w4(sm_weight_arrays, 16, wh, ww);
-
- smooth_pred_4xh(pixels, wh, ww, 8, dst, stride, 0);
- dst += stride << 3;
- smooth_pred_4xh(pixels, &wh[2], ww, 8, dst, stride, 1);
-}
-
-// pixels[0]: above and below_pred interleave vector, first half
-// pixels[1]: above and below_pred interleave vector, second half
-// pixels[2]: left vector
-// pixels[3]: right_pred vector
-// pixels[4]: above and below_pred interleave vector, first half
-// pixels[5]: above and below_pred interleave vector, second half
-// pixels[6]: left vector + 16
-// pixels[7]: right_pred vector
-static INLINE void load_pixel_w8(const uint8_t *above, const uint8_t *left,
- int height, uint8x16_t *pixels) {
- pixels[0] = vreinterpretq_u8_u16(vmovl_u8(vld1_u8(above)));
- pixels[1] = vreinterpretq_u8_u16(vdupq_n_u16((uint16_t)left[height - 1]));
- pixels[3] = vreinterpretq_u8_u16(vdupq_n_u16((uint16_t)above[7]));
-
- if (height == 4) {
- const uint32x4_t zero32 = vdupq_n_u32(0);
- pixels[2] =
- vreinterpretq_u8_u32(vld1q_lane_u32((const uint32_t *)left, zero32, 0));
- } else if (height == 8) {
- const uint64x2_t zero64 = vdupq_n_u64(0);
- pixels[2] = vreinterpretq_u8_u64(
- vsetq_lane_u64(((const uint64_t *)left)[0], zero64, 0));
- } else if (height == 16) {
- pixels[2] = vld1q_u8(left);
- } else {
- pixels[2] = vld1q_u8(left);
- pixels[4] = pixels[0];
- pixels[5] = pixels[1];
- pixels[6] = vld1q_u8(left + 16);
- pixels[7] = pixels[3];
- }
-}
-
-// weight_h[0]: weight_h vector
-// weight_h[1]: scale - weight_h vector
-// weight_h[2]: same as [0], offset 8
-// weight_h[3]: same as [1], offset 8
-// weight_h[4]: same as [0], offset 16
-// weight_h[5]: same as [1], offset 16
-// weight_h[6]: same as [0], offset 24
-// weight_h[7]: same as [1], offset 24
-// weight_w[0]: weights_w and scale - weights_w interleave vector, first half
-// weight_w[1]: weights_w and scale - weights_w interleave vector, second half
-static INLINE void load_weight_w8(const uint8_t *weight_array, int height,
- uint16x8_t *weight_h, uint16x8_t *weight_w) {
- const uint8x16_t zero = vdupq_n_u8(0);
- const int we_offset = height < 8 ? 4 : 8;
- uint8x16_t we = vld1q_u8(&weight_array[we_offset]);
-#if defined(__aarch64__)
- weight_h[0] = vreinterpretq_u16_u8(vzip1q_u8(we, zero));
-#else
- weight_h[0] = vreinterpretq_u16_u8(vzipq_u8(we, zero).val[0]);
-#endif // (__aarch64__)
- const uint16x8_t d = vdupq_n_u16(256);
- weight_h[1] = vsubq_u16(d, weight_h[0]);
-
- if (height == 4) {
- we = vextq_u8(we, zero, 4);
-#if defined(__aarch64__)
- weight_w[0] = vreinterpretq_u16_u8(vzip1q_u8(we, zero));
-#else
- weight_w[0] = vmovl_u8(vget_low_u8(we));
-#endif // (__aarch64__)
- weight_w[1] = vsubq_u16(d, weight_w[0]);
- } else {
- weight_w[0] = weight_h[0];
- weight_w[1] = weight_h[1];
- }
-
- if (height == 16) {
- we = vld1q_u8(&weight_array[16]);
- const uint8x16x2_t weight_h_02 = vzipq_u8(we, zero);
- weight_h[0] = vreinterpretq_u16_u8(weight_h_02.val[0]);
- weight_h[1] = vsubq_u16(d, weight_h[0]);
- weight_h[2] = vreinterpretq_u16_u8(weight_h_02.val[1]);
- weight_h[3] = vsubq_u16(d, weight_h[2]);
- } else if (height == 32) {
- const uint8x16_t weight_lo = vld1q_u8(&weight_array[32]);
- const uint8x16x2_t weight_h_02 = vzipq_u8(weight_lo, zero);
- weight_h[0] = vreinterpretq_u16_u8(weight_h_02.val[0]);
- weight_h[1] = vsubq_u16(d, weight_h[0]);
- weight_h[2] = vreinterpretq_u16_u8(weight_h_02.val[1]);
- weight_h[3] = vsubq_u16(d, weight_h[2]);
- const uint8x16_t weight_hi = vld1q_u8(&weight_array[32 + 16]);
- const uint8x16x2_t weight_h_46 = vzipq_u8(weight_hi, zero);
- weight_h[4] = vreinterpretq_u16_u8(weight_h_46.val[0]);
- weight_h[5] = vsubq_u16(d, weight_h[4]);
- weight_h[6] = vreinterpretq_u16_u8(weight_h_46.val[1]);
- weight_h[7] = vsubq_u16(d, weight_h[6]);
- }
-}
-
-static INLINE void smooth_pred_8xh(const uint8x16_t *pixels,
- const uint16x8_t *wh, const uint16x8_t *ww,
- int h, uint8_t *dst, ptrdiff_t stride,
- int second_half) {
- const uint16x8_t one = vdupq_n_u16(1);
- const uint16x8_t inc = vdupq_n_u16(0x202);
- uint16x8_t rep = second_half ? vdupq_n_u16((uint16_t)0x8008)
- : vdupq_n_u16((uint16_t)0x8000);
- uint16x8_t d = vdupq_n_u16(0x100);
-
-#if !defined(__aarch64__)
- const uint8x8x2_t v_split1 = { { vget_low_u8(vreinterpretq_u8_u16(wh[0])),
- vget_high_u8(
- vreinterpretq_u8_u16(wh[0])) } };
- const uint8x8x2_t v_split2 = { { vget_low_u8(vreinterpretq_u8_u16(wh[1])),
- vget_high_u8(
- vreinterpretq_u8_u16(wh[1])) } };
- const uint8x8x2_t v_split3 = { { vget_low_u8(pixels[2]),
- vget_high_u8(pixels[2]) } };
-#endif
-
- for (int i = 0; i < h; ++i) {
-#if defined(__aarch64__)
- const uint8x16_t wg_wg =
- vqtbl1q_u8(vreinterpretq_u8_u16(wh[0]), vreinterpretq_u8_u16(d));
- const uint8x16_t sc_sc =
- vqtbl1q_u8(vreinterpretq_u8_u16(wh[1]), vreinterpretq_u8_u16(d));
-#else
- const uint8x8_t v_d_lo = vreinterpret_u8_u16(vget_low_u16(d));
- const uint8x8_t v_d_hi = vreinterpret_u8_u16(vget_high_u16(d));
- const uint8x16_t wg_wg =
- vcombine_u8(vtbl2_u8(v_split1, v_d_lo), vtbl2_u8(v_split1, v_d_hi));
- const uint8x16_t sc_sc =
- vcombine_u8(vtbl2_u8(v_split2, v_d_lo), vtbl2_u8(v_split2, v_d_hi));
-#endif // (__aarch64__)
- uint16x8_t s01 =
- vmulq_u16(vreinterpretq_u16_u8(pixels[0]), vreinterpretq_u16_u8(wg_wg));
- s01 = vmlaq_u16(s01, vreinterpretq_u16_u8(pixels[1]),
- vreinterpretq_u16_u8(sc_sc));
-#if defined(__aarch64__)
- const uint8x16_t b = vqtbl1q_u8(pixels[2], vreinterpretq_u8_u16(rep));
-#else
- const uint8x16_t b = vcombine_u8(
- vtbl2_u8(v_split3, vget_low_u8(vreinterpretq_u8_u16(rep))),
- vtbl2_u8(v_split3, vget_high_u8(vreinterpretq_u8_u16(rep))));
-#endif // (__aarch64__)
- uint16x8_t sum0 = vmulq_u16(vreinterpretq_u16_u8(b), ww[0]);
- sum0 = vmlaq_u16(sum0, vreinterpretq_u16_u8(pixels[3]), ww[1]);
-
- uint32x4_t s0 = vaddl_u16(vget_low_u16(s01), vget_low_u16(sum0));
-#if defined(__aarch64__)
- uint32x4_t s1 = vaddl_high_u16(s01, sum0);
-#else
- uint32x4_t s1 = vaddl_u16(vget_high_u16(s01), vget_high_u16(sum0));
-#endif // (__aarch64__)
-
- sum0 = vcombine_u16(vqrshrn_n_u32(s0, 9), vqrshrn_n_u32(s1, 9));
- uint8x8_t predsh = vqmovn_u16(sum0);
- vst1_u8(dst, predsh);
+#define SMOOTH_NXM(W, H) \
+ void aom_smooth_predictor_##W##x##H##_neon(uint8_t *dst, ptrdiff_t y_stride, \
+ const uint8_t *above, \
+ const uint8_t *left) { \
+ smooth_##W##xh_neon(dst, y_stride, above, left, H); \
+ }
- dst += stride;
- rep = vaddq_u16(rep, one);
- d = vaddq_u16(d, inc);
+SMOOTH_NXM(4, 4)
+SMOOTH_NXM(4, 8)
+SMOOTH_NXM(8, 4)
+SMOOTH_NXM(8, 8)
+SMOOTH_NXM(4, 16)
+SMOOTH_NXM(8, 16)
+SMOOTH_NXM(8, 32)
+
+#undef SMOOTH_NXM
+
+static INLINE uint8x16_t calculate_weights_and_predq(
+ const uint8x16_t top, const uint8x8_t left, const uint8x8_t top_right,
+ const uint8x8_t weights_y, const uint8x16_t weights_x,
+ const uint8x16_t scaled_weights_x, const uint16x8_t weighted_bl) {
+ const uint16x8_t weighted_top_bl_low =
+ vmlal_u8(weighted_bl, weights_y, vget_low_u8(top));
+ const uint16x8_t weighted_left_low = vmull_u8(vget_low_u8(weights_x), left);
+ const uint16x8_t weighted_left_tr_low =
+ vmlal_u8(weighted_left_low, vget_low_u8(scaled_weights_x), top_right);
+ const uint8x8_t result_low =
+ calculate_pred(weighted_top_bl_low, weighted_left_tr_low);
+
+ const uint16x8_t weighted_top_bl_high =
+ vmlal_u8(weighted_bl, weights_y, vget_high_u8(top));
+ const uint16x8_t weighted_left_high = vmull_u8(vget_high_u8(weights_x), left);
+ const uint16x8_t weighted_left_tr_high =
+ vmlal_u8(weighted_left_high, vget_high_u8(scaled_weights_x), top_right);
+ const uint8x8_t result_high =
+ calculate_pred(weighted_top_bl_high, weighted_left_tr_high);
+
+ return vcombine_u8(result_low, result_high);
+}
+
+// 256 - v = vneg_s8(v), since negation is taken mod 256 when the result is
+// reinterpreted as unsigned.
+static INLINE uint8x16_t negate_s8q(const uint8x16_t v) {
+ return vreinterpretq_u8_s8(vnegq_s8(vreinterpretq_s8_u8(v)));
+}
+
+// For width 16 and above.
+#define SMOOTH_PREDICTOR(W) \
+ static void smooth_##W##xh_neon( \
+ uint8_t *dst, ptrdiff_t stride, const uint8_t *const top_row, \
+ const uint8_t *const left_column, const int height) { \
+ const uint8_t top_right = top_row[(W)-1]; \
+ const uint8_t bottom_left = left_column[height - 1]; \
+ const uint8_t *const weights_y = smooth_weights + height - 4; \
+ \
+ uint8x16_t top_v[4]; \
+ top_v[0] = vld1q_u8(top_row); \
+ if ((W) > 16) { \
+ top_v[1] = vld1q_u8(top_row + 16); \
+ if ((W) == 64) { \
+ top_v[2] = vld1q_u8(top_row + 32); \
+ top_v[3] = vld1q_u8(top_row + 48); \
+ } \
+ } \
+ \
+ const uint8x8_t top_right_v = vdup_n_u8(top_right); \
+ const uint8x8_t bottom_left_v = vdup_n_u8(bottom_left); \
+ \
+ uint8x16_t weights_x_v[4]; \
+ weights_x_v[0] = vld1q_u8(smooth_weights + (W)-4); \
+ if ((W) > 16) { \
+ weights_x_v[1] = vld1q_u8(smooth_weights + (W) + 16 - 4); \
+ if ((W) == 64) { \
+ weights_x_v[2] = vld1q_u8(smooth_weights + (W) + 32 - 4); \
+ weights_x_v[3] = vld1q_u8(smooth_weights + (W) + 48 - 4); \
+ } \
+ } \
+ \
+ uint8x16_t scaled_weights_x[4]; \
+ scaled_weights_x[0] = negate_s8q(weights_x_v[0]); \
+ if ((W) > 16) { \
+ scaled_weights_x[1] = negate_s8q(weights_x_v[1]); \
+ if ((W) == 64) { \
+ scaled_weights_x[2] = negate_s8q(weights_x_v[2]); \
+ scaled_weights_x[3] = negate_s8q(weights_x_v[3]); \
+ } \
+ } \
+ \
+ for (int y = 0; y < height; ++y) { \
+ const uint8x8_t left_v = vdup_n_u8(left_column[y]); \
+ const uint8x8_t weights_y_v = vdup_n_u8(weights_y[y]); \
+ const uint8x8_t scaled_weights_y = negate_s8(weights_y_v); \
+ const uint16x8_t weighted_bl = \
+ vmull_u8(scaled_weights_y, bottom_left_v); \
+ \
+ vst1q_u8(dst, calculate_weights_and_predq( \
+ top_v[0], left_v, top_right_v, weights_y_v, \
+ weights_x_v[0], scaled_weights_x[0], weighted_bl)); \
+ \
+ if ((W) > 16) { \
+ vst1q_u8(dst + 16, \
+ calculate_weights_and_predq( \
+ top_v[1], left_v, top_right_v, weights_y_v, \
+ weights_x_v[1], scaled_weights_x[1], weighted_bl)); \
+ if ((W) == 64) { \
+ vst1q_u8(dst + 32, \
+ calculate_weights_and_predq( \
+ top_v[2], left_v, top_right_v, weights_y_v, \
+ weights_x_v[2], scaled_weights_x[2], weighted_bl)); \
+ vst1q_u8(dst + 48, \
+ calculate_weights_and_predq( \
+ top_v[3], left_v, top_right_v, weights_y_v, \
+ weights_x_v[3], scaled_weights_x[3], weighted_bl)); \
+ } \
+ } \
+ \
+ dst += stride; \
+ } \
}
-}
-void aom_smooth_predictor_8x4_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- uint8x16_t pixels[4];
- load_pixel_w8(above, left, 4, pixels);
+SMOOTH_PREDICTOR(16)
+SMOOTH_PREDICTOR(32)
+SMOOTH_PREDICTOR(64)
- uint16x8_t wh[4], ww[2];
- load_weight_w8(sm_weight_arrays, 4, wh, ww);
+#undef SMOOTH_PREDICTOR
- smooth_pred_8xh(pixels, wh, ww, 4, dst, stride, 0);
-}
+#define SMOOTH_NXM_WIDE(W, H) \
+ void aom_smooth_predictor_##W##x##H##_neon(uint8_t *dst, ptrdiff_t y_stride, \
+ const uint8_t *above, \
+ const uint8_t *left) { \
+ smooth_##W##xh_neon(dst, y_stride, above, left, H); \
+ }
-void aom_smooth_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- uint8x16_t pixels[4];
- load_pixel_w8(above, left, 8, pixels);
+SMOOTH_NXM_WIDE(16, 4)
+SMOOTH_NXM_WIDE(16, 8)
+SMOOTH_NXM_WIDE(16, 16)
+SMOOTH_NXM_WIDE(16, 32)
+SMOOTH_NXM_WIDE(16, 64)
+SMOOTH_NXM_WIDE(32, 8)
+SMOOTH_NXM_WIDE(32, 16)
+SMOOTH_NXM_WIDE(32, 32)
+SMOOTH_NXM_WIDE(32, 64)
+SMOOTH_NXM_WIDE(64, 16)
+SMOOTH_NXM_WIDE(64, 32)
+SMOOTH_NXM_WIDE(64, 64)
+
+#undef SMOOTH_NXM_WIDE
- uint16x8_t wh[4], ww[2];
- load_weight_w8(sm_weight_arrays, 8, wh, ww);
+// -----------------------------------------------------------------------------
+// SMOOTH_V_PRED
+
+// For widths 4 and 8.
+#define SMOOTH_V_PREDICTOR(W) \
+ static void smooth_v_##W##xh_neon( \
+ uint8_t *dst, ptrdiff_t stride, const uint8_t *const top_row, \
+ const uint8_t *const left_column, const int height) { \
+ const uint8_t bottom_left = left_column[height - 1]; \
+ const uint8_t *const weights_y = smooth_weights + height - 4; \
+ \
+ uint8x8_t UNINITIALIZED_IS_SAFE(top_v); \
+ if ((W) == 4) { \
+ load_u8_4x1(top_row, &top_v, 0); \
+ } else { /* width == 8 */ \
+ top_v = vld1_u8(top_row); \
+ } \
+ \
+ const uint8x8_t bottom_left_v = vdup_n_u8(bottom_left); \
+ \
+ assert(height > 0); \
+ int y = 0; \
+ do { \
+ const uint8x8_t weights_y_v = vdup_n_u8(weights_y[y]); \
+ const uint8x8_t scaled_weights_y = negate_s8(weights_y_v); \
+ \
+ const uint16x8_t weighted_top = vmull_u8(weights_y_v, top_v); \
+ const uint16x8_t weighted_top_bl = \
+ vmlal_u8(weighted_top, scaled_weights_y, bottom_left_v); \
+ const uint8x8_t pred = \
+ vrshrn_n_u16(weighted_top_bl, SMOOTH_WEIGHT_LOG2_SCALE); \
+ \
+ if ((W) == 4) { \
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(pred), 0); \
+ } else { /* width == 8 */ \
+ vst1_u8(dst, pred); \
+ } \
+ dst += stride; \
+ } while (++y != height); \
+ }
- smooth_pred_8xh(pixels, wh, ww, 8, dst, stride, 0);
-}
+SMOOTH_V_PREDICTOR(4)
+SMOOTH_V_PREDICTOR(8)
-void aom_smooth_predictor_8x16_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- uint8x16_t pixels[4];
- load_pixel_w8(above, left, 16, pixels);
+#undef SMOOTH_V_PREDICTOR
- uint16x8_t wh[4], ww[2];
- load_weight_w8(sm_weight_arrays, 16, wh, ww);
+#define SMOOTH_V_NXM(W, H) \
+ void aom_smooth_v_predictor_##W##x##H##_neon( \
+ uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, \
+ const uint8_t *left) { \
+ smooth_v_##W##xh_neon(dst, y_stride, above, left, H); \
+ }
- smooth_pred_8xh(pixels, wh, ww, 8, dst, stride, 0);
- dst += stride << 3;
- smooth_pred_8xh(pixels, &wh[2], ww, 8, dst, stride, 1);
-}
+SMOOTH_V_NXM(4, 4)
+SMOOTH_V_NXM(4, 8)
+SMOOTH_V_NXM(4, 16)
+SMOOTH_V_NXM(8, 4)
+SMOOTH_V_NXM(8, 8)
+SMOOTH_V_NXM(8, 16)
+SMOOTH_V_NXM(8, 32)
+
+#undef SMOOTH_V_NXM
+
+static INLINE uint8x16_t calculate_vertical_weights_and_pred(
+ const uint8x16_t top, const uint8x8_t weights_y,
+ const uint16x8_t weighted_bl) {
+ const uint16x8_t pred_low =
+ vmlal_u8(weighted_bl, weights_y, vget_low_u8(top));
+ const uint16x8_t pred_high =
+ vmlal_u8(weighted_bl, weights_y, vget_high_u8(top));
+ const uint8x8_t pred_scaled_low =
+ vrshrn_n_u16(pred_low, SMOOTH_WEIGHT_LOG2_SCALE);
+ const uint8x8_t pred_scaled_high =
+ vrshrn_n_u16(pred_high, SMOOTH_WEIGHT_LOG2_SCALE);
+ return vcombine_u8(pred_scaled_low, pred_scaled_high);
+}
+
+// For width 16 and above.
+#define SMOOTH_V_PREDICTOR(W) \
+ static void smooth_v_##W##xh_neon( \
+ uint8_t *dst, ptrdiff_t stride, const uint8_t *const top_row, \
+ const uint8_t *const left_column, const int height) { \
+ const uint8_t bottom_left = left_column[height - 1]; \
+ const uint8_t *const weights_y = smooth_weights + height - 4; \
+ \
+ uint8x16_t top_v[4]; \
+ top_v[0] = vld1q_u8(top_row); \
+ if ((W) > 16) { \
+ top_v[1] = vld1q_u8(top_row + 16); \
+ if ((W) == 64) { \
+ top_v[2] = vld1q_u8(top_row + 32); \
+ top_v[3] = vld1q_u8(top_row + 48); \
+ } \
+ } \
+ \
+ const uint8x8_t bottom_left_v = vdup_n_u8(bottom_left); \
+ \
+ assert(height > 0); \
+ int y = 0; \
+ do { \
+ const uint8x8_t weights_y_v = vdup_n_u8(weights_y[y]); \
+ const uint8x8_t scaled_weights_y = negate_s8(weights_y_v); \
+ const uint16x8_t weighted_bl = \
+ vmull_u8(scaled_weights_y, bottom_left_v); \
+ \
+ const uint8x16_t pred_0 = calculate_vertical_weights_and_pred( \
+ top_v[0], weights_y_v, weighted_bl); \
+ vst1q_u8(dst, pred_0); \
+ \
+ if ((W) > 16) { \
+ const uint8x16_t pred_1 = calculate_vertical_weights_and_pred( \
+ top_v[1], weights_y_v, weighted_bl); \
+ vst1q_u8(dst + 16, pred_1); \
+ \
+ if ((W) == 64) { \
+ const uint8x16_t pred_2 = calculate_vertical_weights_and_pred( \
+ top_v[2], weights_y_v, weighted_bl); \
+ vst1q_u8(dst + 32, pred_2); \
+ \
+ const uint8x16_t pred_3 = calculate_vertical_weights_and_pred( \
+ top_v[3], weights_y_v, weighted_bl); \
+ vst1q_u8(dst + 48, pred_3); \
+ } \
+ } \
+ \
+ dst += stride; \
+ } while (++y != height); \
+ }
-void aom_smooth_predictor_8x32_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- uint8x16_t pixels[8];
- load_pixel_w8(above, left, 32, pixels);
-
- uint16x8_t wh[8], ww[2];
- load_weight_w8(sm_weight_arrays, 32, wh, ww);
-
- smooth_pred_8xh(&pixels[0], wh, ww, 8, dst, stride, 0);
- dst += stride << 3;
- smooth_pred_8xh(&pixels[0], &wh[2], ww, 8, dst, stride, 1);
- dst += stride << 3;
- smooth_pred_8xh(&pixels[4], &wh[4], ww, 8, dst, stride, 0);
- dst += stride << 3;
- smooth_pred_8xh(&pixels[4], &wh[6], ww, 8, dst, stride, 1);
-}
-
-static INLINE void smooth_predictor_wxh(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left, uint32_t bw,
- uint32_t bh) {
- const uint8_t *const sm_weights_w = sm_weight_arrays + bw;
- const uint8_t *const sm_weights_h = sm_weight_arrays + bh;
- const uint16x8_t scale_value = vdupq_n_u16(256);
-
- for (uint32_t y = 0; y < bh; ++y) {
- const uint8x8_t left_y = vdup_n_u8(left[y]);
- const uint8x8_t weights_y_dup = vdup_n_u8(sm_weights_h[y]);
- const uint32x4_t pred_scaled_bl =
- vdupq_n_u32(256 + (256 - sm_weights_h[y]) * left[bh - 1]);
-
- for (uint32_t x = 0; x < bw; x += 8) {
- const uint8x8_t weights_x = vld1_u8(sm_weights_w + x);
- const uint8x8_t top_x = vld1_u8(above + x);
-
- uint16x8_t pred_m1, pred_m2;
- uint32x4_t pred_lo, pred_hi;
- pred_m1 = vmull_u8(top_x, weights_y_dup);
- pred_m2 = vmull_u8(weights_x, left_y);
-
- pred_lo = vaddl_u16(vget_low_u16(pred_m1), vget_low_u16(pred_m2));
-#if defined(__aarch64__)
- pred_hi = vaddl_high_u16(pred_m1, pred_m2);
-#else
- pred_hi = vaddl_u16(vget_high_u16(pred_m1), vget_high_u16(pred_m2));
-#endif // (__aarch64__)
+SMOOTH_V_PREDICTOR(16)
+SMOOTH_V_PREDICTOR(32)
+SMOOTH_V_PREDICTOR(64)
- const uint16x8_t scale_m_weights_x = vsubw_u8(scale_value, weights_x);
+#undef SMOOTH_V_PREDICTOR
- const uint16x8_t swxtr = vmulq_n_u16(scale_m_weights_x, above[bw - 1]);
+#define SMOOTH_V_NXM_WIDE(W, H) \
+ void aom_smooth_v_predictor_##W##x##H##_neon( \
+ uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, \
+ const uint8_t *left) { \
+ smooth_v_##W##xh_neon(dst, y_stride, above, left, H); \
+ }
- pred_lo = vaddq_u32(pred_lo, pred_scaled_bl);
- pred_hi = vaddq_u32(pred_hi, pred_scaled_bl);
+SMOOTH_V_NXM_WIDE(16, 4)
+SMOOTH_V_NXM_WIDE(16, 8)
+SMOOTH_V_NXM_WIDE(16, 16)
+SMOOTH_V_NXM_WIDE(16, 32)
+SMOOTH_V_NXM_WIDE(16, 64)
+SMOOTH_V_NXM_WIDE(32, 8)
+SMOOTH_V_NXM_WIDE(32, 16)
+SMOOTH_V_NXM_WIDE(32, 32)
+SMOOTH_V_NXM_WIDE(32, 64)
+SMOOTH_V_NXM_WIDE(64, 16)
+SMOOTH_V_NXM_WIDE(64, 32)
+SMOOTH_V_NXM_WIDE(64, 64)
+
+#undef SMOOTH_V_NXM_WIDE
- pred_lo = vaddw_u16(pred_lo, vget_low_u16(swxtr));
-#if defined(__aarch64__)
- pred_hi = vaddw_high_u16(pred_hi, swxtr);
-#else
- pred_hi = vaddw_u16(pred_hi, vget_high_u16(swxtr));
-#endif // (__aarch64__)
+// -----------------------------------------------------------------------------
+// SMOOTH_H_PRED
+
+// For widths 4 and 8.
+#define SMOOTH_H_PREDICTOR(W) \
+ static void smooth_h_##W##xh_neon( \
+ uint8_t *dst, ptrdiff_t stride, const uint8_t *const top_row, \
+ const uint8_t *const left_column, const int height) { \
+ const uint8_t top_right = top_row[(W)-1]; \
+ \
+ const uint8x8_t top_right_v = vdup_n_u8(top_right); \
+ /* Over-reads for 4xN but still within the array. */ \
+ const uint8x8_t weights_x = vld1_u8(smooth_weights + (W)-4); \
+ const uint8x8_t scaled_weights_x = negate_s8(weights_x); \
+ const uint16x8_t weighted_tr = vmull_u8(scaled_weights_x, top_right_v); \
+ \
+ assert(height > 0); \
+ int y = 0; \
+ do { \
+ const uint8x8_t left_v = vdup_n_u8(left_column[y]); \
+ const uint16x8_t weighted_left_tr = \
+ vmlal_u8(weighted_tr, weights_x, left_v); \
+ const uint8x8_t pred = \
+ vrshrn_n_u16(weighted_left_tr, SMOOTH_WEIGHT_LOG2_SCALE); \
+ \
+ if ((W) == 4) { \
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(pred), 0); \
+ } else { /* width == 8 */ \
+ vst1_u8(dst, pred); \
+ } \
+ dst += stride; \
+ } while (++y != height); \
+ }
- uint16x8_t pred =
- vcombine_u16(vshrn_n_u32(pred_lo, 9), vshrn_n_u32(pred_hi, 9));
+SMOOTH_H_PREDICTOR(4)
+SMOOTH_H_PREDICTOR(8)
- uint8x8_t predsh = vqmovn_u16(pred);
+#undef SMOOTH_H_PREDICTOR
- vst1_u8(dst + x, predsh);
- }
+#define SMOOTH_H_NXM(W, H) \
+ void aom_smooth_h_predictor_##W##x##H##_neon( \
+ uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, \
+ const uint8_t *left) { \
+ smooth_h_##W##xh_neon(dst, y_stride, above, left, H); \
+ }
- dst += stride;
+SMOOTH_H_NXM(4, 4)
+SMOOTH_H_NXM(4, 8)
+SMOOTH_H_NXM(4, 16)
+SMOOTH_H_NXM(8, 4)
+SMOOTH_H_NXM(8, 8)
+SMOOTH_H_NXM(8, 16)
+SMOOTH_H_NXM(8, 32)
+
+#undef SMOOTH_H_NXM
+
+static INLINE uint8x16_t calculate_horizontal_weights_and_pred(
+ const uint8x8_t left, const uint8x8_t top_right, const uint8x16_t weights_x,
+ const uint8x16_t scaled_weights_x) {
+ const uint16x8_t weighted_left_low = vmull_u8(vget_low_u8(weights_x), left);
+ const uint16x8_t weighted_left_tr_low =
+ vmlal_u8(weighted_left_low, vget_low_u8(scaled_weights_x), top_right);
+ const uint8x8_t pred_scaled_low =
+ vrshrn_n_u16(weighted_left_tr_low, SMOOTH_WEIGHT_LOG2_SCALE);
+
+ const uint16x8_t weighted_left_high = vmull_u8(vget_high_u8(weights_x), left);
+ const uint16x8_t weighted_left_tr_high =
+ vmlal_u8(weighted_left_high, vget_high_u8(scaled_weights_x), top_right);
+ const uint8x8_t pred_scaled_high =
+ vrshrn_n_u16(weighted_left_tr_high, SMOOTH_WEIGHT_LOG2_SCALE);
+
+ return vcombine_u8(pred_scaled_low, pred_scaled_high);
+}
+
+// For width 16 and above.
+#define SMOOTH_H_PREDICTOR(W) \
+ static void smooth_h_##W##xh_neon( \
+ uint8_t *dst, ptrdiff_t stride, const uint8_t *const top_row, \
+ const uint8_t *const left_column, const int height) { \
+ const uint8_t top_right = top_row[(W)-1]; \
+ \
+ const uint8x8_t top_right_v = vdup_n_u8(top_right); \
+ \
+ uint8x16_t weights_x[4]; \
+ weights_x[0] = vld1q_u8(smooth_weights + (W)-4); \
+ if ((W) > 16) { \
+ weights_x[1] = vld1q_u8(smooth_weights + (W) + 16 - 4); \
+ if ((W) == 64) { \
+ weights_x[2] = vld1q_u8(smooth_weights + (W) + 32 - 4); \
+ weights_x[3] = vld1q_u8(smooth_weights + (W) + 48 - 4); \
+ } \
+ } \
+ \
+ uint8x16_t scaled_weights_x[4]; \
+ scaled_weights_x[0] = negate_s8q(weights_x[0]); \
+ if ((W) > 16) { \
+ scaled_weights_x[1] = negate_s8q(weights_x[1]); \
+ if ((W) == 64) { \
+ scaled_weights_x[2] = negate_s8q(weights_x[2]); \
+ scaled_weights_x[3] = negate_s8q(weights_x[3]); \
+ } \
+ } \
+ \
+ assert(height > 0); \
+ int y = 0; \
+ do { \
+ const uint8x8_t left_v = vdup_n_u8(left_column[y]); \
+ \
+ const uint8x16_t pred_0 = calculate_horizontal_weights_and_pred( \
+ left_v, top_right_v, weights_x[0], scaled_weights_x[0]); \
+ vst1q_u8(dst, pred_0); \
+ \
+ if ((W) > 16) { \
+ const uint8x16_t pred_1 = calculate_horizontal_weights_and_pred( \
+ left_v, top_right_v, weights_x[1], scaled_weights_x[1]); \
+ vst1q_u8(dst + 16, pred_1); \
+ \
+ if ((W) == 64) { \
+ const uint8x16_t pred_2 = calculate_horizontal_weights_and_pred( \
+ left_v, top_right_v, weights_x[2], scaled_weights_x[2]); \
+ vst1q_u8(dst + 32, pred_2); \
+ \
+ const uint8x16_t pred_3 = calculate_horizontal_weights_and_pred( \
+ left_v, top_right_v, weights_x[3], scaled_weights_x[3]); \
+ vst1q_u8(dst + 48, pred_3); \
+ } \
+ } \
+ dst += stride; \
+ } while (++y != height); \
}
-}
-void aom_smooth_predictor_16x4_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 16, 4);
-}
+SMOOTH_H_PREDICTOR(16)
+SMOOTH_H_PREDICTOR(32)
+SMOOTH_H_PREDICTOR(64)
-void aom_smooth_predictor_16x8_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 16, 8);
-}
+#undef SMOOTH_H_PREDICTOR
-void aom_smooth_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 16, 16);
-}
+#define SMOOTH_H_NXM_WIDE(W, H) \
+ void aom_smooth_h_predictor_##W##x##H##_neon( \
+ uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, \
+ const uint8_t *left) { \
+ smooth_h_##W##xh_neon(dst, y_stride, above, left, H); \
+ }
-void aom_smooth_predictor_16x32_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 16, 32);
-}
+SMOOTH_H_NXM_WIDE(16, 4)
+SMOOTH_H_NXM_WIDE(16, 8)
+SMOOTH_H_NXM_WIDE(16, 16)
+SMOOTH_H_NXM_WIDE(16, 32)
+SMOOTH_H_NXM_WIDE(16, 64)
+SMOOTH_H_NXM_WIDE(32, 8)
+SMOOTH_H_NXM_WIDE(32, 16)
+SMOOTH_H_NXM_WIDE(32, 32)
+SMOOTH_H_NXM_WIDE(32, 64)
+SMOOTH_H_NXM_WIDE(64, 16)
+SMOOTH_H_NXM_WIDE(64, 32)
+SMOOTH_H_NXM_WIDE(64, 64)
+
+#undef SMOOTH_H_NXM_WIDE
-void aom_smooth_predictor_32x8_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 32, 8);
-}
+// -----------------------------------------------------------------------------
+// PAETH
+
+static INLINE void paeth_4or8_x_h_neon(uint8_t *dest, ptrdiff_t stride,
+ const uint8_t *const top_row,
+ const uint8_t *const left_column,
+ int width, int height) {
+ const uint8x8_t top_left = vdup_n_u8(top_row[-1]);
+ const uint16x8_t top_left_x2 = vdupq_n_u16(top_row[-1] + top_row[-1]);
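+  // With the Paeth base defined as top + left - top_left,
+  // |base - left| == |top - top_left| and |base - top| == |left - top_left|,
+  // so left_dist is derived from top/top_left and top_dist from
+  // left/top_left.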
+ uint8x8_t top;
+ if (width == 4) {
+ load_u8_4x1(top_row, &top, 0);
+ } else { // width == 8
+ top = vld1_u8(top_row);
+ }
-void aom_smooth_predictor_32x16_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 32, 16);
+ assert(height > 0);
+ int y = 0;
+ do {
+ const uint8x8_t left = vdup_n_u8(left_column[y]);
+
+ const uint8x8_t left_dist = vabd_u8(top, top_left);
+ const uint8x8_t top_dist = vabd_u8(left, top_left);
+ const uint16x8_t top_left_dist =
+ vabdq_u16(vaddl_u8(top, left), top_left_x2);
+
+ const uint8x8_t left_le_top = vcle_u8(left_dist, top_dist);
+ const uint8x8_t left_le_top_left =
+ vmovn_u16(vcleq_u16(vmovl_u8(left_dist), top_left_dist));
+ const uint8x8_t top_le_top_left =
+ vmovn_u16(vcleq_u16(vmovl_u8(top_dist), top_left_dist));
+
+ // if (left_dist <= top_dist && left_dist <= top_left_dist)
+ const uint8x8_t left_mask = vand_u8(left_le_top, left_le_top_left);
+ // dest[x] = left_column[y];
+ // Fill all the unused spaces with 'top'. They will be overwritten when
+ // the positions for top_left are known.
+ uint8x8_t result = vbsl_u8(left_mask, left, top);
+ // else if (top_dist <= top_left_dist)
+ // dest[x] = top_row[x];
+ // Add these values to the mask. They were already set.
+ const uint8x8_t left_or_top_mask = vorr_u8(left_mask, top_le_top_left);
+ // else
+ // dest[x] = top_left;
+ result = vbsl_u8(left_or_top_mask, result, top_left);
+
+ if (width == 4) {
+ store_unaligned_u8_4x1(dest, result, 0);
+ } else { // width == 8
+ vst1_u8(dest, result);
+ }
+ dest += stride;
+ } while (++y != height);
}
-void aom_smooth_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 32, 32);
-}
+#define PAETH_NXM(W, H) \
+ void aom_paeth_predictor_##W##x##H##_neon(uint8_t *dst, ptrdiff_t stride, \
+ const uint8_t *above, \
+ const uint8_t *left) { \
+ paeth_4or8_x_h_neon(dst, stride, above, left, W, H); \
+ }
-void aom_smooth_predictor_32x64_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 32, 64);
-}
+PAETH_NXM(4, 4)
+PAETH_NXM(4, 8)
+PAETH_NXM(8, 4)
+PAETH_NXM(8, 8)
+PAETH_NXM(8, 16)
+
+PAETH_NXM(4, 16)
+PAETH_NXM(8, 32)
+
+// Calculate X distance <= TopLeft distance and pack the resulting mask into
+// uint8x16_t.
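+// vqmovn_u16 saturates top_left_dist to 255; x_dist is at most 255, so the
+// unsigned comparison result is unchanged.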
+static INLINE uint8x16_t x_le_top_left(const uint8x16_t x_dist,
+ const uint16x8_t top_left_dist_low,
+ const uint16x8_t top_left_dist_high) {
+ const uint8x16_t top_left_dist = vcombine_u8(vqmovn_u16(top_left_dist_low),
+ vqmovn_u16(top_left_dist_high));
+ return vcleq_u8(x_dist, top_left_dist);
+}
+
+// Select the closest values and collect them.
+static INLINE uint8x16_t select_paeth(const uint8x16_t top,
+ const uint8x16_t left,
+ const uint8x16_t top_left,
+ const uint8x16_t left_le_top,
+ const uint8x16_t left_le_top_left,
+ const uint8x16_t top_le_top_left) {
+ // if (left_dist <= top_dist && left_dist <= top_left_dist)
+ const uint8x16_t left_mask = vandq_u8(left_le_top, left_le_top_left);
+ // dest[x] = left_column[y];
+ // Fill all the unused spaces with 'top'. They will be overwritten when
+ // the positions for top_left are known.
+ uint8x16_t result = vbslq_u8(left_mask, left, top);
+ // else if (top_dist <= top_left_dist)
+ // dest[x] = top_row[x];
+ // Add these values to the mask. They were already set.
+ const uint8x16_t left_or_top_mask = vorrq_u8(left_mask, top_le_top_left);
+ // else
+ // dest[x] = top_left;
+ return vbslq_u8(left_or_top_mask, result, top_left);
+}
+
+// Generate numbered and high/low versions of top_left_dist.
+#define TOP_LEFT_DIST(num) \
+ const uint16x8_t top_left_##num##_dist_low = vabdq_u16( \
+ vaddl_u8(vget_low_u8(top[num]), vget_low_u8(left)), top_left_x2); \
+ const uint16x8_t top_left_##num##_dist_high = vabdq_u16( \
+ vaddl_u8(vget_high_u8(top[num]), vget_low_u8(left)), top_left_x2)
+
+// Generate numbered versions of XLeTopLeft with x = left.
+#define LEFT_LE_TOP_LEFT(num) \
+ const uint8x16_t left_le_top_left_##num = \
+ x_le_top_left(left_##num##_dist, top_left_##num##_dist_low, \
+ top_left_##num##_dist_high)
+
+// Generate numbered versions of XLeTopLeft with x = top.
+#define TOP_LE_TOP_LEFT(num) \
+ const uint8x16_t top_le_top_left_##num = x_le_top_left( \
+ top_dist, top_left_##num##_dist_low, top_left_##num##_dist_high)
+
+static INLINE void paeth16_plus_x_h_neon(uint8_t *dest, ptrdiff_t stride,
+ const uint8_t *const top_row,
+ const uint8_t *const left_column,
+ int width, int height) {
+ const uint8x16_t top_left = vdupq_n_u8(top_row[-1]);
+ const uint16x8_t top_left_x2 = vdupq_n_u16(top_row[-1] + top_row[-1]);
+ uint8x16_t top[4];
+ top[0] = vld1q_u8(top_row);
+ if (width > 16) {
+ top[1] = vld1q_u8(top_row + 16);
+ if (width == 64) {
+ top[2] = vld1q_u8(top_row + 32);
+ top[3] = vld1q_u8(top_row + 48);
+ }
+ }
-void aom_smooth_predictor_64x64_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 64, 64);
-}
+ assert(height > 0);
+ int y = 0;
+ do {
+ const uint8x16_t left = vdupq_n_u8(left_column[y]);
+
+ const uint8x16_t top_dist = vabdq_u8(left, top_left);
+
+ const uint8x16_t left_0_dist = vabdq_u8(top[0], top_left);
+ TOP_LEFT_DIST(0);
+ const uint8x16_t left_0_le_top = vcleq_u8(left_0_dist, top_dist);
+ LEFT_LE_TOP_LEFT(0);
+ TOP_LE_TOP_LEFT(0);
+
+ const uint8x16_t result_0 =
+ select_paeth(top[0], left, top_left, left_0_le_top, left_le_top_left_0,
+ top_le_top_left_0);
+ vst1q_u8(dest, result_0);
+
+ if (width > 16) {
+ const uint8x16_t left_1_dist = vabdq_u8(top[1], top_left);
+ TOP_LEFT_DIST(1);
+ const uint8x16_t left_1_le_top = vcleq_u8(left_1_dist, top_dist);
+ LEFT_LE_TOP_LEFT(1);
+ TOP_LE_TOP_LEFT(1);
+
+ const uint8x16_t result_1 =
+ select_paeth(top[1], left, top_left, left_1_le_top,
+ left_le_top_left_1, top_le_top_left_1);
+ vst1q_u8(dest + 16, result_1);
+
+ if (width == 64) {
+ const uint8x16_t left_2_dist = vabdq_u8(top[2], top_left);
+ TOP_LEFT_DIST(2);
+ const uint8x16_t left_2_le_top = vcleq_u8(left_2_dist, top_dist);
+ LEFT_LE_TOP_LEFT(2);
+ TOP_LE_TOP_LEFT(2);
+
+ const uint8x16_t result_2 =
+ select_paeth(top[2], left, top_left, left_2_le_top,
+ left_le_top_left_2, top_le_top_left_2);
+ vst1q_u8(dest + 32, result_2);
+
+ const uint8x16_t left_3_dist = vabdq_u8(top[3], top_left);
+ TOP_LEFT_DIST(3);
+ const uint8x16_t left_3_le_top = vcleq_u8(left_3_dist, top_dist);
+ LEFT_LE_TOP_LEFT(3);
+ TOP_LE_TOP_LEFT(3);
+
+ const uint8x16_t result_3 =
+ select_paeth(top[3], left, top_left, left_3_le_top,
+ left_le_top_left_3, top_le_top_left_3);
+ vst1q_u8(dest + 48, result_3);
+ }
+ }
-void aom_smooth_predictor_64x32_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 64, 32);
+ dest += stride;
+ } while (++y != height);
}
-void aom_smooth_predictor_64x16_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 64, 16);
-}
+#define PAETH_NXM_WIDE(W, H) \
+ void aom_paeth_predictor_##W##x##H##_neon(uint8_t *dst, ptrdiff_t stride, \
+ const uint8_t *above, \
+ const uint8_t *left) { \
+ paeth16_plus_x_h_neon(dst, stride, above, left, W, H); \
+ }
-void aom_smooth_predictor_16x64_neon(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above,
- const uint8_t *left) {
- smooth_predictor_wxh(dst, stride, above, left, 16, 64);
-}
+PAETH_NXM_WIDE(16, 8)
+PAETH_NXM_WIDE(16, 16)
+PAETH_NXM_WIDE(16, 32)
+PAETH_NXM_WIDE(32, 16)
+PAETH_NXM_WIDE(32, 32)
+PAETH_NXM_WIDE(32, 64)
+PAETH_NXM_WIDE(64, 32)
+PAETH_NXM_WIDE(64, 64)
+
+PAETH_NXM_WIDE(16, 4)
+PAETH_NXM_WIDE(16, 64)
+PAETH_NXM_WIDE(32, 8)
+PAETH_NXM_WIDE(64, 16)
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/loopfilter_neon.c b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/loopfilter_neon.c
index c3adf29add5..f3f86a2b0e5 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/loopfilter_neon.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/loopfilter_neon.c
@@ -703,6 +703,15 @@ void aom_lpf_vertical_14_dual_neon(
aom_lpf_vertical_14_neon(s + 4 * pitch, pitch, blimit1, limit1, thresh1);
}
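+// The quad loop-filter variants cover a 16-pixel span (four 4-pixel edges);
+// each one below is composed of two dual calls, the second offset by
+// 2 * MI_SIZE (= 8) pixels.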
+void aom_lpf_vertical_14_quad_neon(uint8_t *s, int pitch, const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh) {
+ aom_lpf_vertical_14_dual_neon(s, pitch, blimit, limit, thresh, blimit, limit,
+ thresh);
+ aom_lpf_vertical_14_dual_neon(s + 2 * MI_SIZE * pitch, pitch, blimit, limit,
+ thresh, blimit, limit, thresh);
+}
+
void aom_lpf_vertical_8_neon(uint8_t *src, int stride, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
uint32x2x2_t p2q2_p1q1, p3q3_p0q0;
@@ -754,6 +763,14 @@ void aom_lpf_vertical_8_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0,
aom_lpf_vertical_8_neon(s + 4 * pitch, pitch, blimit1, limit1, thresh1);
}
+void aom_lpf_vertical_8_quad_neon(uint8_t *s, int pitch, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh) {
+ aom_lpf_vertical_8_dual_neon(s, pitch, blimit, limit, thresh, blimit, limit,
+ thresh);
+ aom_lpf_vertical_8_dual_neon(s + 2 * MI_SIZE * pitch, pitch, blimit, limit,
+ thresh, blimit, limit, thresh);
+}
+
void aom_lpf_vertical_6_neon(uint8_t *src, int stride, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
uint32x2x2_t p2q2_p1q1, pxqy_p0q0;
@@ -805,6 +822,14 @@ void aom_lpf_vertical_6_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0,
aom_lpf_vertical_6_neon(s + 4 * pitch, pitch, blimit1, limit1, thresh1);
}
+void aom_lpf_vertical_6_quad_neon(uint8_t *s, int pitch, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh) {
+ aom_lpf_vertical_6_dual_neon(s, pitch, blimit, limit, thresh, blimit, limit,
+ thresh);
+ aom_lpf_vertical_6_dual_neon(s + 2 * MI_SIZE * pitch, pitch, blimit, limit,
+ thresh, blimit, limit, thresh);
+}
+
void aom_lpf_vertical_4_neon(uint8_t *src, int stride, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
uint32x2x2_t p1q0_p0q1, p1q1_p0q0, p1p0_q1q0;
@@ -852,6 +877,14 @@ void aom_lpf_vertical_4_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0,
aom_lpf_vertical_4_neon(s + 4 * pitch, pitch, blimit1, limit1, thresh1);
}
+void aom_lpf_vertical_4_quad_neon(uint8_t *s, int pitch, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh) {
+ aom_lpf_vertical_4_dual_neon(s, pitch, blimit, limit, thresh, blimit, limit,
+ thresh);
+ aom_lpf_vertical_4_dual_neon(s + 2 * MI_SIZE * pitch, pitch, blimit, limit,
+ thresh, blimit, limit, thresh);
+}
+
void aom_lpf_horizontal_14_neon(uint8_t *src, int stride, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
uint8x8_t UNINITIALIZED_IS_SAFE(p0q0), UNINITIALIZED_IS_SAFE(p1q1),
@@ -899,6 +932,18 @@ void aom_lpf_horizontal_14_dual_neon(
aom_lpf_horizontal_14_neon(s + 4, pitch, blimit1, limit1, thresh1);
}
+// TODO(any): Rewrite in NEON (similar to the quad SSE2 functions) for better
+// speedup.
+void aom_lpf_horizontal_14_quad_neon(uint8_t *s, int pitch,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh) {
+ aom_lpf_horizontal_14_dual_neon(s, pitch, blimit, limit, thresh, blimit,
+ limit, thresh);
+ aom_lpf_horizontal_14_dual_neon(s + 2 * MI_SIZE, pitch, blimit, limit, thresh,
+ blimit, limit, thresh);
+}
+
void aom_lpf_horizontal_8_neon(uint8_t *src, int stride, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
uint8x8_t p0q0, p1q1, p2q2, p3q3;
@@ -936,6 +981,17 @@ void aom_lpf_horizontal_8_dual_neon(
aom_lpf_horizontal_8_neon(s + 4, pitch, blimit1, limit1, thresh1);
}
+// TODO(any): Rewrite in NEON (similar to the quad SSE2 functions) for better
+// speedup.
+void aom_lpf_horizontal_8_quad_neon(uint8_t *s, int pitch,
+ const uint8_t *blimit, const uint8_t *limit,
+ const uint8_t *thresh) {
+ aom_lpf_horizontal_8_dual_neon(s, pitch, blimit, limit, thresh, blimit, limit,
+ thresh);
+ aom_lpf_horizontal_8_dual_neon(s + 2 * MI_SIZE, pitch, blimit, limit, thresh,
+ blimit, limit, thresh);
+}
+
void aom_lpf_horizontal_6_neon(uint8_t *src, int stride, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
uint8x8_t p0q0, p1q1, p2q2;
@@ -968,6 +1024,17 @@ void aom_lpf_horizontal_6_dual_neon(
aom_lpf_horizontal_6_neon(s + 4, pitch, blimit1, limit1, thresh1);
}
+// TODO(any): Rewrite in NEON (similar to the quad SSE2 functions) for better
+// speedup.
+void aom_lpf_horizontal_6_quad_neon(uint8_t *s, int pitch,
+ const uint8_t *blimit, const uint8_t *limit,
+ const uint8_t *thresh) {
+ aom_lpf_horizontal_6_dual_neon(s, pitch, blimit, limit, thresh, blimit, limit,
+ thresh);
+ aom_lpf_horizontal_6_dual_neon(s + 2 * MI_SIZE, pitch, blimit, limit, thresh,
+ blimit, limit, thresh);
+}
+
void aom_lpf_horizontal_4_neon(uint8_t *src, int stride, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
uint8x8_t p0q0, UNINITIALIZED_IS_SAFE(p1q1);
@@ -992,3 +1059,14 @@ void aom_lpf_horizontal_4_dual_neon(
aom_lpf_horizontal_4_neon(s, pitch, blimit0, limit0, thresh0);
aom_lpf_horizontal_4_neon(s + 4, pitch, blimit1, limit1, thresh1);
}
+
+// TODO(any): Rewrite in NEON (similar to the quad SSE2 functions) for better
+// speedup.
+void aom_lpf_horizontal_4_quad_neon(uint8_t *s, int pitch,
+ const uint8_t *blimit, const uint8_t *limit,
+ const uint8_t *thresh) {
+ aom_lpf_horizontal_4_dual_neon(s, pitch, blimit, limit, thresh, blimit, limit,
+ thresh);
+ aom_lpf_horizontal_4_dual_neon(s + 2 * MI_SIZE, pitch, blimit, limit, thresh,
+ blimit, limit, thresh);
+}
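Each aom_lpf_*_quad_neon wrapper added above filters a 16-pixel edge by running the corresponding 8-pixel dual kernel twice with identical limits for both halves, stepping by 2 * MI_SIZE (MI_SIZE is 4 in libaom) between the two calls. Below is a minimal sketch of that delegation pattern; the dual_fn typedef and lpf_quad_from_dual helper are illustrative names, not part of libaom:

    /* Sketch only: dual_fn stands in for any aom_lpf_*_dual_neon kernel.
     * step is pitch for the vertical filters and 1 for the horizontal
     * ones, so the second call lands 2 * MI_SIZE rows (or pixels) along. */
    #include <stdint.h>

    typedef void (*dual_fn)(uint8_t *s, int pitch, const uint8_t *blimit0,
                            const uint8_t *limit0, const uint8_t *thresh0,
                            const uint8_t *blimit1, const uint8_t *limit1,
                            const uint8_t *thresh1);

    static void lpf_quad_from_dual(dual_fn fn, uint8_t *s, int pitch, int step,
                                   const uint8_t *blimit, const uint8_t *limit,
                                   const uint8_t *thresh) {
      fn(s, pitch, blimit, limit, thresh, blimit, limit, thresh);
      fn(s + 2 * 4 /* MI_SIZE */ * step, pitch, blimit, limit, thresh, blimit,
         limit, thresh);
    }

As the TODO comments note, this is a stopgap: a dedicated 16-wide NEON kernel (as exists for the quad SSE2 functions) would process all 16 pixels per vector operation instead of making two independent 8-pixel passes.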
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/mem_neon.h b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/mem_neon.h
index c8236dad3ab..40be27deab8 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/mem_neon.h
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/mem_neon.h
@@ -15,6 +15,64 @@
#include <string.h>
#include "aom_dsp/aom_dsp_common.h"
+// Support for xN Neon intrinsics is lacking in some compilers.
+#if defined(__arm__) || defined(_M_ARM)
+#define ARM_32_BIT
+#endif
+
+// DEFICIENT_CLANG_32_BIT includes clang-cl.
+#if defined(__clang__) && defined(ARM_32_BIT) && \
+ (__clang_major__ <= 6 || (defined(__ANDROID__) && __clang_major__ <= 7))
+#define DEFICIENT_CLANG_32_BIT
+#endif
+
+#if defined(__GNUC__) && !defined(__clang__) && defined(ARM_32_BIT)
+#define GCC_32_BIT
+#endif
+
+#if defined(DEFICIENT_CLANG_32_BIT) || defined(GCC_32_BIT)
+
+static INLINE uint8x16x3_t vld1q_u8_x3(const uint8_t *ptr) {
+ uint8x16x3_t res = { { vld1q_u8(ptr + 0 * 16), vld1q_u8(ptr + 1 * 16),
+ vld1q_u8(ptr + 2 * 16) } };
+ return res;
+}
+
+static INLINE uint8x16x2_t vld1q_u8_x2(const uint8_t *ptr) {
+ uint8x16x2_t res = { { vld1q_u8(ptr + 0 * 16), vld1q_u8(ptr + 1 * 16) } };
+ return res;
+}
+
+static INLINE uint16x8x4_t vld1q_u16_x4(const uint16_t *ptr) {
+ uint16x8x4_t res = { { vld1q_u16(ptr + 0 * 8), vld1q_u16(ptr + 1 * 8),
+ vld1q_u16(ptr + 2 * 8), vld1q_u16(ptr + 3 * 8) } };
+ return res;
+}
+
+#elif defined(__GNUC__) && !defined(__clang__) // GCC 64-bit.
+#if __GNUC__ < 8
+
+static INLINE uint8x16x2_t vld1q_u8_x2(const uint8_t *ptr) {
+ uint8x16x2_t res = { { vld1q_u8(ptr + 0 * 16), vld1q_u8(ptr + 1 * 16) } };
+ return res;
+}
+
+static INLINE uint16x8x4_t vld1q_u16_x4(const uint16_t *ptr) {
+ uint16x8x4_t res = { { vld1q_u16(ptr + 0 * 8), vld1q_u16(ptr + 1 * 8),
+ vld1q_u16(ptr + 2 * 8), vld1q_u16(ptr + 3 * 8) } };
+ return res;
+}
+#endif // __GNUC__ < 8
+
+#if __GNUC__ < 9
+static INLINE uint8x16x3_t vld1q_u8_x3(const uint8_t *ptr) {
+ uint8x16x3_t res = { { vld1q_u8(ptr + 0 * 16), vld1q_u8(ptr + 1 * 16),
+ vld1q_u8(ptr + 2 * 16) } };
+ return res;
+}
+#endif // __GNUC__ < 9
+#endif // defined(__GNUC__) && !defined(__clang__)
+
static INLINE void store_row2_u8_8x8(uint8_t *s, int p, const uint8x8_t s0,
const uint8x8_t s1) {
vst1_u8(s, s0);
@@ -316,14 +374,25 @@ static INLINE void load_s16_8x4(const int16_t *s, ptrdiff_t p,
*s3 = vld1q_s16(s);
}
+// Load 2 sets of 4 bytes when alignment is not guaranteed.
+static INLINE uint8x8_t load_unaligned_u8(const uint8_t *buf, int stride) {
+ uint32_t a;
+ memcpy(&a, buf, 4);
+ buf += stride;
+ uint32x2_t a_u32 = vdup_n_u32(a);
+ memcpy(&a, buf, 4);
+ a_u32 = vset_lane_u32(a, a_u32, 1);
+ return vreinterpret_u8_u32(a_u32);
+}
+
// Load 4 sets of 4 bytes when alignment is not guaranteed.
static INLINE uint8x16_t load_unaligned_u8q(const uint8_t *buf, int stride) {
uint32_t a;
- uint32x4_t a_u32 = vdupq_n_u32(0);
+ uint32x4_t a_u32;
if (stride == 4) return vld1q_u8(buf);
memcpy(&a, buf, 4);
buf += stride;
- a_u32 = vsetq_lane_u32(a, a_u32, 0);
+ a_u32 = vdupq_n_u32(a);
memcpy(&a, buf, 4);
buf += stride;
a_u32 = vsetq_lane_u32(a, a_u32, 1);
@@ -331,7 +400,6 @@ static INLINE uint8x16_t load_unaligned_u8q(const uint8_t *buf, int stride) {
buf += stride;
a_u32 = vsetq_lane_u32(a, a_u32, 2);
memcpy(&a, buf, 4);
- buf += stride;
a_u32 = vsetq_lane_u32(a, a_u32, 3);
return vreinterpretq_u8_u32(a_u32);
}
@@ -343,25 +411,25 @@ static INLINE void load_unaligned_u8_4x8(const uint8_t *buf, int stride,
memcpy(&a, buf, 4);
buf += stride;
- *tu0 = vset_lane_u32(a, *tu0, 0);
+ *tu0 = vdup_n_u32(a);
memcpy(&a, buf, 4);
buf += stride;
*tu0 = vset_lane_u32(a, *tu0, 1);
memcpy(&a, buf, 4);
buf += stride;
- *tu1 = vset_lane_u32(a, *tu1, 0);
+ *tu1 = vdup_n_u32(a);
memcpy(&a, buf, 4);
buf += stride;
*tu1 = vset_lane_u32(a, *tu1, 1);
memcpy(&a, buf, 4);
buf += stride;
- *tu2 = vset_lane_u32(a, *tu2, 0);
+ *tu2 = vdup_n_u32(a);
memcpy(&a, buf, 4);
buf += stride;
*tu2 = vset_lane_u32(a, *tu2, 1);
memcpy(&a, buf, 4);
buf += stride;
- *tu3 = vset_lane_u32(a, *tu3, 0);
+ *tu3 = vdup_n_u32(a);
memcpy(&a, buf, 4);
*tu3 = vset_lane_u32(a, *tu3, 1);
}
@@ -372,13 +440,13 @@ static INLINE void load_unaligned_u8_4x4(const uint8_t *buf, int stride,
memcpy(&a, buf, 4);
buf += stride;
- *tu0 = vset_lane_u32(a, *tu0, 0);
+ *tu0 = vdup_n_u32(a);
memcpy(&a, buf, 4);
buf += stride;
*tu0 = vset_lane_u32(a, *tu0, 1);
memcpy(&a, buf, 4);
buf += stride;
- *tu1 = vset_lane_u32(a, *tu1, 0);
+ *tu1 = vdup_n_u32(a);
memcpy(&a, buf, 4);
*tu1 = vset_lane_u32(a, *tu1, 1);
}
@@ -398,9 +466,8 @@ static INLINE void load_unaligned_u8_4x2(const uint8_t *buf, int stride,
memcpy(&a, buf, 4);
buf += stride;
- *tu0 = vset_lane_u32(a, *tu0, 0);
+ *tu0 = vdup_n_u32(a);
memcpy(&a, buf, 4);
- buf += stride;
*tu0 = vset_lane_u32(a, *tu0, 1);
}
@@ -413,15 +480,21 @@ static INLINE void load_unaligned_u8_4x2(const uint8_t *buf, int stride,
memcpy(dst, &a, 4); \
} while (0)
+#define store_unaligned_u8_2x1(dst, src, lane) \
+ do { \
+ uint16_t a; \
+ a = vget_lane_u16(vreinterpret_u16_u8(src), lane); \
+ memcpy(dst, &a, 2); \
+ } while (0)
+
static INLINE void load_unaligned_u8_2x2(const uint8_t *buf, int stride,
uint16x4_t *tu0) {
uint16_t a;
memcpy(&a, buf, 2);
buf += stride;
- *tu0 = vset_lane_u16(a, *tu0, 0);
+ *tu0 = vdup_n_u16(a);
memcpy(&a, buf, 2);
- buf += stride;
*tu0 = vset_lane_u16(a, *tu0, 1);
}
@@ -465,13 +538,13 @@ static INLINE void load_unaligned_u16_4x4(const uint16_t *buf, uint32_t stride,
memcpy(&a, buf, 8);
buf += stride;
- *tu0 = vsetq_lane_u64(a, *tu0, 0);
+ *tu0 = vdupq_n_u64(a);
memcpy(&a, buf, 8);
buf += stride;
*tu0 = vsetq_lane_u64(a, *tu0, 1);
memcpy(&a, buf, 8);
buf += stride;
- *tu1 = vsetq_lane_u64(a, *tu1, 0);
+ *tu1 = vdupq_n_u64(a);
memcpy(&a, buf, 8);
*tu1 = vsetq_lane_u64(a, *tu1, 1);
}
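The recurring change through the load_unaligned_* helpers above replaces "zero-initialize, then vset_lane into lane 0" with a single vdup_n of the first loaded value, dropping one instruction and the dependency on a needlessly zeroed register (the dead trailing buf += stride increments are removed at the same time). A self-contained sketch of the resulting idiom, assuming only <arm_neon.h> and mirroring load_unaligned_u8 from the diff:

    #include <arm_neon.h>
    #include <stdint.h>
    #include <string.h>

    /* Load two possibly unaligned 4-byte rows into one 8-byte vector.
     * memcpy is the strict-aliasing-safe spelling of an unaligned load;
     * vdup_n_u32 fills both lanes with row 0, so lane 0 needs no separate
     * vset_lane, and lane 1 is then overwritten with row 1. */
    static inline uint8x8_t load_u8_4x2_unaligned(const uint8_t *buf,
                                                  int stride) {
      uint32_t lo, hi;
      memcpy(&lo, buf, 4);           /* row 0 */
      memcpy(&hi, buf + stride, 4);  /* row 1 */
      uint32x2_t v = vdup_n_u32(lo); /* lanes { lo, lo } */
      v = vset_lane_u32(hi, v, 1);   /* lanes { lo, hi } */
      return vreinterpret_u8_u32(v);
    }

The vld1q_u8_x2/x3 and vld1q_u16_x4 shims at the top of the file serve a different purpose: they only paper over toolchains (old GCC, 32-bit clang) that lack the multi-vector intrinsics, and should compile to equivalent loads where the real intrinsics exist.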
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/sad4d_neon.c b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/sad4d_neon.c
index 22f2e643e73..e1eccc356c4 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/sad4d_neon.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/sad4d_neon.c
@@ -15,578 +15,520 @@
#include "config/aom_dsp_rtcd.h"
#include "aom/aom_integer.h"
+#include "aom_dsp/arm/sum_neon.h"
-static INLINE unsigned int horizontal_long_add_16x8(const uint16x8_t vec_lo,
- const uint16x8_t vec_hi) {
- const uint32x4_t vec_l_lo =
- vaddl_u16(vget_low_u16(vec_lo), vget_high_u16(vec_lo));
- const uint32x4_t vec_l_hi =
- vaddl_u16(vget_low_u16(vec_hi), vget_high_u16(vec_hi));
- const uint32x4_t a = vaddq_u32(vec_l_lo, vec_l_hi);
- const uint64x2_t b = vpaddlq_u32(a);
- const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
- vreinterpret_u32_u64(vget_high_u64(b)));
- return vget_lane_u32(c, 0);
+#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+
+static INLINE void sad16_neon(uint8x16_t src, uint8x16_t ref,
+ uint32x4_t *const sad_sum) {
+ uint8x16_t abs_diff = vabdq_u8(src, ref);
+ *sad_sum = vdotq_u32(*sad_sum, abs_diff, vdupq_n_u8(1));
}
-// Calculate the absolute difference of 64 bytes from vec_src_00, vec_src_16,
-// vec_src_32, vec_src_48 and ref. Accumulate partial sums in vec_sum_ref_lo
-// and vec_sum_ref_hi.
-static void sad_neon_64(const uint8x16_t vec_src_00,
- const uint8x16_t vec_src_16,
- const uint8x16_t vec_src_32,
- const uint8x16_t vec_src_48, const uint8_t *ref,
- uint16x8_t *vec_sum_ref_lo,
- uint16x8_t *vec_sum_ref_hi) {
- const uint8x16_t vec_ref_00 = vld1q_u8(ref);
- const uint8x16_t vec_ref_16 = vld1q_u8(ref + 16);
- const uint8x16_t vec_ref_32 = vld1q_u8(ref + 32);
- const uint8x16_t vec_ref_48 = vld1q_u8(ref + 48);
-
- *vec_sum_ref_lo = vabal_u8(*vec_sum_ref_lo, vget_low_u8(vec_src_00),
- vget_low_u8(vec_ref_00));
- *vec_sum_ref_hi = vabal_u8(*vec_sum_ref_hi, vget_high_u8(vec_src_00),
- vget_high_u8(vec_ref_00));
- *vec_sum_ref_lo = vabal_u8(*vec_sum_ref_lo, vget_low_u8(vec_src_16),
- vget_low_u8(vec_ref_16));
- *vec_sum_ref_hi = vabal_u8(*vec_sum_ref_hi, vget_high_u8(vec_src_16),
- vget_high_u8(vec_ref_16));
- *vec_sum_ref_lo = vabal_u8(*vec_sum_ref_lo, vget_low_u8(vec_src_32),
- vget_low_u8(vec_ref_32));
- *vec_sum_ref_hi = vabal_u8(*vec_sum_ref_hi, vget_high_u8(vec_src_32),
- vget_high_u8(vec_ref_32));
- *vec_sum_ref_lo = vabal_u8(*vec_sum_ref_lo, vget_low_u8(vec_src_48),
- vget_low_u8(vec_ref_48));
- *vec_sum_ref_hi = vabal_u8(*vec_sum_ref_hi, vget_high_u8(vec_src_48),
- vget_high_u8(vec_ref_48));
+static INLINE void sad128xhx4d_neon(const uint8_t *src, int src_stride,
+ const uint8_t *const ref[4], int ref_stride,
+ uint32_t res[4], int h) {
+ uint32x4_t sum_lo[4] = { vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0),
+ vdupq_n_u32(0) };
+ uint32x4_t sum_hi[4] = { vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0),
+ vdupq_n_u32(0) };
+
+ int i = 0;
+ do {
+ const uint8x16_t s0 = vld1q_u8(src + i * src_stride);
+ sad16_neon(s0, vld1q_u8(ref[0] + i * ref_stride), &sum_lo[0]);
+ sad16_neon(s0, vld1q_u8(ref[1] + i * ref_stride), &sum_lo[1]);
+ sad16_neon(s0, vld1q_u8(ref[2] + i * ref_stride), &sum_lo[2]);
+ sad16_neon(s0, vld1q_u8(ref[3] + i * ref_stride), &sum_lo[3]);
+
+ const uint8x16_t s1 = vld1q_u8(src + i * src_stride + 16);
+ sad16_neon(s1, vld1q_u8(ref[0] + i * ref_stride + 16), &sum_hi[0]);
+ sad16_neon(s1, vld1q_u8(ref[1] + i * ref_stride + 16), &sum_hi[1]);
+ sad16_neon(s1, vld1q_u8(ref[2] + i * ref_stride + 16), &sum_hi[2]);
+ sad16_neon(s1, vld1q_u8(ref[3] + i * ref_stride + 16), &sum_hi[3]);
+
+ const uint8x16_t s2 = vld1q_u8(src + i * src_stride + 32);
+ sad16_neon(s2, vld1q_u8(ref[0] + i * ref_stride + 32), &sum_lo[0]);
+ sad16_neon(s2, vld1q_u8(ref[1] + i * ref_stride + 32), &sum_lo[1]);
+ sad16_neon(s2, vld1q_u8(ref[2] + i * ref_stride + 32), &sum_lo[2]);
+ sad16_neon(s2, vld1q_u8(ref[3] + i * ref_stride + 32), &sum_lo[3]);
+
+ const uint8x16_t s3 = vld1q_u8(src + i * src_stride + 48);
+ sad16_neon(s3, vld1q_u8(ref[0] + i * ref_stride + 48), &sum_hi[0]);
+ sad16_neon(s3, vld1q_u8(ref[1] + i * ref_stride + 48), &sum_hi[1]);
+ sad16_neon(s3, vld1q_u8(ref[2] + i * ref_stride + 48), &sum_hi[2]);
+ sad16_neon(s3, vld1q_u8(ref[3] + i * ref_stride + 48), &sum_hi[3]);
+
+ const uint8x16_t s4 = vld1q_u8(src + i * src_stride + 64);
+ sad16_neon(s4, vld1q_u8(ref[0] + i * ref_stride + 64), &sum_lo[0]);
+ sad16_neon(s4, vld1q_u8(ref[1] + i * ref_stride + 64), &sum_lo[1]);
+ sad16_neon(s4, vld1q_u8(ref[2] + i * ref_stride + 64), &sum_lo[2]);
+ sad16_neon(s4, vld1q_u8(ref[3] + i * ref_stride + 64), &sum_lo[3]);
+
+ const uint8x16_t s5 = vld1q_u8(src + i * src_stride + 80);
+ sad16_neon(s5, vld1q_u8(ref[0] + i * ref_stride + 80), &sum_hi[0]);
+ sad16_neon(s5, vld1q_u8(ref[1] + i * ref_stride + 80), &sum_hi[1]);
+ sad16_neon(s5, vld1q_u8(ref[2] + i * ref_stride + 80), &sum_hi[2]);
+ sad16_neon(s5, vld1q_u8(ref[3] + i * ref_stride + 80), &sum_hi[3]);
+
+ const uint8x16_t s6 = vld1q_u8(src + i * src_stride + 96);
+ sad16_neon(s6, vld1q_u8(ref[0] + i * ref_stride + 96), &sum_lo[0]);
+ sad16_neon(s6, vld1q_u8(ref[1] + i * ref_stride + 96), &sum_lo[1]);
+ sad16_neon(s6, vld1q_u8(ref[2] + i * ref_stride + 96), &sum_lo[2]);
+ sad16_neon(s6, vld1q_u8(ref[3] + i * ref_stride + 96), &sum_lo[3]);
+
+ const uint8x16_t s7 = vld1q_u8(src + i * src_stride + 112);
+ sad16_neon(s7, vld1q_u8(ref[0] + i * ref_stride + 112), &sum_hi[0]);
+ sad16_neon(s7, vld1q_u8(ref[1] + i * ref_stride + 112), &sum_hi[1]);
+ sad16_neon(s7, vld1q_u8(ref[2] + i * ref_stride + 112), &sum_hi[2]);
+ sad16_neon(s7, vld1q_u8(ref[3] + i * ref_stride + 112), &sum_hi[3]);
+
+ i++;
+ } while (i < h);
+
+ uint32x4_t res0 = vpaddq_u32(vaddq_u32(sum_lo[0], sum_hi[0]),
+ vaddq_u32(sum_lo[1], sum_hi[1]));
+ uint32x4_t res1 = vpaddq_u32(vaddq_u32(sum_lo[2], sum_hi[2]),
+ vaddq_u32(sum_lo[3], sum_hi[3]));
+ vst1q_u32(res, vpaddq_u32(res0, res1));
}
-// Calculate the absolute difference of 32 bytes from vec_src_00, vec_src_16,
-// and ref. Accumulate partial sums in vec_sum_ref_lo and vec_sum_ref_hi.
-static void sad_neon_32(const uint8x16_t vec_src_00,
- const uint8x16_t vec_src_16, const uint8_t *ref,
- uint16x8_t *vec_sum_ref_lo,
- uint16x8_t *vec_sum_ref_hi) {
- const uint8x16_t vec_ref_00 = vld1q_u8(ref);
- const uint8x16_t vec_ref_16 = vld1q_u8(ref + 16);
-
- *vec_sum_ref_lo = vabal_u8(*vec_sum_ref_lo, vget_low_u8(vec_src_00),
- vget_low_u8(vec_ref_00));
- *vec_sum_ref_hi = vabal_u8(*vec_sum_ref_hi, vget_high_u8(vec_src_00),
- vget_high_u8(vec_ref_00));
- *vec_sum_ref_lo = vabal_u8(*vec_sum_ref_lo, vget_low_u8(vec_src_16),
- vget_low_u8(vec_ref_16));
- *vec_sum_ref_hi = vabal_u8(*vec_sum_ref_hi, vget_high_u8(vec_src_16),
- vget_high_u8(vec_ref_16));
+static INLINE void sad64xhx4d_neon(const uint8_t *src, int src_stride,
+ const uint8_t *const ref[4], int ref_stride,
+ uint32_t res[4], int h) {
+ uint32x4_t sum_lo[4] = { vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0),
+ vdupq_n_u32(0) };
+ uint32x4_t sum_hi[4] = { vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0),
+ vdupq_n_u32(0) };
+
+ int i = 0;
+ do {
+ const uint8x16_t s0 = vld1q_u8(src + i * src_stride);
+ sad16_neon(s0, vld1q_u8(ref[0] + i * ref_stride), &sum_lo[0]);
+ sad16_neon(s0, vld1q_u8(ref[1] + i * ref_stride), &sum_lo[1]);
+ sad16_neon(s0, vld1q_u8(ref[2] + i * ref_stride), &sum_lo[2]);
+ sad16_neon(s0, vld1q_u8(ref[3] + i * ref_stride), &sum_lo[3]);
+
+ const uint8x16_t s1 = vld1q_u8(src + i * src_stride + 16);
+ sad16_neon(s1, vld1q_u8(ref[0] + i * ref_stride + 16), &sum_hi[0]);
+ sad16_neon(s1, vld1q_u8(ref[1] + i * ref_stride + 16), &sum_hi[1]);
+ sad16_neon(s1, vld1q_u8(ref[2] + i * ref_stride + 16), &sum_hi[2]);
+ sad16_neon(s1, vld1q_u8(ref[3] + i * ref_stride + 16), &sum_hi[3]);
+
+ const uint8x16_t s2 = vld1q_u8(src + i * src_stride + 32);
+ sad16_neon(s2, vld1q_u8(ref[0] + i * ref_stride + 32), &sum_lo[0]);
+ sad16_neon(s2, vld1q_u8(ref[1] + i * ref_stride + 32), &sum_lo[1]);
+ sad16_neon(s2, vld1q_u8(ref[2] + i * ref_stride + 32), &sum_lo[2]);
+ sad16_neon(s2, vld1q_u8(ref[3] + i * ref_stride + 32), &sum_lo[3]);
+
+ const uint8x16_t s3 = vld1q_u8(src + i * src_stride + 48);
+ sad16_neon(s3, vld1q_u8(ref[0] + i * ref_stride + 48), &sum_hi[0]);
+ sad16_neon(s3, vld1q_u8(ref[1] + i * ref_stride + 48), &sum_hi[1]);
+ sad16_neon(s3, vld1q_u8(ref[2] + i * ref_stride + 48), &sum_hi[2]);
+ sad16_neon(s3, vld1q_u8(ref[3] + i * ref_stride + 48), &sum_hi[3]);
+
+ i++;
+ } while (i < h);
+
+ uint32x4_t res0 = vpaddq_u32(vaddq_u32(sum_lo[0], sum_hi[0]),
+ vaddq_u32(sum_lo[1], sum_hi[1]));
+ uint32x4_t res1 = vpaddq_u32(vaddq_u32(sum_lo[2], sum_hi[2]),
+ vaddq_u32(sum_lo[3], sum_hi[3]));
+ vst1q_u32(res, vpaddq_u32(res0, res1));
}
-void aom_sad64x64x4d_neon(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
- uint32_t res[4]) {
- int i;
- uint16x8_t vec_sum_ref0_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref0_hi = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref1_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref1_hi = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref2_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref2_hi = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref3_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref3_hi = vdupq_n_u16(0);
- const uint8_t *ref0, *ref1, *ref2, *ref3;
- ref0 = ref[0];
- ref1 = ref[1];
- ref2 = ref[2];
- ref3 = ref[3];
-
- for (i = 0; i < 64; ++i) {
- const uint8x16_t vec_src_00 = vld1q_u8(src);
- const uint8x16_t vec_src_16 = vld1q_u8(src + 16);
- const uint8x16_t vec_src_32 = vld1q_u8(src + 32);
- const uint8x16_t vec_src_48 = vld1q_u8(src + 48);
-
- sad_neon_64(vec_src_00, vec_src_16, vec_src_32, vec_src_48, ref0,
- &vec_sum_ref0_lo, &vec_sum_ref0_hi);
- sad_neon_64(vec_src_00, vec_src_16, vec_src_32, vec_src_48, ref1,
- &vec_sum_ref1_lo, &vec_sum_ref1_hi);
- sad_neon_64(vec_src_00, vec_src_16, vec_src_32, vec_src_48, ref2,
- &vec_sum_ref2_lo, &vec_sum_ref2_hi);
- sad_neon_64(vec_src_00, vec_src_16, vec_src_32, vec_src_48, ref3,
- &vec_sum_ref3_lo, &vec_sum_ref3_hi);
-
- src += src_stride;
- ref0 += ref_stride;
- ref1 += ref_stride;
- ref2 += ref_stride;
- ref3 += ref_stride;
- }
+static INLINE void sad32xhx4d_neon(const uint8_t *src, int src_stride,
+ const uint8_t *const ref[4], int ref_stride,
+ uint32_t res[4], int h) {
+ uint32x4_t sum_lo[4] = { vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0),
+ vdupq_n_u32(0) };
+ uint32x4_t sum_hi[4] = { vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0),
+ vdupq_n_u32(0) };
+
+ int i = 0;
+ do {
+ const uint8x16_t s0 = vld1q_u8(src + i * src_stride);
+ sad16_neon(s0, vld1q_u8(ref[0] + i * ref_stride), &sum_lo[0]);
+ sad16_neon(s0, vld1q_u8(ref[1] + i * ref_stride), &sum_lo[1]);
+ sad16_neon(s0, vld1q_u8(ref[2] + i * ref_stride), &sum_lo[2]);
+ sad16_neon(s0, vld1q_u8(ref[3] + i * ref_stride), &sum_lo[3]);
+
+ const uint8x16_t s1 = vld1q_u8(src + i * src_stride + 16);
+ sad16_neon(s1, vld1q_u8(ref[0] + i * ref_stride + 16), &sum_hi[0]);
+ sad16_neon(s1, vld1q_u8(ref[1] + i * ref_stride + 16), &sum_hi[1]);
+ sad16_neon(s1, vld1q_u8(ref[2] + i * ref_stride + 16), &sum_hi[2]);
+ sad16_neon(s1, vld1q_u8(ref[3] + i * ref_stride + 16), &sum_hi[3]);
+
+ i++;
+ } while (i < h);
+
+ uint32x4_t res0 = vpaddq_u32(vaddq_u32(sum_lo[0], sum_hi[0]),
+ vaddq_u32(sum_lo[1], sum_hi[1]));
+ uint32x4_t res1 = vpaddq_u32(vaddq_u32(sum_lo[2], sum_hi[2]),
+ vaddq_u32(sum_lo[3], sum_hi[3]));
+ vst1q_u32(res, vpaddq_u32(res0, res1));
+}
- res[0] = horizontal_long_add_16x8(vec_sum_ref0_lo, vec_sum_ref0_hi);
- res[1] = horizontal_long_add_16x8(vec_sum_ref1_lo, vec_sum_ref1_hi);
- res[2] = horizontal_long_add_16x8(vec_sum_ref2_lo, vec_sum_ref2_hi);
- res[3] = horizontal_long_add_16x8(vec_sum_ref3_lo, vec_sum_ref3_hi);
+static INLINE void sad16xhx4d_neon(const uint8_t *src, int src_stride,
+ const uint8_t *const ref[4], int ref_stride,
+ uint32_t res[4], int h) {
+ uint32x4_t sum[4] = { vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0),
+ vdupq_n_u32(0) };
+
+ int i = 0;
+ do {
+ const uint8x16_t s = vld1q_u8(src + i * src_stride);
+ sad16_neon(s, vld1q_u8(ref[0] + i * ref_stride), &sum[0]);
+ sad16_neon(s, vld1q_u8(ref[1] + i * ref_stride), &sum[1]);
+ sad16_neon(s, vld1q_u8(ref[2] + i * ref_stride), &sum[2]);
+ sad16_neon(s, vld1q_u8(ref[3] + i * ref_stride), &sum[3]);
+
+ i++;
+ } while (i < h);
+
+ uint32x4_t res0 = vpaddq_u32(sum[0], sum[1]);
+ uint32x4_t res1 = vpaddq_u32(sum[2], sum[3]);
+ vst1q_u32(res, vpaddq_u32(res0, res1));
}
-void aom_sad32x32x4d_neon(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
- uint32_t res[4]) {
- int i;
- uint16x8_t vec_sum_ref0_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref0_hi = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref1_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref1_hi = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref2_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref2_hi = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref3_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref3_hi = vdupq_n_u16(0);
- const uint8_t *ref0, *ref1, *ref2, *ref3;
- ref0 = ref[0];
- ref1 = ref[1];
- ref2 = ref[2];
- ref3 = ref[3];
-
- for (i = 0; i < 32; ++i) {
- const uint8x16_t vec_src_00 = vld1q_u8(src);
- const uint8x16_t vec_src_16 = vld1q_u8(src + 16);
-
- sad_neon_32(vec_src_00, vec_src_16, ref0, &vec_sum_ref0_lo,
- &vec_sum_ref0_hi);
- sad_neon_32(vec_src_00, vec_src_16, ref1, &vec_sum_ref1_lo,
- &vec_sum_ref1_hi);
- sad_neon_32(vec_src_00, vec_src_16, ref2, &vec_sum_ref2_lo,
- &vec_sum_ref2_hi);
- sad_neon_32(vec_src_00, vec_src_16, ref3, &vec_sum_ref3_lo,
- &vec_sum_ref3_hi);
-
- src += src_stride;
- ref0 += ref_stride;
- ref1 += ref_stride;
- ref2 += ref_stride;
- ref3 += ref_stride;
- }
+#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
- res[0] = horizontal_long_add_16x8(vec_sum_ref0_lo, vec_sum_ref0_hi);
- res[1] = horizontal_long_add_16x8(vec_sum_ref1_lo, vec_sum_ref1_hi);
- res[2] = horizontal_long_add_16x8(vec_sum_ref2_lo, vec_sum_ref2_hi);
- res[3] = horizontal_long_add_16x8(vec_sum_ref3_lo, vec_sum_ref3_hi);
+static INLINE void sad16_neon(uint8x16_t src, uint8x16_t ref,
+ uint16x8_t *const sad_sum) {
+ uint8x16_t abs_diff = vabdq_u8(src, ref);
+ *sad_sum = vpadalq_u8(*sad_sum, abs_diff);
}
-void aom_sad16x16x4d_neon(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
- uint32_t res[4]) {
- int i;
- uint16x8_t vec_sum_ref0_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref0_hi = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref1_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref1_hi = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref2_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref2_hi = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref3_lo = vdupq_n_u16(0);
- uint16x8_t vec_sum_ref3_hi = vdupq_n_u16(0);
- const uint8_t *ref0, *ref1, *ref2, *ref3;
- ref0 = ref[0];
- ref1 = ref[1];
- ref2 = ref[2];
- ref3 = ref[3];
-
- for (i = 0; i < 16; ++i) {
- const uint8x16_t vec_src = vld1q_u8(src);
- const uint8x16_t vec_ref0 = vld1q_u8(ref0);
- const uint8x16_t vec_ref1 = vld1q_u8(ref1);
- const uint8x16_t vec_ref2 = vld1q_u8(ref2);
- const uint8x16_t vec_ref3 = vld1q_u8(ref3);
-
- vec_sum_ref0_lo =
- vabal_u8(vec_sum_ref0_lo, vget_low_u8(vec_src), vget_low_u8(vec_ref0));
- vec_sum_ref0_hi = vabal_u8(vec_sum_ref0_hi, vget_high_u8(vec_src),
- vget_high_u8(vec_ref0));
- vec_sum_ref1_lo =
- vabal_u8(vec_sum_ref1_lo, vget_low_u8(vec_src), vget_low_u8(vec_ref1));
- vec_sum_ref1_hi = vabal_u8(vec_sum_ref1_hi, vget_high_u8(vec_src),
- vget_high_u8(vec_ref1));
- vec_sum_ref2_lo =
- vabal_u8(vec_sum_ref2_lo, vget_low_u8(vec_src), vget_low_u8(vec_ref2));
- vec_sum_ref2_hi = vabal_u8(vec_sum_ref2_hi, vget_high_u8(vec_src),
- vget_high_u8(vec_ref2));
- vec_sum_ref3_lo =
- vabal_u8(vec_sum_ref3_lo, vget_low_u8(vec_src), vget_low_u8(vec_ref3));
- vec_sum_ref3_hi = vabal_u8(vec_sum_ref3_hi, vget_high_u8(vec_src),
- vget_high_u8(vec_ref3));
-
- src += src_stride;
- ref0 += ref_stride;
- ref1 += ref_stride;
- ref2 += ref_stride;
- ref3 += ref_stride;
- }
+static INLINE void sad128xhx4d_neon(const uint8_t *src, int src_stride,
+ const uint8_t *const ref[4], int ref_stride,
+ uint32_t res[4], int h) {
+ vst1q_u32(res, vdupq_n_u32(0));
+ int h_tmp = h > 32 ? 32 : h;
+
+ int i = 0;
+ do {
+ uint16x8_t sum_lo[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
+ vdupq_n_u16(0) };
+ uint16x8_t sum_hi[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
+ vdupq_n_u16(0) };
+
+ do {
+ const uint8x16_t s0 = vld1q_u8(src + i * src_stride);
+ sad16_neon(s0, vld1q_u8(ref[0] + i * ref_stride), &sum_lo[0]);
+ sad16_neon(s0, vld1q_u8(ref[1] + i * ref_stride), &sum_lo[1]);
+ sad16_neon(s0, vld1q_u8(ref[2] + i * ref_stride), &sum_lo[2]);
+ sad16_neon(s0, vld1q_u8(ref[3] + i * ref_stride), &sum_lo[3]);
+
+ const uint8x16_t s1 = vld1q_u8(src + i * src_stride + 16);
+ sad16_neon(s1, vld1q_u8(ref[0] + i * ref_stride + 16), &sum_hi[0]);
+ sad16_neon(s1, vld1q_u8(ref[1] + i * ref_stride + 16), &sum_hi[1]);
+ sad16_neon(s1, vld1q_u8(ref[2] + i * ref_stride + 16), &sum_hi[2]);
+ sad16_neon(s1, vld1q_u8(ref[3] + i * ref_stride + 16), &sum_hi[3]);
+
+ const uint8x16_t s2 = vld1q_u8(src + i * src_stride + 32);
+ sad16_neon(s2, vld1q_u8(ref[0] + i * ref_stride + 32), &sum_lo[0]);
+ sad16_neon(s2, vld1q_u8(ref[1] + i * ref_stride + 32), &sum_lo[1]);
+ sad16_neon(s2, vld1q_u8(ref[2] + i * ref_stride + 32), &sum_lo[2]);
+ sad16_neon(s2, vld1q_u8(ref[3] + i * ref_stride + 32), &sum_lo[3]);
+
+ const uint8x16_t s3 = vld1q_u8(src + i * src_stride + 48);
+ sad16_neon(s3, vld1q_u8(ref[0] + i * ref_stride + 48), &sum_hi[0]);
+ sad16_neon(s3, vld1q_u8(ref[1] + i * ref_stride + 48), &sum_hi[1]);
+ sad16_neon(s3, vld1q_u8(ref[2] + i * ref_stride + 48), &sum_hi[2]);
+ sad16_neon(s3, vld1q_u8(ref[3] + i * ref_stride + 48), &sum_hi[3]);
+
+ const uint8x16_t s4 = vld1q_u8(src + i * src_stride + 64);
+ sad16_neon(s4, vld1q_u8(ref[0] + i * ref_stride + 64), &sum_lo[0]);
+ sad16_neon(s4, vld1q_u8(ref[1] + i * ref_stride + 64), &sum_lo[1]);
+ sad16_neon(s4, vld1q_u8(ref[2] + i * ref_stride + 64), &sum_lo[2]);
+ sad16_neon(s4, vld1q_u8(ref[3] + i * ref_stride + 64), &sum_lo[3]);
+
+ const uint8x16_t s5 = vld1q_u8(src + i * src_stride + 80);
+ sad16_neon(s5, vld1q_u8(ref[0] + i * ref_stride + 80), &sum_hi[0]);
+ sad16_neon(s5, vld1q_u8(ref[1] + i * ref_stride + 80), &sum_hi[1]);
+ sad16_neon(s5, vld1q_u8(ref[2] + i * ref_stride + 80), &sum_hi[2]);
+ sad16_neon(s5, vld1q_u8(ref[3] + i * ref_stride + 80), &sum_hi[3]);
+
+ const uint8x16_t s6 = vld1q_u8(src + i * src_stride + 96);
+ sad16_neon(s6, vld1q_u8(ref[0] + i * ref_stride + 96), &sum_lo[0]);
+ sad16_neon(s6, vld1q_u8(ref[1] + i * ref_stride + 96), &sum_lo[1]);
+ sad16_neon(s6, vld1q_u8(ref[2] + i * ref_stride + 96), &sum_lo[2]);
+ sad16_neon(s6, vld1q_u8(ref[3] + i * ref_stride + 96), &sum_lo[3]);
+
+ const uint8x16_t s7 = vld1q_u8(src + i * src_stride + 112);
+ sad16_neon(s7, vld1q_u8(ref[0] + i * ref_stride + 112), &sum_hi[0]);
+ sad16_neon(s7, vld1q_u8(ref[1] + i * ref_stride + 112), &sum_hi[1]);
+ sad16_neon(s7, vld1q_u8(ref[2] + i * ref_stride + 112), &sum_hi[2]);
+ sad16_neon(s7, vld1q_u8(ref[3] + i * ref_stride + 112), &sum_hi[3]);
+
+ i++;
+ } while (i < h_tmp);
+
+ res[0] += horizontal_long_add_u16x8(sum_lo[0], sum_hi[0]);
+ res[1] += horizontal_long_add_u16x8(sum_lo[1], sum_hi[1]);
+ res[2] += horizontal_long_add_u16x8(sum_lo[2], sum_hi[2]);
+ res[3] += horizontal_long_add_u16x8(sum_lo[3], sum_hi[3]);
+
+ h_tmp += 32;
+ } while (i < h);
+}
- res[0] = horizontal_long_add_16x8(vec_sum_ref0_lo, vec_sum_ref0_hi);
- res[1] = horizontal_long_add_16x8(vec_sum_ref1_lo, vec_sum_ref1_hi);
- res[2] = horizontal_long_add_16x8(vec_sum_ref2_lo, vec_sum_ref2_hi);
- res[3] = horizontal_long_add_16x8(vec_sum_ref3_lo, vec_sum_ref3_hi);
+static INLINE void sad64xhx4d_neon(const uint8_t *src, int src_stride,
+ const uint8_t *const ref[4], int ref_stride,
+ uint32_t res[4], int h) {
+ vst1q_u32(res, vdupq_n_u32(0));
+ int h_tmp = h > 64 ? 64 : h;
+
+ int i = 0;
+ do {
+ uint16x8_t sum_lo[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
+ vdupq_n_u16(0) };
+ uint16x8_t sum_hi[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
+ vdupq_n_u16(0) };
+
+ do {
+ const uint8x16_t s0 = vld1q_u8(src + i * src_stride);
+ sad16_neon(s0, vld1q_u8(ref[0] + i * ref_stride), &sum_lo[0]);
+ sad16_neon(s0, vld1q_u8(ref[1] + i * ref_stride), &sum_lo[1]);
+ sad16_neon(s0, vld1q_u8(ref[2] + i * ref_stride), &sum_lo[2]);
+ sad16_neon(s0, vld1q_u8(ref[3] + i * ref_stride), &sum_lo[3]);
+
+ const uint8x16_t s1 = vld1q_u8(src + i * src_stride + 16);
+ sad16_neon(s1, vld1q_u8(ref[0] + i * ref_stride + 16), &sum_hi[0]);
+ sad16_neon(s1, vld1q_u8(ref[1] + i * ref_stride + 16), &sum_hi[1]);
+ sad16_neon(s1, vld1q_u8(ref[2] + i * ref_stride + 16), &sum_hi[2]);
+ sad16_neon(s1, vld1q_u8(ref[3] + i * ref_stride + 16), &sum_hi[3]);
+
+ const uint8x16_t s2 = vld1q_u8(src + i * src_stride + 32);
+ sad16_neon(s2, vld1q_u8(ref[0] + i * ref_stride + 32), &sum_lo[0]);
+ sad16_neon(s2, vld1q_u8(ref[1] + i * ref_stride + 32), &sum_lo[1]);
+ sad16_neon(s2, vld1q_u8(ref[2] + i * ref_stride + 32), &sum_lo[2]);
+ sad16_neon(s2, vld1q_u8(ref[3] + i * ref_stride + 32), &sum_lo[3]);
+
+ const uint8x16_t s3 = vld1q_u8(src + i * src_stride + 48);
+ sad16_neon(s3, vld1q_u8(ref[0] + i * ref_stride + 48), &sum_hi[0]);
+ sad16_neon(s3, vld1q_u8(ref[1] + i * ref_stride + 48), &sum_hi[1]);
+ sad16_neon(s3, vld1q_u8(ref[2] + i * ref_stride + 48), &sum_hi[2]);
+ sad16_neon(s3, vld1q_u8(ref[3] + i * ref_stride + 48), &sum_hi[3]);
+
+ i++;
+ } while (i < h_tmp);
+
+ res[0] += horizontal_long_add_u16x8(sum_lo[0], sum_hi[0]);
+ res[1] += horizontal_long_add_u16x8(sum_lo[1], sum_hi[1]);
+ res[2] += horizontal_long_add_u16x8(sum_lo[2], sum_hi[2]);
+ res[3] += horizontal_long_add_u16x8(sum_lo[3], sum_hi[3]);
+
+ h_tmp += 64;
+ } while (i < h);
}
-static INLINE unsigned int horizontal_add_16x4(const uint16x4_t vec_16x4) {
- const uint32x2_t a = vpaddl_u16(vec_16x4);
- const uint64x1_t b = vpaddl_u32(a);
- return vget_lane_u32(vreinterpret_u32_u64(b), 0);
+static INLINE void sad32xhx4d_neon(const uint8_t *src, int src_stride,
+ const uint8_t *const ref[4], int ref_stride,
+ uint32_t res[4], int h) {
+ uint16x8_t sum_lo[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
+ vdupq_n_u16(0) };
+ uint16x8_t sum_hi[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
+ vdupq_n_u16(0) };
+
+ int i = 0;
+ do {
+ const uint8x16_t s0 = vld1q_u8(src + i * src_stride);
+ sad16_neon(s0, vld1q_u8(ref[0] + i * ref_stride), &sum_lo[0]);
+ sad16_neon(s0, vld1q_u8(ref[1] + i * ref_stride), &sum_lo[1]);
+ sad16_neon(s0, vld1q_u8(ref[2] + i * ref_stride), &sum_lo[2]);
+ sad16_neon(s0, vld1q_u8(ref[3] + i * ref_stride), &sum_lo[3]);
+
+ const uint8x16_t s1 = vld1q_u8(src + i * src_stride + 16);
+ sad16_neon(s1, vld1q_u8(ref[0] + i * ref_stride + 16), &sum_hi[0]);
+ sad16_neon(s1, vld1q_u8(ref[1] + i * ref_stride + 16), &sum_hi[1]);
+ sad16_neon(s1, vld1q_u8(ref[2] + i * ref_stride + 16), &sum_hi[2]);
+ sad16_neon(s1, vld1q_u8(ref[3] + i * ref_stride + 16), &sum_hi[3]);
+
+ i++;
+ } while (i < h);
+
+ res[0] = horizontal_long_add_u16x8(sum_lo[0], sum_hi[0]);
+ res[1] = horizontal_long_add_u16x8(sum_lo[1], sum_hi[1]);
+ res[2] = horizontal_long_add_u16x8(sum_lo[2], sum_hi[2]);
+ res[3] = horizontal_long_add_u16x8(sum_lo[3], sum_hi[3]);
}
-static INLINE unsigned int horizontal_add_16x8(const uint16x8_t vec_16x8) {
- const uint32x4_t a = vpaddlq_u16(vec_16x8);
- const uint64x2_t b = vpaddlq_u32(a);
- const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
- vreinterpret_u32_u64(vget_high_u64(b)));
- return vget_lane_u32(c, 0);
+static INLINE void sad16xhx4d_neon(const uint8_t *src, int src_stride,
+ const uint8_t *const ref[4], int ref_stride,
+ uint32_t res[4], int h) {
+ uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
+ vdupq_n_u16(0) };
+
+ int i = 0;
+ do {
+ const uint8x16_t s = vld1q_u8(src + i * src_stride);
+ sad16_neon(s, vld1q_u8(ref[0] + i * ref_stride), &sum[0]);
+ sad16_neon(s, vld1q_u8(ref[1] + i * ref_stride), &sum[1]);
+ sad16_neon(s, vld1q_u8(ref[2] + i * ref_stride), &sum[2]);
+ sad16_neon(s, vld1q_u8(ref[3] + i * ref_stride), &sum[3]);
+
+ i++;
+ } while (i < h);
+
+ res[0] = horizontal_add_u16x8(sum[0]);
+ res[1] = horizontal_add_u16x8(sum[1]);
+ res[2] = horizontal_add_u16x8(sum[2]);
+ res[3] = horizontal_add_u16x8(sum[3]);
}
-static void sad_row4_neon(uint16x4_t *vec_src, const uint8x8_t q0,
- const uint8x8_t ref) {
- uint8x8_t q2 = vabd_u8(q0, ref);
- *vec_src = vpadal_u8(*vec_src, q2);
+#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+
+static INLINE void sad8_neon(uint8x8_t src, uint8x8_t ref,
+ uint16x8_t *const sad_sum) {
+ uint8x8_t abs_diff = vabd_u8(src, ref);
+ *sad_sum = vaddw_u8(*sad_sum, abs_diff);
}
-static void sad_row8_neon(uint16x4_t *vec_src, const uint8x8_t *q0,
- const uint8_t *ref_ptr) {
- uint8x8_t q1 = vld1_u8(ref_ptr);
- uint8x8_t q2 = vabd_u8(*q0, q1);
- *vec_src = vpadal_u8(*vec_src, q2);
+static INLINE void sad8xhx4d_neon(const uint8_t *src, int src_stride,
+ const uint8_t *const ref[4], int ref_stride,
+ uint32_t res[4], int h) {
+ uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
+ vdupq_n_u16(0) };
+
+ int i = 0;
+ do {
+ const uint8x8_t s = vld1_u8(src + i * src_stride);
+ sad8_neon(s, vld1_u8(ref[0] + i * ref_stride), &sum[0]);
+ sad8_neon(s, vld1_u8(ref[1] + i * ref_stride), &sum[1]);
+ sad8_neon(s, vld1_u8(ref[2] + i * ref_stride), &sum[2]);
+ sad8_neon(s, vld1_u8(ref[3] + i * ref_stride), &sum[3]);
+
+ i++;
+ } while (i < h);
+
+ res[0] = horizontal_add_u16x8(sum[0]);
+ res[1] = horizontal_add_u16x8(sum[1]);
+ res[2] = horizontal_add_u16x8(sum[2]);
+ res[3] = horizontal_add_u16x8(sum[3]);
}
-static void sad_row16_neon(uint16x8_t *vec_src, const uint8x16_t *q0,
- const uint8_t *ref_ptr) {
- uint8x16_t q1 = vld1q_u8(ref_ptr);
- uint8x16_t q2 = vabdq_u8(*q0, q1);
- *vec_src = vpadalq_u8(*vec_src, q2);
+static INLINE void sad4xhx4d_neon(const uint8_t *src, int src_stride,
+ const uint8_t *const ref[4], int ref_stride,
+ uint32_t res[4], int h) {
+ uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
+ vdupq_n_u16(0) };
+
+ int i = 0;
+ do {
+ uint32x2_t s, r0, r1, r2, r3;
+ uint32_t s_lo, s_hi, r0_lo, r0_hi, r1_lo, r1_hi, r2_lo, r2_hi, r3_lo, r3_hi;
+
+ memcpy(&s_lo, src + i * src_stride, 4);
+ memcpy(&r0_lo, ref[0] + i * ref_stride, 4);
+ memcpy(&r1_lo, ref[1] + i * ref_stride, 4);
+ memcpy(&r2_lo, ref[2] + i * ref_stride, 4);
+ memcpy(&r3_lo, ref[3] + i * ref_stride, 4);
+ s = vdup_n_u32(s_lo);
+ r0 = vdup_n_u32(r0_lo);
+ r1 = vdup_n_u32(r1_lo);
+ r2 = vdup_n_u32(r2_lo);
+ r3 = vdup_n_u32(r3_lo);
+
+ memcpy(&s_hi, src + (i + 1) * src_stride, 4);
+ memcpy(&r0_hi, ref[0] + (i + 1) * ref_stride, 4);
+ memcpy(&r1_hi, ref[1] + (i + 1) * ref_stride, 4);
+ memcpy(&r2_hi, ref[2] + (i + 1) * ref_stride, 4);
+ memcpy(&r3_hi, ref[3] + (i + 1) * ref_stride, 4);
+ s = vset_lane_u32(s_hi, s, 1);
+ r0 = vset_lane_u32(r0_hi, r0, 1);
+ r1 = vset_lane_u32(r1_hi, r1, 1);
+ r2 = vset_lane_u32(r2_hi, r2, 1);
+ r3 = vset_lane_u32(r3_hi, r3, 1);
+
+ sad8_neon(vreinterpret_u8_u32(s), vreinterpret_u8_u32(r0), &sum[0]);
+ sad8_neon(vreinterpret_u8_u32(s), vreinterpret_u8_u32(r1), &sum[1]);
+ sad8_neon(vreinterpret_u8_u32(s), vreinterpret_u8_u32(r2), &sum[2]);
+ sad8_neon(vreinterpret_u8_u32(s), vreinterpret_u8_u32(r3), &sum[3]);
+
+ i += 2;
+ } while (i < h);
+
+ res[0] = horizontal_add_u16x8(sum[0]);
+ res[1] = horizontal_add_u16x8(sum[1]);
+ res[2] = horizontal_add_u16x8(sum[2]);
+ res[3] = horizontal_add_u16x8(sum[3]);
}
-void aom_sadMxNx4d_neon(int width, int height, const uint8_t *src,
- int src_stride, const uint8_t *const ref[4],
- int ref_stride, uint32_t res[4]) {
- const uint8_t *ref0, *ref1, *ref2, *ref3;
-
- ref0 = ref[0];
- ref1 = ref[1];
- ref2 = ref[2];
- ref3 = ref[3];
-
- res[0] = 0;
- res[1] = 0;
- res[2] = 0;
- res[3] = 0;
-
- switch (width) {
- case 4: {
- uint32_t src4, ref40, ref41, ref42, ref43;
- uint32x2_t q8 = vdup_n_u32(0);
- uint32x2_t q4 = vdup_n_u32(0);
- uint32x2_t q5 = vdup_n_u32(0);
- uint32x2_t q6 = vdup_n_u32(0);
- uint32x2_t q7 = vdup_n_u32(0);
-
- for (int i = 0; i < height / 2; i++) {
- uint16x4_t q0 = vdup_n_u16(0);
- uint16x4_t q1 = vdup_n_u16(0);
- uint16x4_t q2 = vdup_n_u16(0);
- uint16x4_t q3 = vdup_n_u16(0);
-
- memcpy(&src4, src, 4);
- memcpy(&ref40, ref0, 4);
- memcpy(&ref41, ref1, 4);
- memcpy(&ref42, ref2, 4);
- memcpy(&ref43, ref3, 4);
-
- src += src_stride;
- ref0 += ref_stride;
- ref1 += ref_stride;
- ref2 += ref_stride;
- ref3 += ref_stride;
-
- q8 = vset_lane_u32(src4, q8, 0);
- q4 = vset_lane_u32(ref40, q4, 0);
- q5 = vset_lane_u32(ref41, q5, 0);
- q6 = vset_lane_u32(ref42, q6, 0);
- q7 = vset_lane_u32(ref43, q7, 0);
-
- memcpy(&src4, src, 4);
- memcpy(&ref40, ref0, 4);
- memcpy(&ref41, ref1, 4);
- memcpy(&ref42, ref2, 4);
- memcpy(&ref43, ref3, 4);
-
- src += src_stride;
- ref0 += ref_stride;
- ref1 += ref_stride;
- ref2 += ref_stride;
- ref3 += ref_stride;
-
- q8 = vset_lane_u32(src4, q8, 1);
- q4 = vset_lane_u32(ref40, q4, 1);
- q5 = vset_lane_u32(ref41, q5, 1);
- q6 = vset_lane_u32(ref42, q6, 1);
- q7 = vset_lane_u32(ref43, q7, 1);
-
- sad_row4_neon(&q0, vreinterpret_u8_u32(q8), vreinterpret_u8_u32(q4));
- sad_row4_neon(&q1, vreinterpret_u8_u32(q8), vreinterpret_u8_u32(q5));
- sad_row4_neon(&q2, vreinterpret_u8_u32(q8), vreinterpret_u8_u32(q6));
- sad_row4_neon(&q3, vreinterpret_u8_u32(q8), vreinterpret_u8_u32(q7));
-
- res[0] += horizontal_add_16x4(q0);
- res[1] += horizontal_add_16x4(q1);
- res[2] += horizontal_add_16x4(q2);
- res[3] += horizontal_add_16x4(q3);
- }
- break;
- }
- case 8: {
- for (int i = 0; i < height; i++) {
- uint16x4_t q0 = vdup_n_u16(0);
- uint16x4_t q1 = vdup_n_u16(0);
- uint16x4_t q2 = vdup_n_u16(0);
- uint16x4_t q3 = vdup_n_u16(0);
-
- uint8x8_t q5 = vld1_u8(src);
-
- sad_row8_neon(&q0, &q5, ref0);
- sad_row8_neon(&q1, &q5, ref1);
- sad_row8_neon(&q2, &q5, ref2);
- sad_row8_neon(&q3, &q5, ref3);
-
- src += src_stride;
- ref0 += ref_stride;
- ref1 += ref_stride;
- ref2 += ref_stride;
- ref3 += ref_stride;
-
- res[0] += horizontal_add_16x4(q0);
- res[1] += horizontal_add_16x4(q1);
- res[2] += horizontal_add_16x4(q2);
- res[3] += horizontal_add_16x4(q3);
- }
- break;
- }
- case 16: {
- for (int i = 0; i < height; i++) {
- uint16x8_t q0 = vdupq_n_u16(0);
- uint16x8_t q1 = vdupq_n_u16(0);
- uint16x8_t q2 = vdupq_n_u16(0);
- uint16x8_t q3 = vdupq_n_u16(0);
-
- uint8x16_t q4 = vld1q_u8(src);
-
- sad_row16_neon(&q0, &q4, ref0);
- sad_row16_neon(&q1, &q4, ref1);
- sad_row16_neon(&q2, &q4, ref2);
- sad_row16_neon(&q3, &q4, ref3);
-
- src += src_stride;
- ref0 += ref_stride;
- ref1 += ref_stride;
- ref2 += ref_stride;
- ref3 += ref_stride;
-
- res[0] += horizontal_add_16x8(q0);
- res[1] += horizontal_add_16x8(q1);
- res[2] += horizontal_add_16x8(q2);
- res[3] += horizontal_add_16x8(q3);
- }
- break;
- }
- case 32: {
- for (int i = 0; i < height; i++) {
- uint16x8_t q0 = vdupq_n_u16(0);
- uint16x8_t q1 = vdupq_n_u16(0);
- uint16x8_t q2 = vdupq_n_u16(0);
- uint16x8_t q3 = vdupq_n_u16(0);
-
- uint8x16_t q4 = vld1q_u8(src);
-
- sad_row16_neon(&q0, &q4, ref0);
- sad_row16_neon(&q1, &q4, ref1);
- sad_row16_neon(&q2, &q4, ref2);
- sad_row16_neon(&q3, &q4, ref3);
-
- q4 = vld1q_u8(src + 16);
-
- sad_row16_neon(&q0, &q4, ref0 + 16);
- sad_row16_neon(&q1, &q4, ref1 + 16);
- sad_row16_neon(&q2, &q4, ref2 + 16);
- sad_row16_neon(&q3, &q4, ref3 + 16);
-
- src += src_stride;
- ref0 += ref_stride;
- ref1 += ref_stride;
- ref2 += ref_stride;
- ref3 += ref_stride;
-
- res[0] += horizontal_add_16x8(q0);
- res[1] += horizontal_add_16x8(q1);
- res[2] += horizontal_add_16x8(q2);
- res[3] += horizontal_add_16x8(q3);
- }
- break;
- }
- case 64: {
- for (int i = 0; i < height; i++) {
- uint16x8_t q0 = vdupq_n_u16(0);
- uint16x8_t q1 = vdupq_n_u16(0);
- uint16x8_t q2 = vdupq_n_u16(0);
- uint16x8_t q3 = vdupq_n_u16(0);
-
- uint8x16_t q4 = vld1q_u8(src);
-
- sad_row16_neon(&q0, &q4, ref0);
- sad_row16_neon(&q1, &q4, ref1);
- sad_row16_neon(&q2, &q4, ref2);
- sad_row16_neon(&q3, &q4, ref3);
-
- q4 = vld1q_u8(src + 16);
-
- sad_row16_neon(&q0, &q4, ref0 + 16);
- sad_row16_neon(&q1, &q4, ref1 + 16);
- sad_row16_neon(&q2, &q4, ref2 + 16);
- sad_row16_neon(&q3, &q4, ref3 + 16);
-
- q4 = vld1q_u8(src + 32);
-
- sad_row16_neon(&q0, &q4, ref0 + 32);
- sad_row16_neon(&q1, &q4, ref1 + 32);
- sad_row16_neon(&q2, &q4, ref2 + 32);
- sad_row16_neon(&q3, &q4, ref3 + 32);
-
- q4 = vld1q_u8(src + 48);
-
- sad_row16_neon(&q0, &q4, ref0 + 48);
- sad_row16_neon(&q1, &q4, ref1 + 48);
- sad_row16_neon(&q2, &q4, ref2 + 48);
- sad_row16_neon(&q3, &q4, ref3 + 48);
-
- src += src_stride;
- ref0 += ref_stride;
- ref1 += ref_stride;
- ref2 += ref_stride;
- ref3 += ref_stride;
-
- res[0] += horizontal_add_16x8(q0);
- res[1] += horizontal_add_16x8(q1);
- res[2] += horizontal_add_16x8(q2);
- res[3] += horizontal_add_16x8(q3);
- }
- break;
- }
- case 128: {
- for (int i = 0; i < height; i++) {
- uint16x8_t q0 = vdupq_n_u16(0);
- uint16x8_t q1 = vdupq_n_u16(0);
- uint16x8_t q2 = vdupq_n_u16(0);
- uint16x8_t q3 = vdupq_n_u16(0);
-
- uint8x16_t q4 = vld1q_u8(src);
-
- sad_row16_neon(&q0, &q4, ref0);
- sad_row16_neon(&q1, &q4, ref1);
- sad_row16_neon(&q2, &q4, ref2);
- sad_row16_neon(&q3, &q4, ref3);
-
- q4 = vld1q_u8(src + 16);
-
- sad_row16_neon(&q0, &q4, ref0 + 16);
- sad_row16_neon(&q1, &q4, ref1 + 16);
- sad_row16_neon(&q2, &q4, ref2 + 16);
- sad_row16_neon(&q3, &q4, ref3 + 16);
-
- q4 = vld1q_u8(src + 32);
-
- sad_row16_neon(&q0, &q4, ref0 + 32);
- sad_row16_neon(&q1, &q4, ref1 + 32);
- sad_row16_neon(&q2, &q4, ref2 + 32);
- sad_row16_neon(&q3, &q4, ref3 + 32);
-
- q4 = vld1q_u8(src + 48);
-
- sad_row16_neon(&q0, &q4, ref0 + 48);
- sad_row16_neon(&q1, &q4, ref1 + 48);
- sad_row16_neon(&q2, &q4, ref2 + 48);
- sad_row16_neon(&q3, &q4, ref3 + 48);
-
- q4 = vld1q_u8(src + 64);
-
- sad_row16_neon(&q0, &q4, ref0 + 64);
- sad_row16_neon(&q1, &q4, ref1 + 64);
- sad_row16_neon(&q2, &q4, ref2 + 64);
- sad_row16_neon(&q3, &q4, ref3 + 64);
-
- q4 = vld1q_u8(src + 80);
-
- sad_row16_neon(&q0, &q4, ref0 + 80);
- sad_row16_neon(&q1, &q4, ref1 + 80);
- sad_row16_neon(&q2, &q4, ref2 + 80);
- sad_row16_neon(&q3, &q4, ref3 + 80);
-
- q4 = vld1q_u8(src + 96);
-
- sad_row16_neon(&q0, &q4, ref0 + 96);
- sad_row16_neon(&q1, &q4, ref1 + 96);
- sad_row16_neon(&q2, &q4, ref2 + 96);
- sad_row16_neon(&q3, &q4, ref3 + 96);
-
- q4 = vld1q_u8(src + 112);
-
- sad_row16_neon(&q0, &q4, ref0 + 112);
- sad_row16_neon(&q1, &q4, ref1 + 112);
- sad_row16_neon(&q2, &q4, ref2 + 112);
- sad_row16_neon(&q3, &q4, ref3 + 112);
-
- src += src_stride;
- ref0 += ref_stride;
- ref1 += ref_stride;
- ref2 += ref_stride;
- ref3 += ref_stride;
-
- res[0] += horizontal_add_16x8(q0);
- res[1] += horizontal_add_16x8(q1);
- res[2] += horizontal_add_16x8(q2);
- res[3] += horizontal_add_16x8(q3);
- }
- }
+#define SAD_WXH_4D_NEON(w, h) \
+ void aom_sad##w##x##h##x4d_neon(const uint8_t *src, int src_stride, \
+ const uint8_t *const ref[4], int ref_stride, \
+ uint32_t res[4]) { \
+ sad##w##xhx4d_neon(src, src_stride, ref, ref_stride, res, (h)); \
}
-}
-#define SAD_SKIP_MXN_NEON(m, n) \
- void aom_sad_skip_##m##x##n##x4d_neon(const uint8_t *src, int src_stride, \
+SAD_WXH_4D_NEON(4, 4)
+SAD_WXH_4D_NEON(4, 8)
+SAD_WXH_4D_NEON(4, 16)
+SAD_WXH_4D_NEON(4, 32)
+
+SAD_WXH_4D_NEON(8, 4)
+SAD_WXH_4D_NEON(8, 8)
+SAD_WXH_4D_NEON(8, 16)
+SAD_WXH_4D_NEON(8, 32)
+
+SAD_WXH_4D_NEON(16, 4)
+SAD_WXH_4D_NEON(16, 8)
+SAD_WXH_4D_NEON(16, 16)
+SAD_WXH_4D_NEON(16, 32)
+SAD_WXH_4D_NEON(16, 64)
+
+SAD_WXH_4D_NEON(32, 8)
+SAD_WXH_4D_NEON(32, 16)
+SAD_WXH_4D_NEON(32, 32)
+SAD_WXH_4D_NEON(32, 64)
+
+SAD_WXH_4D_NEON(64, 16)
+SAD_WXH_4D_NEON(64, 32)
+SAD_WXH_4D_NEON(64, 64)
+SAD_WXH_4D_NEON(64, 128)
+
+SAD_WXH_4D_NEON(128, 64)
+SAD_WXH_4D_NEON(128, 128)
+
+#undef SAD_WXH_4D_NEON
+
+#define SAD_SKIP_WXH_4D_NEON(w, h) \
+ void aom_sad_skip_##w##x##h##x4d_neon(const uint8_t *src, int src_stride, \
const uint8_t *const ref[4], \
int ref_stride, uint32_t res[4]) { \
- aom_sadMxNx4d_neon(m, ((n) >> 1), src, 2 * src_stride, ref, \
- 2 * ref_stride, res); \
+ sad##w##xhx4d_neon(src, 2 * src_stride, ref, 2 * ref_stride, res, \
+ ((h) >> 1)); \
res[0] <<= 1; \
res[1] <<= 1; \
res[2] <<= 1; \
res[3] <<= 1; \
}
-SAD_SKIP_MXN_NEON(4, 8)
-SAD_SKIP_MXN_NEON(4, 16)
-SAD_SKIP_MXN_NEON(4, 32)
+SAD_SKIP_WXH_4D_NEON(4, 8)
+SAD_SKIP_WXH_4D_NEON(4, 16)
+SAD_SKIP_WXH_4D_NEON(4, 32)
-SAD_SKIP_MXN_NEON(8, 8)
-SAD_SKIP_MXN_NEON(8, 16)
-SAD_SKIP_MXN_NEON(8, 32)
+SAD_SKIP_WXH_4D_NEON(8, 8)
+SAD_SKIP_WXH_4D_NEON(8, 16)
+SAD_SKIP_WXH_4D_NEON(8, 32)
-SAD_SKIP_MXN_NEON(16, 8)
-SAD_SKIP_MXN_NEON(16, 16)
-SAD_SKIP_MXN_NEON(16, 32)
-SAD_SKIP_MXN_NEON(16, 64)
+SAD_SKIP_WXH_4D_NEON(16, 8)
+SAD_SKIP_WXH_4D_NEON(16, 16)
+SAD_SKIP_WXH_4D_NEON(16, 32)
+SAD_SKIP_WXH_4D_NEON(16, 64)
-SAD_SKIP_MXN_NEON(32, 8)
-SAD_SKIP_MXN_NEON(32, 16)
-SAD_SKIP_MXN_NEON(32, 32)
-SAD_SKIP_MXN_NEON(32, 64)
+SAD_SKIP_WXH_4D_NEON(32, 8)
+SAD_SKIP_WXH_4D_NEON(32, 16)
+SAD_SKIP_WXH_4D_NEON(32, 32)
+SAD_SKIP_WXH_4D_NEON(32, 64)
-SAD_SKIP_MXN_NEON(64, 16)
-SAD_SKIP_MXN_NEON(64, 32)
-SAD_SKIP_MXN_NEON(64, 64)
-SAD_SKIP_MXN_NEON(64, 128)
+SAD_SKIP_WXH_4D_NEON(64, 16)
+SAD_SKIP_WXH_4D_NEON(64, 32)
+SAD_SKIP_WXH_4D_NEON(64, 64)
+SAD_SKIP_WXH_4D_NEON(64, 128)
-SAD_SKIP_MXN_NEON(128, 64)
-SAD_SKIP_MXN_NEON(128, 128)
+SAD_SKIP_WXH_4D_NEON(128, 64)
+SAD_SKIP_WXH_4D_NEON(128, 128)
-#undef SAD_SKIP_MXN_NEON
+#undef SAD_SKIP_WXH_4D_NEON
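The rewritten 4D SAD kernels above come in two flavors. On AArch64 cores with the dot-product extension, each 16-byte absolute-difference vector is folded straight into 32-bit lanes with UDOT against an all-ones vector, so the accumulators can never overflow and no staged reduction is needed. The fallback path accumulates byte differences into 16-bit lanes via vpadalq_u8, which is why the 128- and 64-wide variants flush into the 32-bit res totals every h_tmp rows (32 and 64 respectively): at width 128 each 16-bit lane can gain up to 8 * 255 = 2040 per row, so 32 rows is the most that stays below 65535. A sketch of the dot-product row step, guarded the same way as the code above:

    #include <arm_neon.h>
    #include <stdint.h>

    #if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
    /* One row of a 16-wide SAD via the ABD + UDOT idiom: the dot product of
     * the absolute differences with an all-ones vector sums each group of
     * four bytes into one 32-bit lane of the accumulator. */
    static inline uint32x4_t sad16_row_dotprod(uint32x4_t acc,
                                               const uint8_t *src,
                                               const uint8_t *ref) {
      const uint8x16_t abs_diff = vabdq_u8(vld1q_u8(src), vld1q_u8(ref));
      return vdotq_u32(acc, abs_diff, vdupq_n_u8(1));
    }
    #endif  // __aarch64__ && __ARM_FEATURE_DOTPROD

The 32-wide variant needs no such flush even on the fallback path: each 16-bit lane there gains at most 2 * 255 per row, and 32-wide AV1 blocks are at most 64 rows tall.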
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/sad_neon.c b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/sad_neon.c
index 4f0a1990ca0..47e00452c80 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/sad_neon.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/sad_neon.c
@@ -13,552 +13,380 @@
#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "aom/aom_integer.h"
+#include "aom_dsp/arm/sum_neon.h"
-unsigned int aom_sad8x16_neon(const uint8_t *src_ptr, int src_stride,
- const uint8_t *ref_ptr, int ref_stride) {
- uint8x8_t d0, d8;
- uint16x8_t q12;
- uint32x4_t q1;
- uint64x2_t q3;
- uint32x2_t d5;
- int i;
-
- d0 = vld1_u8(src_ptr);
- src_ptr += src_stride;
- d8 = vld1_u8(ref_ptr);
- ref_ptr += ref_stride;
- q12 = vabdl_u8(d0, d8);
-
- for (i = 0; i < 15; i++) {
- d0 = vld1_u8(src_ptr);
- src_ptr += src_stride;
- d8 = vld1_u8(ref_ptr);
- ref_ptr += ref_stride;
- q12 = vabal_u8(q12, d0, d8);
- }
+#if defined(__ARM_FEATURE_DOTPROD)
- q1 = vpaddlq_u16(q12);
- q3 = vpaddlq_u32(q1);
- d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
- vreinterpret_u32_u64(vget_high_u64(q3)));
-
- return vget_lane_u32(d5, 0);
-}
+static INLINE unsigned int sadwxh_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ int w, int h) {
+ // Only two accumulators are required for optimal instruction throughput of
+ // the ABD, UDOT sequence on CPUs with either 2 or 4 Neon pipes.
+ uint32x4_t sum[2] = { vdupq_n_u32(0), vdupq_n_u32(0) };
+
+ int i = 0;
+ do {
+ int j = 0;
+ do {
+ uint8x16_t s0, s1, r0, r1, diff0, diff1;
+
+ s0 = vld1q_u8(src_ptr + j);
+ r0 = vld1q_u8(ref_ptr + j);
+ diff0 = vabdq_u8(s0, r0);
+ sum[0] = vdotq_u32(sum[0], diff0, vdupq_n_u8(1));
+
+ s1 = vld1q_u8(src_ptr + j + 16);
+ r1 = vld1q_u8(ref_ptr + j + 16);
+ diff1 = vabdq_u8(s1, r1);
+ sum[1] = vdotq_u32(sum[1], diff1, vdupq_n_u8(1));
+
+ j += 32;
+ } while (j < w);
-unsigned int aom_sad4x4_neon(const uint8_t *src_ptr, int src_stride,
- const uint8_t *ref_ptr, int ref_stride) {
- uint8x8_t d0, d8;
- uint16x8_t q12;
- uint32x2_t d1;
- uint64x1_t d3;
- int i;
-
- d0 = vld1_u8(src_ptr);
- src_ptr += src_stride;
- d8 = vld1_u8(ref_ptr);
- ref_ptr += ref_stride;
- q12 = vabdl_u8(d0, d8);
-
- for (i = 0; i < 3; i++) {
- d0 = vld1_u8(src_ptr);
src_ptr += src_stride;
- d8 = vld1_u8(ref_ptr);
ref_ptr += ref_stride;
- q12 = vabal_u8(q12, d0, d8);
- }
-
- d1 = vpaddl_u16(vget_low_u16(q12));
- d3 = vpaddl_u32(d1);
+ i++;
+ } while (i < h);
- return vget_lane_u32(vreinterpret_u32_u64(d3), 0);
+ return horizontal_add_u32x4(vaddq_u32(sum[0], sum[1]));
}
-unsigned int aom_sad16x8_neon(const uint8_t *src_ptr, int src_stride,
- const uint8_t *ref_ptr, int ref_stride) {
- uint8x16_t q0, q4;
- uint16x8_t q12, q13;
- uint32x4_t q1;
- uint64x2_t q3;
- uint32x2_t d5;
- int i;
-
- q0 = vld1q_u8(src_ptr);
- src_ptr += src_stride;
- q4 = vld1q_u8(ref_ptr);
- ref_ptr += ref_stride;
- q12 = vabdl_u8(vget_low_u8(q0), vget_low_u8(q4));
- q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4));
-
- for (i = 0; i < 7; i++) {
- q0 = vld1q_u8(src_ptr);
- src_ptr += src_stride;
- q4 = vld1q_u8(ref_ptr);
- ref_ptr += ref_stride;
- q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4));
- q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4));
- }
-
- q12 = vaddq_u16(q12, q13);
- q1 = vpaddlq_u16(q12);
- q3 = vpaddlq_u32(q1);
- d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
- vreinterpret_u32_u64(vget_high_u64(q3)));
-
- return vget_lane_u32(d5, 0);
+static INLINE unsigned int sad128xh_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ int h) {
+ return sadwxh_neon(src_ptr, src_stride, ref_ptr, ref_stride, 128, h);
}
-static INLINE unsigned int horizontal_long_add_16x8(const uint16x8_t vec_lo,
- const uint16x8_t vec_hi) {
- const uint32x4_t vec_l_lo =
- vaddl_u16(vget_low_u16(vec_lo), vget_high_u16(vec_lo));
- const uint32x4_t vec_l_hi =
- vaddl_u16(vget_low_u16(vec_hi), vget_high_u16(vec_hi));
- const uint32x4_t a = vaddq_u32(vec_l_lo, vec_l_hi);
- const uint64x2_t b = vpaddlq_u32(a);
- const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
- vreinterpret_u32_u64(vget_high_u64(b)));
- return vget_lane_u32(c, 0);
-}
-static INLINE unsigned int horizontal_add_16x8(const uint16x8_t vec_16x8) {
- const uint32x4_t a = vpaddlq_u16(vec_16x8);
- const uint64x2_t b = vpaddlq_u32(a);
- const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
- vreinterpret_u32_u64(vget_high_u64(b)));
- return vget_lane_u32(c, 0);
+static INLINE unsigned int sad64xh_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ int h) {
+ return sadwxh_neon(src_ptr, src_stride, ref_ptr, ref_stride, 64, h);
}
-unsigned int aom_sad64x64_neon(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride) {
- int i;
- uint16x8_t vec_accum_lo = vdupq_n_u16(0);
- uint16x8_t vec_accum_hi = vdupq_n_u16(0);
- for (i = 0; i < 64; ++i) {
- const uint8x16_t vec_src_00 = vld1q_u8(src);
- const uint8x16_t vec_src_16 = vld1q_u8(src + 16);
- const uint8x16_t vec_src_32 = vld1q_u8(src + 32);
- const uint8x16_t vec_src_48 = vld1q_u8(src + 48);
- const uint8x16_t vec_ref_00 = vld1q_u8(ref);
- const uint8x16_t vec_ref_16 = vld1q_u8(ref + 16);
- const uint8x16_t vec_ref_32 = vld1q_u8(ref + 32);
- const uint8x16_t vec_ref_48 = vld1q_u8(ref + 48);
- src += src_stride;
- ref += ref_stride;
- vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_00),
- vget_low_u8(vec_ref_00));
- vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_00),
- vget_high_u8(vec_ref_00));
- vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_16),
- vget_low_u8(vec_ref_16));
- vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_16),
- vget_high_u8(vec_ref_16));
- vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_32),
- vget_low_u8(vec_ref_32));
- vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_32),
- vget_high_u8(vec_ref_32));
- vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_48),
- vget_low_u8(vec_ref_48));
- vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_48),
- vget_high_u8(vec_ref_48));
- }
- return horizontal_long_add_16x8(vec_accum_lo, vec_accum_hi);
+static INLINE unsigned int sad32xh_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ int h) {
+ return sadwxh_neon(src_ptr, src_stride, ref_ptr, ref_stride, 32, h);
}
-unsigned int aom_sad128x128_neon(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride) {
- uint16x8_t vec_accum_lo, vec_accum_hi;
- uint32x4_t vec_accum_32lo = vdupq_n_u32(0);
- uint32x4_t vec_accum_32hi = vdupq_n_u32(0);
- uint16x8_t tmp;
- for (int i = 0; i < 128; ++i) {
- const uint8x16_t vec_src_00 = vld1q_u8(src);
- const uint8x16_t vec_src_16 = vld1q_u8(src + 16);
- const uint8x16_t vec_src_32 = vld1q_u8(src + 32);
- const uint8x16_t vec_src_48 = vld1q_u8(src + 48);
- const uint8x16_t vec_src_64 = vld1q_u8(src + 64);
- const uint8x16_t vec_src_80 = vld1q_u8(src + 80);
- const uint8x16_t vec_src_96 = vld1q_u8(src + 96);
- const uint8x16_t vec_src_112 = vld1q_u8(src + 112);
- const uint8x16_t vec_ref_00 = vld1q_u8(ref);
- const uint8x16_t vec_ref_16 = vld1q_u8(ref + 16);
- const uint8x16_t vec_ref_32 = vld1q_u8(ref + 32);
- const uint8x16_t vec_ref_48 = vld1q_u8(ref + 48);
- const uint8x16_t vec_ref_64 = vld1q_u8(ref + 64);
- const uint8x16_t vec_ref_80 = vld1q_u8(ref + 80);
- const uint8x16_t vec_ref_96 = vld1q_u8(ref + 96);
- const uint8x16_t vec_ref_112 = vld1q_u8(ref + 112);
- src += src_stride;
- ref += ref_stride;
- vec_accum_lo = vdupq_n_u16(0);
- vec_accum_hi = vdupq_n_u16(0);
- vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_00),
- vget_low_u8(vec_ref_00));
- vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_00),
- vget_high_u8(vec_ref_00));
- vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_16),
- vget_low_u8(vec_ref_16));
- vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_16),
- vget_high_u8(vec_ref_16));
- vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_32),
- vget_low_u8(vec_ref_32));
- vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_32),
- vget_high_u8(vec_ref_32));
- vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_48),
- vget_low_u8(vec_ref_48));
- vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_48),
- vget_high_u8(vec_ref_48));
- vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_64),
- vget_low_u8(vec_ref_64));
- vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_64),
- vget_high_u8(vec_ref_64));
- vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_80),
- vget_low_u8(vec_ref_80));
- vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_80),
- vget_high_u8(vec_ref_80));
- vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_96),
- vget_low_u8(vec_ref_96));
- vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_96),
- vget_high_u8(vec_ref_96));
- vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_112),
- vget_low_u8(vec_ref_112));
- vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_112),
- vget_high_u8(vec_ref_112));
-
- tmp = vaddq_u16(vec_accum_lo, vec_accum_hi);
- vec_accum_32lo = vaddw_u16(vec_accum_32lo, vget_low_u16(tmp));
- vec_accum_32hi = vaddw_u16(vec_accum_32hi, vget_high_u16(tmp));
- }
- const uint32x4_t a = vaddq_u32(vec_accum_32lo, vec_accum_32hi);
- const uint64x2_t b = vpaddlq_u32(a);
- const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
- vreinterpret_u32_u64(vget_high_u64(b)));
- return vget_lane_u32(c, 0);
-}
+static INLINE unsigned int sad16xh_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride,
+ int h) {
+ uint32x4_t sum[2] = { vdupq_n_u32(0), vdupq_n_u32(0) };
-unsigned int aom_sad32x32_neon(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride) {
- int i;
- uint16x8_t vec_accum_lo = vdupq_n_u16(0);
- uint16x8_t vec_accum_hi = vdupq_n_u16(0);
-
- for (i = 0; i < 32; ++i) {
- const uint8x16_t vec_src_00 = vld1q_u8(src);
- const uint8x16_t vec_src_16 = vld1q_u8(src + 16);
- const uint8x16_t vec_ref_00 = vld1q_u8(ref);
- const uint8x16_t vec_ref_16 = vld1q_u8(ref + 16);
- src += src_stride;
- ref += ref_stride;
- vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_00),
- vget_low_u8(vec_ref_00));
- vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_00),
- vget_high_u8(vec_ref_00));
- vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_16),
- vget_low_u8(vec_ref_16));
- vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_16),
- vget_high_u8(vec_ref_16));
- }
- return horizontal_add_16x8(vaddq_u16(vec_accum_lo, vec_accum_hi));
-}
+ int i = 0;
+ do {
+ uint8x16_t s0, s1, r0, r1, diff0, diff1;
-unsigned int aom_sad16x16_neon(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride) {
- int i;
- uint16x8_t vec_accum_lo = vdupq_n_u16(0);
- uint16x8_t vec_accum_hi = vdupq_n_u16(0);
-
- for (i = 0; i < 16; ++i) {
- const uint8x16_t vec_src = vld1q_u8(src);
- const uint8x16_t vec_ref = vld1q_u8(ref);
- src += src_stride;
- ref += ref_stride;
- vec_accum_lo =
- vabal_u8(vec_accum_lo, vget_low_u8(vec_src), vget_low_u8(vec_ref));
- vec_accum_hi =
- vabal_u8(vec_accum_hi, vget_high_u8(vec_src), vget_high_u8(vec_ref));
- }
- return horizontal_add_16x8(vaddq_u16(vec_accum_lo, vec_accum_hi));
-}
+ s0 = vld1q_u8(src_ptr);
+ r0 = vld1q_u8(ref_ptr);
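+    // Dot-product the absolute differences with an all-ones vector so the
+    // SAD widens straight into 32-bit lanes (a single UDOT per 16 bytes).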
+ diff0 = vabdq_u8(s0, r0);
+ sum[0] = vdotq_u32(sum[0], diff0, vdupq_n_u8(1));
-unsigned int aom_sad8x8_neon(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride) {
- int i;
- uint16x8_t vec_accum = vdupq_n_u16(0);
-
- for (i = 0; i < 8; ++i) {
- const uint8x8_t vec_src = vld1_u8(src);
- const uint8x8_t vec_ref = vld1_u8(ref);
- src += src_stride;
- ref += ref_stride;
- vec_accum = vabal_u8(vec_accum, vec_src, vec_ref);
- }
- return horizontal_add_16x8(vec_accum);
+ src_ptr += src_stride;
+ ref_ptr += ref_stride;
+
+ s1 = vld1q_u8(src_ptr);
+ r1 = vld1q_u8(ref_ptr);
+ diff1 = vabdq_u8(s1, r1);
+ sum[1] = vdotq_u32(sum[1], diff1, vdupq_n_u8(1));
+
+ src_ptr += src_stride;
+ ref_ptr += ref_stride;
+ i++;
+ } while (i < h / 2);
+
+ return horizontal_add_u32x4(vaddq_u32(sum[0], sum[1]));
}
+#else // !defined(__ARM_FEATURE_DOTPROD)
+
static INLINE unsigned int sad128xh_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
int h) {
- int sum = 0;
- for (int i = 0; i < h; i++) {
- uint16x8_t q3 = vdupq_n_u16(0);
-
- uint8x16_t q0 = vld1q_u8(src_ptr);
- uint8x16_t q1 = vld1q_u8(ref_ptr);
- uint8x16_t q2 = vabdq_u8(q0, q1);
- q3 = vpadalq_u8(q3, q2);
-
- q0 = vld1q_u8(src_ptr + 16);
- q1 = vld1q_u8(ref_ptr + 16);
- q2 = vabdq_u8(q0, q1);
- q3 = vpadalq_u8(q3, q2);
-
- q0 = vld1q_u8(src_ptr + 32);
- q1 = vld1q_u8(ref_ptr + 32);
- q2 = vabdq_u8(q0, q1);
- q3 = vpadalq_u8(q3, q2);
-
- q0 = vld1q_u8(src_ptr + 48);
- q1 = vld1q_u8(ref_ptr + 48);
- q2 = vabdq_u8(q0, q1);
- q3 = vpadalq_u8(q3, q2);
-
- q0 = vld1q_u8(src_ptr + 64);
- q1 = vld1q_u8(ref_ptr + 64);
- q2 = vabdq_u8(q0, q1);
- q3 = vpadalq_u8(q3, q2);
-
- q0 = vld1q_u8(src_ptr + 80);
- q1 = vld1q_u8(ref_ptr + 80);
- q2 = vabdq_u8(q0, q1);
- q3 = vpadalq_u8(q3, q2);
-
- q0 = vld1q_u8(src_ptr + 96);
- q1 = vld1q_u8(ref_ptr + 96);
- q2 = vabdq_u8(q0, q1);
- q3 = vpadalq_u8(q3, q2);
-
- q0 = vld1q_u8(src_ptr + 112);
- q1 = vld1q_u8(ref_ptr + 112);
- q2 = vabdq_u8(q0, q1);
- q3 = vpadalq_u8(q3, q2);
+  // We use 8 accumulators to prevent overflow for large values of 'h' and to
+  // enable optimal UADALP instruction throughput on CPUs that have either 2
+  // or 4 Neon pipes.
+ uint16x8_t sum[8] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
+ vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
+ vdupq_n_u16(0), vdupq_n_u16(0) };
+
+ int i = 0;
+ do {
+ uint8x16_t s0, s1, s2, s3, s4, s5, s6, s7;
+ uint8x16_t r0, r1, r2, r3, r4, r5, r6, r7;
+ uint8x16_t diff0, diff1, diff2, diff3, diff4, diff5, diff6, diff7;
+
+ s0 = vld1q_u8(src_ptr);
+ r0 = vld1q_u8(ref_ptr);
+ diff0 = vabdq_u8(s0, r0);
+ sum[0] = vpadalq_u8(sum[0], diff0);
+
+ s1 = vld1q_u8(src_ptr + 16);
+ r1 = vld1q_u8(ref_ptr + 16);
+ diff1 = vabdq_u8(s1, r1);
+ sum[1] = vpadalq_u8(sum[1], diff1);
+
+ s2 = vld1q_u8(src_ptr + 32);
+ r2 = vld1q_u8(ref_ptr + 32);
+ diff2 = vabdq_u8(s2, r2);
+ sum[2] = vpadalq_u8(sum[2], diff2);
+
+ s3 = vld1q_u8(src_ptr + 48);
+ r3 = vld1q_u8(ref_ptr + 48);
+ diff3 = vabdq_u8(s3, r3);
+ sum[3] = vpadalq_u8(sum[3], diff3);
+
+ s4 = vld1q_u8(src_ptr + 64);
+ r4 = vld1q_u8(ref_ptr + 64);
+ diff4 = vabdq_u8(s4, r4);
+ sum[4] = vpadalq_u8(sum[4], diff4);
+
+ s5 = vld1q_u8(src_ptr + 80);
+ r5 = vld1q_u8(ref_ptr + 80);
+ diff5 = vabdq_u8(s5, r5);
+ sum[5] = vpadalq_u8(sum[5], diff5);
+
+ s6 = vld1q_u8(src_ptr + 96);
+ r6 = vld1q_u8(ref_ptr + 96);
+ diff6 = vabdq_u8(s6, r6);
+ sum[6] = vpadalq_u8(sum[6], diff6);
+
+ s7 = vld1q_u8(src_ptr + 112);
+ r7 = vld1q_u8(ref_ptr + 112);
+ diff7 = vabdq_u8(s7, r7);
+ sum[7] = vpadalq_u8(sum[7], diff7);
src_ptr += src_stride;
ref_ptr += ref_stride;
-
- sum += horizontal_add_16x8(q3);
- }
-
- return sum;
+ i++;
+ } while (i < h);
+
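+  // Widen and pairwise-add the eight 16-bit accumulators into a single
+  // 32-bit vector before the final horizontal reduction.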
+ uint32x4_t sum_u32 = vpaddlq_u16(sum[0]);
+ sum_u32 = vpadalq_u16(sum_u32, sum[1]);
+ sum_u32 = vpadalq_u16(sum_u32, sum[2]);
+ sum_u32 = vpadalq_u16(sum_u32, sum[3]);
+ sum_u32 = vpadalq_u16(sum_u32, sum[4]);
+ sum_u32 = vpadalq_u16(sum_u32, sum[5]);
+ sum_u32 = vpadalq_u16(sum_u32, sum[6]);
+ sum_u32 = vpadalq_u16(sum_u32, sum[7]);
+
+ return horizontal_add_u32x4(sum_u32);
}
static INLINE unsigned int sad64xh_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
int h) {
- int sum = 0;
- for (int i = 0; i < h; i++) {
- uint16x8_t q3 = vdupq_n_u16(0);
-
- uint8x16_t q0 = vld1q_u8(src_ptr);
- uint8x16_t q1 = vld1q_u8(ref_ptr);
- uint8x16_t q2 = vabdq_u8(q0, q1);
- q3 = vpadalq_u8(q3, q2);
-
- q0 = vld1q_u8(src_ptr + 16);
- q1 = vld1q_u8(ref_ptr + 16);
- q2 = vabdq_u8(q0, q1);
- q3 = vpadalq_u8(q3, q2);
-
- q0 = vld1q_u8(src_ptr + 32);
- q1 = vld1q_u8(ref_ptr + 32);
- q2 = vabdq_u8(q0, q1);
- q3 = vpadalq_u8(q3, q2);
-
- q0 = vld1q_u8(src_ptr + 48);
- q1 = vld1q_u8(ref_ptr + 48);
- q2 = vabdq_u8(q0, q1);
- q3 = vpadalq_u8(q3, q2);
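+  // Four 16-bit accumulators suffice here: each lane gathers at most
+  // h (<= 128) pairwise byte sums of 510, which stays within uint16_t range.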
+ uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
+ vdupq_n_u16(0) };
+
+ int i = 0;
+ do {
+ uint8x16_t s0, s1, s2, s3, r0, r1, r2, r3;
+ uint8x16_t diff0, diff1, diff2, diff3;
+
+ s0 = vld1q_u8(src_ptr);
+ r0 = vld1q_u8(ref_ptr);
+ diff0 = vabdq_u8(s0, r0);
+ sum[0] = vpadalq_u8(sum[0], diff0);
+
+ s1 = vld1q_u8(src_ptr + 16);
+ r1 = vld1q_u8(ref_ptr + 16);
+ diff1 = vabdq_u8(s1, r1);
+ sum[1] = vpadalq_u8(sum[1], diff1);
+
+ s2 = vld1q_u8(src_ptr + 32);
+ r2 = vld1q_u8(ref_ptr + 32);
+ diff2 = vabdq_u8(s2, r2);
+ sum[2] = vpadalq_u8(sum[2], diff2);
+
+ s3 = vld1q_u8(src_ptr + 48);
+ r3 = vld1q_u8(ref_ptr + 48);
+ diff3 = vabdq_u8(s3, r3);
+ sum[3] = vpadalq_u8(sum[3], diff3);
src_ptr += src_stride;
ref_ptr += ref_stride;
+ i++;
+ } while (i < h);
- sum += horizontal_add_16x8(q3);
- }
+ uint32x4_t sum_u32 = vpaddlq_u16(sum[0]);
+ sum_u32 = vpadalq_u16(sum_u32, sum[1]);
+ sum_u32 = vpadalq_u16(sum_u32, sum[2]);
+ sum_u32 = vpadalq_u16(sum_u32, sum[3]);
- return sum;
+ return horizontal_add_u32x4(sum_u32);
}
static INLINE unsigned int sad32xh_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
int h) {
- int sum = 0;
- for (int i = 0; i < h; i++) {
- uint16x8_t q3 = vdupq_n_u16(0);
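+  // Widen each row's pairwise sums to 32 bits every iteration, so the
+  // accumulator cannot overflow for any supported block height.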
+ uint32x4_t sum = vdupq_n_u32(0);
- uint8x16_t q0 = vld1q_u8(src_ptr);
- uint8x16_t q1 = vld1q_u8(ref_ptr);
- uint8x16_t q2 = vabdq_u8(q0, q1);
- q3 = vpadalq_u8(q3, q2);
+ int i = 0;
+ do {
+ uint8x16_t s0 = vld1q_u8(src_ptr);
+ uint8x16_t r0 = vld1q_u8(ref_ptr);
+ uint8x16_t diff0 = vabdq_u8(s0, r0);
+ uint16x8_t sum0 = vpaddlq_u8(diff0);
- q0 = vld1q_u8(src_ptr + 16);
- q1 = vld1q_u8(ref_ptr + 16);
- q2 = vabdq_u8(q0, q1);
- q3 = vpadalq_u8(q3, q2);
+ uint8x16_t s1 = vld1q_u8(src_ptr + 16);
+ uint8x16_t r1 = vld1q_u8(ref_ptr + 16);
+ uint8x16_t diff1 = vabdq_u8(s1, r1);
+ uint16x8_t sum1 = vpaddlq_u8(diff1);
- sum += horizontal_add_16x8(q3);
+ sum = vpadalq_u16(sum, sum0);
+ sum = vpadalq_u16(sum, sum1);
src_ptr += src_stride;
ref_ptr += ref_stride;
- }
+ i++;
+ } while (i < h);
- return sum;
+ return horizontal_add_u32x4(sum);
}
static INLINE unsigned int sad16xh_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
int h) {
- int sum = 0;
- for (int i = 0; i < h; i++) {
- uint8x8_t q0 = vld1_u8(src_ptr);
- uint8x8_t q1 = vld1_u8(ref_ptr);
- sum += vget_lane_u16(vpaddl_u8(vabd_u8(q0, q1)), 0);
- sum += vget_lane_u16(vpaddl_u8(vabd_u8(q0, q1)), 1);
- sum += vget_lane_u16(vpaddl_u8(vabd_u8(q0, q1)), 2);
- sum += vget_lane_u16(vpaddl_u8(vabd_u8(q0, q1)), 3);
- q0 = vld1_u8(src_ptr + 8);
- q1 = vld1_u8(ref_ptr + 8);
- sum += vget_lane_u16(vpaddl_u8(vabd_u8(q0, q1)), 0);
- sum += vget_lane_u16(vpaddl_u8(vabd_u8(q0, q1)), 1);
- sum += vget_lane_u16(vpaddl_u8(vabd_u8(q0, q1)), 2);
- sum += vget_lane_u16(vpaddl_u8(vabd_u8(q0, q1)), 3);
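+  // A single 16-bit accumulator is safe here: 16-wide blocks have h <= 64,
+  // so the accumulated pairwise byte sums stay within uint16_t range.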
+ uint16x8_t sum = vdupq_n_u16(0);
+
+ int i = 0;
+ do {
+ uint8x16_t s = vld1q_u8(src_ptr);
+ uint8x16_t r = vld1q_u8(ref_ptr);
+
+ uint8x16_t diff = vabdq_u8(s, r);
+ sum = vpadalq_u8(sum, diff);
src_ptr += src_stride;
ref_ptr += ref_stride;
- }
+ i++;
+ } while (i < h);
- return sum;
+ return horizontal_add_u16x8(sum);
}
+#endif // defined(__ARM_FEATURE_DOTPROD)
+
static INLINE unsigned int sad8xh_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
int h) {
- uint16x8_t q3 = vdupq_n_u16(0);
- for (int y = 0; y < h; y++) {
- uint8x8_t q0 = vld1_u8(src_ptr);
- uint8x8_t q1 = vld1_u8(ref_ptr);
+ uint16x8_t sum = vdupq_n_u16(0);
+
+ int i = 0;
+ do {
+ uint8x8_t s = vld1_u8(src_ptr);
+ uint8x8_t r = vld1_u8(ref_ptr);
+
+ sum = vabal_u8(sum, s, r);
+
src_ptr += src_stride;
ref_ptr += ref_stride;
- q3 = vabal_u8(q3, q0, q1);
- }
- return horizontal_add_16x8(q3);
+ i++;
+ } while (i < h);
+
+ return horizontal_add_u16x8(sum);
}
static INLINE unsigned int sad4xh_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
int h) {
- uint16x8_t q3 = vdupq_n_u16(0);
- uint32x2_t q0 = vdup_n_u32(0);
- uint32x2_t q1 = vdup_n_u32(0);
- uint32_t src4, ref4;
- for (int y = 0; y < h / 2; y++) {
- memcpy(&src4, src_ptr, 4);
- memcpy(&ref4, ref_ptr, 4);
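+  // Pack two 4-byte rows into a single 8-byte vector so each vabal_u8 call
+  // processes a pair of rows.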
+ uint16x8_t sum = vdupq_n_u16(0);
+
+ int i = 0;
+ do {
+ uint32x2_t s, r;
+ uint32_t s0, s1, r0, r1;
+
+ memcpy(&s0, src_ptr, 4);
+ memcpy(&r0, ref_ptr, 4);
+ s = vdup_n_u32(s0);
+ r = vdup_n_u32(r0);
src_ptr += src_stride;
ref_ptr += ref_stride;
- q0 = vset_lane_u32(src4, q0, 0);
- q1 = vset_lane_u32(ref4, q1, 0);
- memcpy(&src4, src_ptr, 4);
- memcpy(&ref4, ref_ptr, 4);
+ memcpy(&s1, src_ptr, 4);
+ memcpy(&r1, ref_ptr, 4);
+ s = vset_lane_u32(s1, s, 1);
+ r = vset_lane_u32(r1, r, 1);
src_ptr += src_stride;
ref_ptr += ref_stride;
- q0 = vset_lane_u32(src4, q0, 1);
- q1 = vset_lane_u32(ref4, q1, 1);
-
- q3 = vabal_u8(q3, vreinterpret_u8_u32(q0), vreinterpret_u8_u32(q1));
- }
- return horizontal_add_16x8(q3);
-}
-
-#define FSADS128_H(h) \
- unsigned int aom_sad_skip_128x##h##_neon( \
- const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
- int ref_stride) { \
- const uint32_t sum = sad128xh_neon(src_ptr, 2 * src_stride, ref_ptr, \
- 2 * ref_stride, h / 2); \
- return 2 * sum; \
- }
-
-FSADS128_H(128)
-FSADS128_H(64)
-#undef FSADS128_H
+ sum = vabal_u8(sum, vreinterpret_u8_u32(s), vreinterpret_u8_u32(r));
+ i++;
+ } while (i < h / 2);
-#define FSADS64_H(h) \
- unsigned int aom_sad_skip_64x##h##_neon( \
- const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
- int ref_stride) { \
- return 2 * sad64xh_neon(src_ptr, src_stride * 2, ref_ptr, ref_stride * 2, \
- h / 2); \
- }
-
-FSADS64_H(128)
-FSADS64_H(64)
-FSADS64_H(32)
-FSADS64_H(16)
-
-#undef FSADS64_H
+ return horizontal_add_u16x8(sum);
+}
-#define FSADS32_H(h) \
- unsigned int aom_sad_skip_32x##h##_neon( \
- const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
- int ref_stride) { \
- return 2 * sad32xh_neon(src_ptr, src_stride * 2, ref_ptr, ref_stride * 2, \
- h / 2); \
+#define SAD_WXH_NEON(w, h) \
+ unsigned int aom_sad##w##x##h##_neon(const uint8_t *src, int src_stride, \
+ const uint8_t *ref, int ref_stride) { \
+ return sad##w##xh_neon(src, src_stride, ref, ref_stride, (h)); \
}
-FSADS32_H(64)
-FSADS32_H(32)
-FSADS32_H(16)
-FSADS32_H(8)
-
-#undef FSADS32_H
-
-#define FSADS16_H(h) \
- unsigned int aom_sad_skip_16x##h##_neon( \
- const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
- int ref_stride) { \
- return 2 * sad16xh_neon(src_ptr, src_stride * 2, ref_ptr, ref_stride * 2, \
- h / 2); \
+SAD_WXH_NEON(4, 4)
+SAD_WXH_NEON(4, 8)
+SAD_WXH_NEON(4, 16)
+
+SAD_WXH_NEON(8, 4)
+SAD_WXH_NEON(8, 8)
+SAD_WXH_NEON(8, 16)
+SAD_WXH_NEON(8, 32)
+
+SAD_WXH_NEON(16, 4)
+SAD_WXH_NEON(16, 8)
+SAD_WXH_NEON(16, 16)
+SAD_WXH_NEON(16, 32)
+SAD_WXH_NEON(16, 64)
+
+SAD_WXH_NEON(32, 8)
+SAD_WXH_NEON(32, 16)
+SAD_WXH_NEON(32, 32)
+SAD_WXH_NEON(32, 64)
+
+SAD_WXH_NEON(64, 16)
+SAD_WXH_NEON(64, 32)
+SAD_WXH_NEON(64, 64)
+SAD_WXH_NEON(64, 128)
+
+SAD_WXH_NEON(128, 64)
+SAD_WXH_NEON(128, 128)
+
+#undef SAD_WXH_NEON
+
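+// The 'skip' variants sample every other row (double stride, half height)
+// and scale the result by 2 to approximate the full SAD.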
+#define SAD_SKIP_WXH_NEON(w, h) \
+ unsigned int aom_sad_skip_##w##x##h##_neon( \
+ const uint8_t *src, int src_stride, const uint8_t *ref, \
+ int ref_stride) { \
+ return 2 * \
+ sad##w##xh_neon(src, 2 * src_stride, ref, 2 * ref_stride, (h) / 2); \
}
-FSADS16_H(64)
-FSADS16_H(32)
-FSADS16_H(16)
-FSADS16_H(8)
-
-#undef FSADS16_H
+SAD_SKIP_WXH_NEON(4, 8)
+SAD_SKIP_WXH_NEON(4, 16)
-#define FSADS8_H(h) \
- unsigned int aom_sad_skip_8x##h##_neon( \
- const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
- int ref_stride) { \
- return 2 * sad8xh_neon(src_ptr, src_stride * 2, ref_ptr, ref_stride * 2, \
- h / 2); \
- }
+SAD_SKIP_WXH_NEON(8, 8)
+SAD_SKIP_WXH_NEON(8, 16)
+SAD_SKIP_WXH_NEON(8, 32)
-FSADS8_H(32)
-FSADS8_H(16)
-FSADS8_H(8)
+SAD_SKIP_WXH_NEON(16, 8)
+SAD_SKIP_WXH_NEON(16, 16)
+SAD_SKIP_WXH_NEON(16, 32)
+SAD_SKIP_WXH_NEON(16, 64)
-#undef FSADS8_H
+SAD_SKIP_WXH_NEON(32, 8)
+SAD_SKIP_WXH_NEON(32, 16)
+SAD_SKIP_WXH_NEON(32, 32)
+SAD_SKIP_WXH_NEON(32, 64)
-#define FSADS4_H(h) \
- unsigned int aom_sad_skip_4x##h##_neon( \
- const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
- int ref_stride) { \
- return 2 * sad4xh_neon(src_ptr, src_stride * 2, ref_ptr, ref_stride * 2, \
- h / 2); \
- }
+SAD_SKIP_WXH_NEON(64, 16)
+SAD_SKIP_WXH_NEON(64, 32)
+SAD_SKIP_WXH_NEON(64, 64)
+SAD_SKIP_WXH_NEON(64, 128)
-FSADS4_H(16)
-FSADS4_H(8)
+SAD_SKIP_WXH_NEON(128, 64)
+SAD_SKIP_WXH_NEON(128, 128)
-#undef FSADS4_H
+#undef SAD_SKIP_WXH_NEON
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/sse_neon.c b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/sse_neon.c
index 35b784a52d3..437014630c1 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/sse_neon.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/sse_neon.c
@@ -16,169 +16,315 @@
#include "aom_dsp/arm/sum_neon.h"
#include "aom_dsp/arm/transpose_neon.h"
-static INLINE void sse_w16_neon(uint32x4_t *sum, const uint8_t *a,
- const uint8_t *b) {
- const uint8x16_t v_a0 = vld1q_u8(a);
- const uint8x16_t v_b0 = vld1q_u8(b);
- const uint8x16_t diff = vabdq_u8(v_a0, v_b0);
- const uint8x8_t diff_lo = vget_low_u8(diff);
- const uint8x8_t diff_hi = vget_high_u8(diff);
- *sum = vpadalq_u16(*sum, vmull_u8(diff_lo, diff_lo));
- *sum = vpadalq_u16(*sum, vmull_u8(diff_hi, diff_hi));
+#if defined(__ARM_FEATURE_DOTPROD)
+
+static INLINE void sse_16x1_neon(const uint8_t *src, const uint8_t *ref,
+ uint32x4_t *sse) {
+ uint8x16_t s = vld1q_u8(src);
+ uint8x16_t r = vld1q_u8(ref);
+
+ uint8x16_t abs_diff = vabdq_u8(s, r);
+
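+  // The dot product of abs_diff with itself sums four squared differences
+  // into each 32-bit lane.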
+ *sse = vdotq_u32(*sse, abs_diff, abs_diff);
}
-static INLINE void aom_sse4x2_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- uint32x4_t *sum) {
- uint8x8_t v_a0, v_b0;
- v_a0 = v_b0 = vcreate_u8(0);
- // above line is only to shadow [-Werror=uninitialized]
- v_a0 = vreinterpret_u8_u32(
- vld1_lane_u32((uint32_t *)a, vreinterpret_u32_u8(v_a0), 0));
- v_a0 = vreinterpret_u8_u32(
- vld1_lane_u32((uint32_t *)(a + a_stride), vreinterpret_u32_u8(v_a0), 1));
- v_b0 = vreinterpret_u8_u32(
- vld1_lane_u32((uint32_t *)b, vreinterpret_u32_u8(v_b0), 0));
- v_b0 = vreinterpret_u8_u32(
- vld1_lane_u32((uint32_t *)(b + b_stride), vreinterpret_u32_u8(v_b0), 1));
- const uint8x8_t v_a_w = vabd_u8(v_a0, v_b0);
- *sum = vpadalq_u16(*sum, vmull_u8(v_a_w, v_a_w));
+
+static INLINE void sse_8x1_neon(const uint8_t *src, const uint8_t *ref,
+ uint32x2_t *sse) {
+ uint8x8_t s = vld1_u8(src);
+ uint8x8_t r = vld1_u8(ref);
+
+ uint8x8_t abs_diff = vabd_u8(s, r);
+
+ *sse = vdot_u32(*sse, abs_diff, abs_diff);
}
-static INLINE void aom_sse8_neon(const uint8_t *a, const uint8_t *b,
- uint32x4_t *sum) {
- const uint8x8_t v_a_w = vld1_u8(a);
- const uint8x8_t v_b_w = vld1_u8(b);
- const uint8x8_t v_d_w = vabd_u8(v_a_w, v_b_w);
- *sum = vpadalq_u16(*sum, vmull_u8(v_d_w, v_d_w));
+
+static INLINE void sse_4x2_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ uint32x2_t *sse) {
+ uint8x8_t s = load_unaligned_u8(src, src_stride);
+ uint8x8_t r = load_unaligned_u8(ref, ref_stride);
+
+ uint8x8_t abs_diff = vabd_u8(s, r);
+
+ *sse = vdot_u32(*sse, abs_diff, abs_diff);
}
-int64_t aom_sse_neon(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int width, int height) {
- int y = 0;
- int64_t sse = 0;
- uint32x4_t sum = vdupq_n_u32(0);
- switch (width) {
- case 4:
- do {
- aom_sse4x2_neon(a, a_stride, b, b_stride, &sum);
- a += a_stride << 1;
- b += b_stride << 1;
- y += 2;
- } while (y < height);
-#if defined(__aarch64__)
- sse = vaddvq_u32(sum);
-#else
- sse = horizontal_add_s32x4(vreinterpretq_s32_u32(sum));
-#endif // __aarch64__
- break;
- case 8:
- do {
- aom_sse8_neon(a, b, &sum);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
-#if defined(__aarch64__)
- sse = vaddvq_u32(sum);
-#else
- sse = horizontal_add_s32x4(vreinterpretq_s32_u32(sum));
-#endif // __aarch64__
- break;
- case 16:
+
+static INLINE uint32_t sse_8xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ int height) {
+ uint32x2_t sse[2] = { vdup_n_u32(0), vdup_n_u32(0) };
+
+ int i = 0;
+ do {
+ sse_8x1_neon(src, ref, &sse[0]);
+ src += src_stride;
+ ref += ref_stride;
+ sse_8x1_neon(src, ref, &sse[1]);
+ src += src_stride;
+ ref += ref_stride;
+ i += 2;
+ } while (i < height);
+
+ return horizontal_add_u32x4(vcombine_u32(sse[0], sse[1]));
+}
+
+static INLINE uint32_t sse_4xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ int height) {
+ uint32x2_t sse = vdup_n_u32(0);
+
+ int i = 0;
+ do {
+ sse_4x2_neon(src, src_stride, ref, ref_stride, &sse);
+
+ src += 2 * src_stride;
+ ref += 2 * ref_stride;
+ i += 2;
+ } while (i < height);
+
+ return horizontal_add_u32x2(sse);
+}
+
+static INLINE uint32_t sse_wxh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ int width, int height) {
+ uint32x2_t sse[2] = { vdup_n_u32(0), vdup_n_u32(0) };
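+  // Widths that are a multiple of 4 but not 8 take the first branch, which
+  // finishes each pair of rows with a 4x2 tail block.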
+
+ if ((width & 0x07) && ((width & 0x07) < 5)) {
+ int i = 0;
+ do {
+ int j = 0;
do {
- sse_w16_neon(&sum, a, b);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
-#if defined(__aarch64__)
- sse = vaddvq_u32(sum);
-#else
- sse = horizontal_add_s32x4(vreinterpretq_s32_u32(sum));
-#endif // __aarch64__
- break;
- case 32:
+ sse_8x1_neon(src + j, ref + j, &sse[0]);
+ sse_8x1_neon(src + j + src_stride, ref + j + ref_stride, &sse[1]);
+ j += 8;
+ } while (j + 4 < width);
+
+ sse_4x2_neon(src + j, src_stride, ref + j, ref_stride, &sse[0]);
+ src += 2 * src_stride;
+ ref += 2 * ref_stride;
+ i += 2;
+ } while (i < height);
+ } else {
+ int i = 0;
+ do {
+ int j = 0;
do {
- sse_w16_neon(&sum, a, b);
- sse_w16_neon(&sum, a + 16, b + 16);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
-#if defined(__aarch64__)
- sse = vaddvq_u32(sum);
-#else
- sse = horizontal_add_s32x4(vreinterpretq_s32_u32(sum));
-#endif // __aarch64__
- break;
- case 64:
+ sse_8x1_neon(src + j, ref + j, &sse[0]);
+ sse_8x1_neon(src + j + src_stride, ref + j + ref_stride, &sse[1]);
+ j += 8;
+ } while (j < width);
+
+ src += 2 * src_stride;
+ ref += 2 * ref_stride;
+ i += 2;
+ } while (i < height);
+ }
+ return horizontal_add_u32x4(vcombine_u32(sse[0], sse[1]));
+}
+
+#else // !defined(__ARM_FEATURE_DOTPROD)
+
+static INLINE void sse_16x1_neon(const uint8_t *src, const uint8_t *ref,
+ uint32x4_t *sse) {
+ uint8x16_t s = vld1q_u8(src);
+ uint8x16_t r = vld1q_u8(ref);
+
+ uint8x16_t abs_diff = vabdq_u8(s, r);
+ uint8x8_t abs_diff_lo = vget_low_u8(abs_diff);
+ uint8x8_t abs_diff_hi = vget_high_u8(abs_diff);
+
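+  // vmull_u8 squares the absolute differences into 16-bit products, which
+  // are then pairwise-added into the 32-bit accumulator.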
+ *sse = vpadalq_u16(*sse, vmull_u8(abs_diff_lo, abs_diff_lo));
+ *sse = vpadalq_u16(*sse, vmull_u8(abs_diff_hi, abs_diff_hi));
+}
+
+static INLINE void sse_8x1_neon(const uint8_t *src, const uint8_t *ref,
+ uint32x4_t *sse) {
+ uint8x8_t s = vld1_u8(src);
+ uint8x8_t r = vld1_u8(ref);
+
+ uint8x8_t abs_diff = vabd_u8(s, r);
+
+ *sse = vpadalq_u16(*sse, vmull_u8(abs_diff, abs_diff));
+}
+
+static INLINE void sse_4x2_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ uint32x4_t *sse) {
+ uint8x8_t s = load_unaligned_u8(src, src_stride);
+ uint8x8_t r = load_unaligned_u8(ref, ref_stride);
+
+ uint8x8_t abs_diff = vabd_u8(s, r);
+
+ *sse = vpadalq_u16(*sse, vmull_u8(abs_diff, abs_diff));
+}
+
+static INLINE uint32_t sse_8xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ int height) {
+ uint32x4_t sse = vdupq_n_u32(0);
+
+ int i = 0;
+ do {
+ sse_8x1_neon(src, ref, &sse);
+
+ src += src_stride;
+ ref += ref_stride;
+ i++;
+ } while (i < height);
+
+ return horizontal_add_u32x4(sse);
+}
+
+static INLINE uint32_t sse_4xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ int height) {
+ uint32x4_t sse = vdupq_n_u32(0);
+
+ int i = 0;
+ do {
+ sse_4x2_neon(src, src_stride, ref, ref_stride, &sse);
+
+ src += 2 * src_stride;
+ ref += 2 * ref_stride;
+ i += 2;
+ } while (i < height);
+
+ return horizontal_add_u32x4(sse);
+}
+
+static INLINE uint32_t sse_wxh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ int width, int height) {
+ uint32x4_t sse = vdupq_n_u32(0);
+
+ if ((width & 0x07) && ((width & 0x07) < 5)) {
+ int i = 0;
+ do {
+ int j = 0;
do {
- sse_w16_neon(&sum, a, b);
- sse_w16_neon(&sum, a + 16 * 1, b + 16 * 1);
- sse_w16_neon(&sum, a + 16 * 2, b + 16 * 2);
- sse_w16_neon(&sum, a + 16 * 3, b + 16 * 3);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
-#if defined(__aarch64__)
- sse = vaddvq_u32(sum);
-#else
- sse = horizontal_add_s32x4(vreinterpretq_s32_u32(sum));
-#endif // __aarch64__
- break;
- case 128:
+ sse_8x1_neon(src + j, ref + j, &sse);
+ sse_8x1_neon(src + j + src_stride, ref + j + ref_stride, &sse);
+ j += 8;
+ } while (j + 4 < width);
+
+ sse_4x2_neon(src + j, src_stride, ref + j, ref_stride, &sse);
+ src += 2 * src_stride;
+ ref += 2 * ref_stride;
+ i += 2;
+ } while (i < height);
+ } else {
+ int i = 0;
+ do {
+ int j = 0;
do {
- sse_w16_neon(&sum, a, b);
- sse_w16_neon(&sum, a + 16 * 1, b + 16 * 1);
- sse_w16_neon(&sum, a + 16 * 2, b + 16 * 2);
- sse_w16_neon(&sum, a + 16 * 3, b + 16 * 3);
- sse_w16_neon(&sum, a + 16 * 4, b + 16 * 4);
- sse_w16_neon(&sum, a + 16 * 5, b + 16 * 5);
- sse_w16_neon(&sum, a + 16 * 6, b + 16 * 6);
- sse_w16_neon(&sum, a + 16 * 7, b + 16 * 7);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
-#if defined(__aarch64__)
- sse = vaddvq_u32(sum);
-#else
- sse = horizontal_add_s32x4(vreinterpretq_s32_u32(sum));
-#endif // __aarch64__
- break;
+ sse_8x1_neon(src + j, ref + j, &sse);
+ j += 8;
+ } while (j < width);
+
+ src += src_stride;
+ ref += ref_stride;
+ i++;
+ } while (i < height);
+ }
+ return horizontal_add_u32x4(sse);
+}
+
+#endif // defined(__ARM_FEATURE_DOTPROD)
+
+static INLINE uint32_t sse_128xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ int height) {
+ uint32x4_t sse[2] = { vdupq_n_u32(0), vdupq_n_u32(0) };
+
+ int i = 0;
+ do {
+ sse_16x1_neon(src, ref, &sse[0]);
+ sse_16x1_neon(src + 16, ref + 16, &sse[1]);
+ sse_16x1_neon(src + 32, ref + 32, &sse[0]);
+ sse_16x1_neon(src + 48, ref + 48, &sse[1]);
+ sse_16x1_neon(src + 64, ref + 64, &sse[0]);
+ sse_16x1_neon(src + 80, ref + 80, &sse[1]);
+ sse_16x1_neon(src + 96, ref + 96, &sse[0]);
+ sse_16x1_neon(src + 112, ref + 112, &sse[1]);
+
+ src += src_stride;
+ ref += ref_stride;
+ i++;
+ } while (i < height);
+
+ return horizontal_add_u32x4(vaddq_u32(sse[0], sse[1]));
+}
+
+static INLINE uint32_t sse_64xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ int height) {
+ uint32x4_t sse[2] = { vdupq_n_u32(0), vdupq_n_u32(0) };
+
+ int i = 0;
+ do {
+ sse_16x1_neon(src, ref, &sse[0]);
+ sse_16x1_neon(src + 16, ref + 16, &sse[1]);
+ sse_16x1_neon(src + 32, ref + 32, &sse[0]);
+ sse_16x1_neon(src + 48, ref + 48, &sse[1]);
+
+ src += src_stride;
+ ref += ref_stride;
+ i++;
+ } while (i < height);
+
+ return horizontal_add_u32x4(vaddq_u32(sse[0], sse[1]));
+}
+
+static INLINE uint32_t sse_32xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ int height) {
+ uint32x4_t sse[2] = { vdupq_n_u32(0), vdupq_n_u32(0) };
+
+ int i = 0;
+ do {
+ sse_16x1_neon(src, ref, &sse[0]);
+ sse_16x1_neon(src + 16, ref + 16, &sse[1]);
+
+ src += src_stride;
+ ref += ref_stride;
+ i++;
+ } while (i < height);
+
+ return horizontal_add_u32x4(vaddq_u32(sse[0], sse[1]));
+}
+
+static INLINE uint32_t sse_16xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ int height) {
+ uint32x4_t sse[2] = { vdupq_n_u32(0), vdupq_n_u32(0) };
+
+ int i = 0;
+ do {
+ sse_16x1_neon(src, ref, &sse[0]);
+ src += src_stride;
+ ref += ref_stride;
+ sse_16x1_neon(src, ref, &sse[1]);
+ src += src_stride;
+ ref += ref_stride;
+ i += 2;
+ } while (i < height);
+
+ return horizontal_add_u32x4(vaddq_u32(sse[0], sse[1]));
+}
+
+int64_t aom_sse_neon(const uint8_t *src, int src_stride, const uint8_t *ref,
+ int ref_stride, int width, int height) {
+ switch (width) {
+ case 4: return sse_4xh_neon(src, src_stride, ref, ref_stride, height);
+ case 8: return sse_8xh_neon(src, src_stride, ref, ref_stride, height);
+ case 16: return sse_16xh_neon(src, src_stride, ref, ref_stride, height);
+ case 32: return sse_32xh_neon(src, src_stride, ref, ref_stride, height);
+ case 64: return sse_64xh_neon(src, src_stride, ref, ref_stride, height);
+ case 128: return sse_128xh_neon(src, src_stride, ref, ref_stride, height);
default:
- if (width & 0x07) {
- do {
- int i = 0;
- do {
- aom_sse8_neon(a + i, b + i, &sum);
- aom_sse8_neon(a + i + a_stride, b + i + b_stride, &sum);
- i += 8;
- } while (i + 4 < width);
- aom_sse4x2_neon(a + i, a_stride, b + i, b_stride, &sum);
- a += (a_stride << 1);
- b += (b_stride << 1);
- y += 2;
- } while (y < height);
- } else {
- do {
- int i = 0;
- do {
- aom_sse8_neon(a + i, b + i, &sum);
- i += 8;
- } while (i < width);
- a += a_stride;
- b += b_stride;
- y += 1;
- } while (y < height);
- }
-#if defined(__aarch64__)
- sse = vaddvq_u32(sum);
-#else
- sse = horizontal_add_s32x4(vreinterpretq_s32_u32(sum));
-#endif // __aarch64__
- break;
+ return sse_wxh_neon(src, src_stride, ref, ref_stride, width, height);
}
- return sse;
}
#if CONFIG_AV1_HIGHBITDEPTH
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/subpel_variance_neon.c b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/subpel_variance_neon.c
index 4ecf891cbeb..4615038f47c 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/subpel_variance_neon.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/subpel_variance_neon.c
@@ -17,424 +17,233 @@
#include "aom_ports/mem.h"
#include "aom/aom_integer.h"
-#include "aom_dsp/aom_filter.h"
#include "aom_dsp/variance.h"
-
-// Load 2 sets of 4 bytes when alignment is not guaranteed.
-static INLINE uint8x8_t load_unaligned_u8(const uint8_t *buf, int stride) {
- uint32_t a;
- uint32x2_t a_u32 = vdup_n_u32(0);
- if (stride == 4) return vld1_u8(buf);
- memcpy(&a, buf, 4);
- buf += stride;
- a_u32 = vld1_lane_u32(&a, a_u32, 0);
- memcpy(&a, buf, 4);
- a_u32 = vld1_lane_u32(&a, a_u32, 1);
- return vreinterpret_u8_u32(a_u32);
-}
-
-// Process a block exactly 4 wide and a multiple of 2 high.
-static void var_filter_block2d_bil_w4(const uint8_t *src_ptr,
- uint8_t *output_ptr,
- unsigned int src_pixels_per_line,
- int pixel_step,
- unsigned int output_height,
- const uint8_t *filter) {
- const uint8x8_t f0 = vdup_n_u8(filter[0]);
- const uint8x8_t f1 = vdup_n_u8(filter[1]);
- unsigned int i;
- for (i = 0; i < output_height; i += 2) {
- const uint8x8_t src_0 = load_unaligned_u8(src_ptr, src_pixels_per_line);
- const uint8x8_t src_1 =
- load_unaligned_u8(src_ptr + pixel_step, src_pixels_per_line);
- const uint16x8_t a = vmull_u8(src_0, f0);
- const uint16x8_t b = vmlal_u8(a, src_1, f1);
- const uint8x8_t out = vrshrn_n_u16(b, FILTER_BITS);
- vst1_u8(output_ptr, out);
- src_ptr += 2 * src_pixels_per_line;
- output_ptr += 8;
- }
-}
-
-static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
- uint8_t *output_ptr,
- unsigned int src_pixels_per_line,
- int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const uint8_t *filter) {
- const uint8x8_t f0 = vdup_n_u8(filter[0]);
- const uint8x8_t f1 = vdup_n_u8(filter[1]);
- unsigned int i;
- for (i = 0; i < output_height; ++i) {
- const uint8x8_t src_0 = vld1_u8(&src_ptr[0]);
- const uint8x8_t src_1 = vld1_u8(&src_ptr[pixel_step]);
- const uint16x8_t a = vmull_u8(src_0, f0);
- const uint16x8_t b = vmlal_u8(a, src_1, f1);
- const uint8x8_t out = vrshrn_n_u16(b, FILTER_BITS);
- vst1_u8(output_ptr, out);
- // Next row...
- src_ptr += src_pixels_per_line;
- output_ptr += output_width;
+#include "aom_dsp/arm/mem_neon.h"
+
+static void var_filter_block2d_bil_w4(const uint8_t *src_ptr, uint8_t *dst_ptr,
+ int src_stride, int pixel_step,
+ int dst_height, int filter_offset) {
+ const uint8x8_t f0 = vdup_n_u8(8 - filter_offset);
+ const uint8x8_t f1 = vdup_n_u8(filter_offset);
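+  // The two bilinear taps always sum to 8, hence the rounding narrow by 3
+  // bits after the multiply-accumulate.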
+
+ int i = 0;
+ do {
+ uint8x8_t s0 = load_unaligned_u8(src_ptr, src_stride);
+ uint8x8_t s1 = load_unaligned_u8(src_ptr + pixel_step, src_stride);
+ uint16x8_t blend = vmull_u8(s0, f0);
+ blend = vmlal_u8(blend, s1, f1);
+ uint8x8_t blend_u8 = vrshrn_n_u16(blend, 3);
+ vst1_u8(dst_ptr, blend_u8);
+
+ src_ptr += 2 * src_stride;
+ dst_ptr += 2 * 4;
+ i += 2;
+ } while (i < dst_height);
+}
+
+static void var_filter_block2d_bil_w8(const uint8_t *src_ptr, uint8_t *dst_ptr,
+ int src_stride, int pixel_step,
+ int dst_height, int filter_offset) {
+ const uint8x8_t f0 = vdup_n_u8(8 - filter_offset);
+ const uint8x8_t f1 = vdup_n_u8(filter_offset);
+
+ int i = 0;
+ do {
+ uint8x8_t s0 = vld1_u8(src_ptr);
+ uint8x8_t s1 = vld1_u8(src_ptr + pixel_step);
+ uint16x8_t blend = vmull_u8(s0, f0);
+ blend = vmlal_u8(blend, s1, f1);
+ uint8x8_t blend_u8 = vrshrn_n_u16(blend, 3);
+ vst1_u8(dst_ptr, blend_u8);
+
+ src_ptr += src_stride;
+ dst_ptr += 8;
+ i++;
+ } while (i < dst_height);
+}
+
+static void var_filter_block2d_bil_large(const uint8_t *src_ptr,
+ uint8_t *dst_ptr, int src_stride,
+ int pixel_step, int dst_width,
+ int dst_height, int filter_offset) {
+ const uint8x8_t f0 = vdup_n_u8(8 - filter_offset);
+ const uint8x8_t f1 = vdup_n_u8(filter_offset);
+
+ int i = 0;
+ do {
+ int j = 0;
+ do {
+ uint8x16_t s0 = vld1q_u8(src_ptr + j);
+ uint8x16_t s1 = vld1q_u8(src_ptr + j + pixel_step);
+ uint16x8_t blend_l = vmull_u8(vget_low_u8(s0), f0);
+ blend_l = vmlal_u8(blend_l, vget_low_u8(s1), f1);
+ uint16x8_t blend_h = vmull_u8(vget_high_u8(s0), f0);
+ blend_h = vmlal_u8(blend_h, vget_high_u8(s1), f1);
+ uint8x16_t blend_u8 =
+ vcombine_u8(vrshrn_n_u16(blend_l, 3), vrshrn_n_u16(blend_h, 3));
+ vst1q_u8(dst_ptr + j, blend_u8);
+
+ j += 16;
+ } while (j < dst_width);
+
+ src_ptr += src_stride;
+ dst_ptr += dst_width;
+ i++;
+ } while (i < dst_height);
+}
+
+static void var_filter_block2d_bil_w16(const uint8_t *src_ptr, uint8_t *dst_ptr,
+ int src_stride, int pixel_step,
+ int dst_height, int filter_offset) {
+ var_filter_block2d_bil_large(src_ptr, dst_ptr, src_stride, pixel_step, 16,
+ dst_height, filter_offset);
+}
+
+static void var_filter_block2d_bil_w32(const uint8_t *src_ptr, uint8_t *dst_ptr,
+ int src_stride, int pixel_step,
+ int dst_height, int filter_offset) {
+ var_filter_block2d_bil_large(src_ptr, dst_ptr, src_stride, pixel_step, 32,
+ dst_height, filter_offset);
+}
+
+static void var_filter_block2d_bil_w64(const uint8_t *src_ptr, uint8_t *dst_ptr,
+ int src_stride, int pixel_step,
+ int dst_height, int filter_offset) {
+ var_filter_block2d_bil_large(src_ptr, dst_ptr, src_stride, pixel_step, 64,
+ dst_height, filter_offset);
+}
+
+static void var_filter_block2d_bil_w128(const uint8_t *src_ptr,
+ uint8_t *dst_ptr, int src_stride,
+ int pixel_step, int dst_height,
+ int filter_offset) {
+ var_filter_block2d_bil_large(src_ptr, dst_ptr, src_stride, pixel_step, 128,
+ dst_height, filter_offset);
+}
+
+static void var_filter_block2d_avg(const uint8_t *src_ptr, uint8_t *dst_ptr,
+ int src_stride, int pixel_step,
+ int dst_width, int dst_height) {
+  // We only specialise on the filter values for large block sizes (>= 16x16).
+ assert(dst_width >= 16 && dst_width % 16 == 0);
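+  // With both taps equal to 4 the bilinear filter reduces to a rounding
+  // average of adjacent pixels (vrhaddq_u8).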
+
+ int i = 0;
+ do {
+ int j = 0;
+ do {
+ uint8x16_t s0 = vld1q_u8(src_ptr + j);
+ uint8x16_t s1 = vld1q_u8(src_ptr + j + pixel_step);
+ uint8x16_t avg = vrhaddq_u8(s0, s1);
+ vst1q_u8(dst_ptr + j, avg);
+
+ j += 16;
+ } while (j < dst_width);
+
+ src_ptr += src_stride;
+ dst_ptr += dst_width;
+ i++;
+ } while (i < dst_height);
+}
+
+#define SUBPEL_VARIANCE_WXH_NEON(w, h, padding) \
+ unsigned int aom_sub_pixel_variance##w##x##h##_neon( \
+ const uint8_t *src, int src_stride, int xoffset, int yoffset, \
+ const uint8_t *ref, int ref_stride, uint32_t *sse) { \
+ uint8_t tmp0[w * (h + padding)]; \
+ uint8_t tmp1[w * h]; \
+ var_filter_block2d_bil_w##w(src, tmp0, src_stride, 1, (h + padding), \
+ xoffset); \
+ var_filter_block2d_bil_w##w(tmp0, tmp1, w, w, h, yoffset); \
+ return aom_variance##w##x##h(tmp1, w, ref, ref_stride, sse); \
}
-}
-// Process a block which is a multiple of 16 wide and any height.
-static void var_filter_block2d_bil_w16(const uint8_t *src_ptr,
- uint8_t *output_ptr,
- unsigned int src_pixels_per_line,
- int pixel_step,
- unsigned int output_height,
- unsigned int output_width,
- const uint8_t *filter) {
- const uint8x8_t f0 = vdup_n_u8(filter[0]);
- const uint8x8_t f1 = vdup_n_u8(filter[1]);
- unsigned int i, j;
- for (i = 0; i < output_height; ++i) {
- for (j = 0; j < output_width; j += 16) {
- const uint8x16_t src_0 = vld1q_u8(&src_ptr[j]);
- const uint8x16_t src_1 = vld1q_u8(&src_ptr[j + pixel_step]);
- const uint16x8_t a = vmull_u8(vget_low_u8(src_0), f0);
- const uint16x8_t b = vmlal_u8(a, vget_low_u8(src_1), f1);
- const uint8x8_t out_lo = vrshrn_n_u16(b, FILTER_BITS);
- const uint16x8_t c = vmull_u8(vget_high_u8(src_0), f0);
- const uint16x8_t d = vmlal_u8(c, vget_high_u8(src_1), f1);
- const uint8x8_t out_hi = vrshrn_n_u16(d, FILTER_BITS);
- vst1q_u8(output_ptr + j, vcombine_u8(out_lo, out_hi));
- }
- src_ptr += src_pixels_per_line;
- output_ptr += output_width;
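+// The specialised variants skip filtering in a direction whose offset is 0,
+// replace the filter with a cheap pixel average when the offset is 4
+// (half-pel), and only run the full bilinear filter for other offsets.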
+#define SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(w, h, padding) \
+ unsigned int aom_sub_pixel_variance##w##x##h##_neon( \
+ const uint8_t *src, int src_stride, int xoffset, int yoffset, \
+ const uint8_t *ref, int ref_stride, unsigned int *sse) { \
+ if (xoffset == 0) { \
+ if (yoffset == 0) { \
+ return aom_variance##w##x##h##_neon(src, src_stride, ref, ref_stride, \
+ sse); \
+ } else if (yoffset == 4) { \
+ uint8_t tmp[w * h]; \
+ var_filter_block2d_avg(src, tmp, src_stride, src_stride, w, h); \
+ return aom_variance##w##x##h##_neon(tmp, w, ref, ref_stride, sse); \
+ } else { \
+ uint8_t tmp[w * h]; \
+ var_filter_block2d_bil_w##w(src, tmp, src_stride, src_stride, h, \
+ yoffset); \
+ return aom_variance##w##x##h##_neon(tmp, w, ref, ref_stride, sse); \
+ } \
+ } else if (xoffset == 4) { \
+ uint8_t tmp0[w * (h + padding)]; \
+ var_filter_block2d_avg(src, tmp0, src_stride, 1, w, h + padding); \
+ if (yoffset == 0) { \
+ return aom_variance##w##x##h##_neon(tmp0, w, ref, ref_stride, sse); \
+ } else if (yoffset == 4) { \
+ uint8_t tmp1[w * (h + padding)]; \
+ var_filter_block2d_avg(tmp0, tmp1, w, w, w, h); \
+ return aom_variance##w##x##h##_neon(tmp1, w, ref, ref_stride, sse); \
+ } else { \
+ uint8_t tmp1[w * (h + padding)]; \
+ var_filter_block2d_bil_w##w(tmp0, tmp1, w, w, h, yoffset); \
+ return aom_variance##w##x##h##_neon(tmp1, w, ref, ref_stride, sse); \
+ } \
+ } else { \
+ uint8_t tmp0[w * (h + padding)]; \
+ var_filter_block2d_bil_w##w(src, tmp0, src_stride, 1, (h + padding), \
+ xoffset); \
+ if (yoffset == 0) { \
+ return aom_variance##w##x##h##_neon(tmp0, w, ref, ref_stride, sse); \
+ } else if (yoffset == 4) { \
+ uint8_t tmp1[w * h]; \
+ var_filter_block2d_avg(tmp0, tmp1, w, w, w, h); \
+ return aom_variance##w##x##h##_neon(tmp1, w, ref, ref_stride, sse); \
+ } else { \
+ uint8_t tmp1[w * h]; \
+ var_filter_block2d_bil_w##w(tmp0, tmp1, w, w, h, yoffset); \
+ return aom_variance##w##x##h##_neon(tmp1, w, ref, ref_stride, sse); \
+ } \
+ } \
}
-}
-
-unsigned int aom_sub_pixel_variance8x8_neon(const uint8_t *src, int src_stride,
- int xoffset, int yoffset,
- const uint8_t *dst, int dst_stride,
- unsigned int *sse) {
- DECLARE_ALIGNED(16, uint8_t, temp2[8 * 8]);
- DECLARE_ALIGNED(16, uint8_t, fdata3[9 * 8]);
-
- var_filter_block2d_bil_w8(src, fdata3, src_stride, 1, 9, 8,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8, 8,
- bilinear_filters_2t[yoffset]);
- return aom_variance8x8_neon(temp2, 8, dst, dst_stride, sse);
-}
-
-unsigned int aom_sub_pixel_variance16x16_neon(const uint8_t *src,
- int src_stride, int xoffset,
- int yoffset, const uint8_t *dst,
- int dst_stride,
- unsigned int *sse) {
- DECLARE_ALIGNED(16, uint8_t, temp2[16 * 16]);
- DECLARE_ALIGNED(16, uint8_t, fdata3[17 * 16]);
-
- var_filter_block2d_bil_w16(src, fdata3, src_stride, 1, 17, 16,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w16(fdata3, temp2, 16, 16, 16, 16,
- bilinear_filters_2t[yoffset]);
- return aom_variance16x16_neon(temp2, 16, dst, dst_stride, sse);
-}
-
-unsigned int aom_sub_pixel_variance32x32_neon(const uint8_t *src,
- int src_stride, int xoffset,
- int yoffset, const uint8_t *dst,
- int dst_stride,
- unsigned int *sse) {
- DECLARE_ALIGNED(16, uint8_t, temp2[32 * 32]);
- DECLARE_ALIGNED(16, uint8_t, fdata3[33 * 32]);
-
- var_filter_block2d_bil_w16(src, fdata3, src_stride, 1, 33, 32,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w16(fdata3, temp2, 32, 32, 32, 32,
- bilinear_filters_2t[yoffset]);
- return aom_variance32x32_neon(temp2, 32, dst, dst_stride, sse);
-}
-
-unsigned int aom_sub_pixel_variance64x64_neon(const uint8_t *src,
- int src_stride, int xoffset,
- int yoffset, const uint8_t *dst,
- int dst_stride,
- unsigned int *sse) {
- DECLARE_ALIGNED(16, uint8_t, temp2[64 * 64]);
- DECLARE_ALIGNED(16, uint8_t, fdata3[65 * 64]);
-
- var_filter_block2d_bil_w16(src, fdata3, src_stride, 1, 65, 64,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w16(fdata3, temp2, 64, 64, 64, 64,
- bilinear_filters_2t[yoffset]);
- return aom_variance64x64_neon(temp2, 64, dst, dst_stride, sse);
-}
-
-unsigned int aom_sub_pixel_variance4x4_neon(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- uint8_t temp0[4 * (4 + 2)];
- uint8_t temp1[4 * 4];
-
- var_filter_block2d_bil_w4(a, temp0, a_stride, 1, (4 + 2),
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w4(temp0, temp1, 4, 4, 4,
- bilinear_filters_2t[yoffset]);
-
- return aom_variance4x4(temp1, 4, b, b_stride, sse);
-}
-
-unsigned int aom_sub_pixel_variance4x8_neon(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- uint8_t temp0[4 * (8 + 2)];
- uint8_t temp1[4 * 8];
-
- var_filter_block2d_bil_w4(a, temp0, a_stride, 1, (8 + 2),
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w4(temp0, temp1, 4, 4, 8,
- bilinear_filters_2t[yoffset]);
-
- return aom_variance4x8(temp1, 4, b, b_stride, sse);
-}
-
-unsigned int aom_sub_pixel_variance8x4_neon(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- uint8_t temp0[8 * (4 + 1)];
- uint8_t temp1[8 * 4];
-
- var_filter_block2d_bil_w8(a, temp0, a_stride, 1, (4 + 1), 8,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w8(temp0, temp1, 8, 8, 4, 8,
- bilinear_filters_2t[yoffset]);
-
- return aom_variance8x4(temp1, 8, b, b_stride, sse);
-}
-
-unsigned int aom_sub_pixel_variance8x16_neon(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- uint8_t temp0[8 * (16 + 1)];
- uint8_t temp1[8 * 16];
-
- var_filter_block2d_bil_w8(a, temp0, a_stride, 1, (16 + 1), 8,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w8(temp0, temp1, 8, 8, 16, 8,
- bilinear_filters_2t[yoffset]);
-
- return aom_variance8x16(temp1, 8, b, b_stride, sse);
-}
-
-unsigned int aom_sub_pixel_variance16x8_neon(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- uint8_t temp0[16 * (8 + 1)];
- uint8_t temp1[16 * 8];
-
- var_filter_block2d_bil_w16(a, temp0, a_stride, 1, (8 + 1), 16,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w16(temp0, temp1, 16, 16, 8, 16,
- bilinear_filters_2t[yoffset]);
-
- return aom_variance16x8(temp1, 16, b, b_stride, sse);
-}
-
-unsigned int aom_sub_pixel_variance16x32_neon(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- uint8_t temp0[16 * (32 + 1)];
- uint8_t temp1[16 * 32];
- var_filter_block2d_bil_w16(a, temp0, a_stride, 1, (32 + 1), 16,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w16(temp0, temp1, 16, 16, 32, 16,
- bilinear_filters_2t[yoffset]);
+SUBPEL_VARIANCE_WXH_NEON(4, 4, 2)
+SUBPEL_VARIANCE_WXH_NEON(4, 8, 2)
- return aom_variance16x32(temp1, 16, b, b_stride, sse);
-}
-
-unsigned int aom_sub_pixel_variance32x16_neon(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- uint8_t temp0[32 * (16 + 1)];
- uint8_t temp1[32 * 16];
-
- var_filter_block2d_bil_w16(a, temp0, a_stride, 1, (16 + 1), 32,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w16(temp0, temp1, 32, 32, 16, 32,
- bilinear_filters_2t[yoffset]);
-
- return aom_variance32x16(temp1, 32, b, b_stride, sse);
-}
-
-unsigned int aom_sub_pixel_variance32x64_neon(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- uint8_t temp0[32 * (64 + 1)];
- uint8_t temp1[32 * 64];
-
- var_filter_block2d_bil_w16(a, temp0, a_stride, 1, (64 + 1), 32,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w16(temp0, temp1, 32, 32, 64, 32,
- bilinear_filters_2t[yoffset]);
-
- return aom_variance32x64(temp1, 32, b, b_stride, sse);
-}
-
-unsigned int aom_sub_pixel_variance64x32_neon(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- uint8_t temp0[64 * (32 + 1)];
- uint8_t temp1[64 * 32];
+SUBPEL_VARIANCE_WXH_NEON(8, 4, 1)
+SUBPEL_VARIANCE_WXH_NEON(8, 8, 1)
+SUBPEL_VARIANCE_WXH_NEON(8, 16, 1)
- var_filter_block2d_bil_w16(a, temp0, a_stride, 1, (32 + 1), 64,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w16(temp0, temp1, 64, 64, 32, 64,
- bilinear_filters_2t[yoffset]);
+SUBPEL_VARIANCE_WXH_NEON(16, 8, 1)
+SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(16, 16, 1)
+SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(16, 32, 1)
- return aom_variance64x32(temp1, 64, b, b_stride, sse);
-}
-
-unsigned int aom_sub_pixel_variance64x128_neon(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- uint8_t temp0[64 * (128 + 1)];
- uint8_t temp1[64 * 128];
-
- var_filter_block2d_bil_w16(a, temp0, a_stride, 1, (128 + 1), 64,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w16(temp0, temp1, 64, 64, 128, 64,
- bilinear_filters_2t[yoffset]);
-
- return aom_variance64x128(temp1, 64, b, b_stride, sse);
-}
-
-unsigned int aom_sub_pixel_variance128x64_neon(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- uint8_t temp0[128 * (64 + 1)];
- uint8_t temp1[128 * 64];
-
- var_filter_block2d_bil_w16(a, temp0, a_stride, 1, (64 + 1), 128,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w16(temp0, temp1, 128, 128, 64, 128,
- bilinear_filters_2t[yoffset]);
-
- return aom_variance128x64(temp1, 128, b, b_stride, sse);
-}
+SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(32, 16, 1)
+SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(32, 32, 1)
+SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(32, 64, 1)
-unsigned int aom_sub_pixel_variance128x128_neon(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- uint8_t temp0[128 * (128 + 1)];
- uint8_t temp1[128 * 128];
+SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(64, 32, 1)
+SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(64, 64, 1)
+SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(64, 128, 1)
- var_filter_block2d_bil_w16(a, temp0, a_stride, 1, (128 + 1), 128,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w16(temp0, temp1, 128, 128, 128, 128,
- bilinear_filters_2t[yoffset]);
-
- return aom_variance128x128(temp1, 128, b, b_stride, sse);
-}
+SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(128, 64, 1)
+SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(128, 128, 1)
// Realtime mode doesn't use 4x rectangular blocks.
#if !CONFIG_REALTIME_ONLY
-unsigned int aom_sub_pixel_variance4x16_neon(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- uint8_t temp0[4 * (16 + 2)];
- uint8_t temp1[4 * 16];
-
- var_filter_block2d_bil_w4(a, temp0, a_stride, 1, (16 + 2),
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w4(temp0, temp1, 4, 4, 16,
- bilinear_filters_2t[yoffset]);
-
- return aom_variance4x16(temp1, 4, b, b_stride, sse);
-}
-
-unsigned int aom_sub_pixel_variance8x32_neon(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- uint8_t temp0[8 * (32 + 1)];
- uint8_t temp1[8 * 32];
-
- var_filter_block2d_bil_w8(a, temp0, a_stride, 1, (32 + 1), 8,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w8(temp0, temp1, 8, 8, 32, 8,
- bilinear_filters_2t[yoffset]);
-
- return aom_variance8x32(temp1, 8, b, b_stride, sse);
-}
-
-unsigned int aom_sub_pixel_variance16x4_neon(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- uint8_t temp0[16 * (4 + 1)];
- uint8_t temp1[16 * 4];
-
- var_filter_block2d_bil_w16(a, temp0, a_stride, 1, (4 + 1), 16,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w16(temp0, temp1, 16, 16, 4, 16,
- bilinear_filters_2t[yoffset]);
- return aom_variance16x4(temp1, 16, b, b_stride, sse);
-}
-
-unsigned int aom_sub_pixel_variance64x16_neon(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- uint8_t temp0[64 * (16 + 1)];
- uint8_t temp1[64 * 16];
+SUBPEL_VARIANCE_WXH_NEON(4, 16, 2)
- var_filter_block2d_bil_w16(a, temp0, a_stride, 1, (16 + 1), 64,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w16(temp0, temp1, 64, 64, 16, 64,
- bilinear_filters_2t[yoffset]);
+SUBPEL_VARIANCE_WXH_NEON(8, 32, 1)
- return aom_variance64x16(temp1, 64, b, b_stride, sse);
-}
+SUBPEL_VARIANCE_WXH_NEON(16, 4, 1)
+SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(16, 64, 1)
-unsigned int aom_sub_pixel_variance16x64_neon(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- uint8_t temp0[16 * (64 + 1)];
- uint8_t temp1[16 * 64];
+SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(32, 8, 1)
- var_filter_block2d_bil_w16(a, temp0, a_stride, 1, (64 + 1), 16,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w16(temp0, temp1, 16, 16, 64, 16,
- bilinear_filters_2t[yoffset]);
-
- return aom_variance16x64(temp1, 16, b, b_stride, sse);
-}
+SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON(64, 16, 1)
-unsigned int aom_sub_pixel_variance32x8_neon(const uint8_t *a, int a_stride,
- int xoffset, int yoffset,
- const uint8_t *b, int b_stride,
- uint32_t *sse) {
- uint8_t temp0[32 * (8 + 1)];
- uint8_t temp1[32 * 8];
-
- var_filter_block2d_bil_w16(a, temp0, a_stride, 1, (8 + 1), 32,
- bilinear_filters_2t[xoffset]);
- var_filter_block2d_bil_w16(temp0, temp1, 32, 32, 8, 32,
- bilinear_filters_2t[yoffset]);
-
- return aom_variance32x8(temp1, 32, b, b_stride, sse);
-}
#endif // !CONFIG_REALTIME_ONLY
+
+#undef SUBPEL_VARIANCE_WXH_NEON
+#undef SPECIALIZED_SUBPEL_VARIANCE_WXH_NEON
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/sum_neon.h b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/sum_neon.h
index 809e51ce11d..855edf66722 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/sum_neon.h
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/sum_neon.h
@@ -14,24 +14,101 @@
#include "aom/aom_integer.h"
#include "aom_ports/mem.h"
-static INLINE int horizontal_add_s16x8(const int16x8_t v_16x8) {
- const int32x4_t a = vpaddlq_s16(v_16x8);
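+// On AArch64 these reductions use the single across-vector add instructions;
+// the Armv7 fallbacks chain pairwise widening adds instead.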
+static INLINE int horizontal_add_s16x8(const int16x8_t a) {
+#if defined(__aarch64__)
+ return vaddlvq_s16(a);
+#else
+ const int32x4_t b = vpaddlq_s16(a);
+ const int64x2_t c = vpaddlq_s32(b);
+ const int32x2_t d = vadd_s32(vreinterpret_s32_s64(vget_low_s64(c)),
+ vreinterpret_s32_s64(vget_high_s64(c)));
+ return vget_lane_s32(d, 0);
+#endif
+}
+
+static INLINE int horizontal_add_s32x4(const int32x4_t a) {
+#if defined(__aarch64__)
+ return vaddvq_s32(a);
+#else
const int64x2_t b = vpaddlq_s32(a);
const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)),
vreinterpret_s32_s64(vget_high_s64(b)));
return vget_lane_s32(c, 0);
+#endif
}
-static INLINE int horizontal_add_s32x4(const int32x4_t v_32x4) {
- const int64x2_t b = vpaddlq_s32(v_32x4);
- const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)),
- vreinterpret_s32_s64(vget_high_s64(b)));
- return vget_lane_s32(c, 0);
+static INLINE uint64_t horizontal_add_u64x2(const uint64x2_t a) {
+#if defined(__aarch64__)
+ return vaddvq_u64(a);
+#else
+ return vgetq_lane_u64(a, 0) + vgetq_lane_u64(a, 1);
+#endif
+}
+
+static INLINE uint64_t horizontal_long_add_u32x4(const uint32x4_t a) {
+#if defined(__aarch64__)
+ return vaddlvq_u32(a);
+#else
+ const uint64x2_t b = vpaddlq_u32(a);
+ return vgetq_lane_u64(b, 0) + vgetq_lane_u64(b, 1);
+#endif
}
-static INLINE uint32x2_t horizontal_add_u16x8(const uint16x8_t a) {
+static INLINE unsigned int horizontal_add_u32x4(const uint32x4_t a) {
+#if defined(__aarch64__)
+ return vaddvq_u32(a);
+#else
+ const uint64x2_t b = vpaddlq_u32(a);
+ const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
+ vreinterpret_u32_u64(vget_high_u64(b)));
+ return vget_lane_u32(c, 0);
+#endif
+}
+
+static INLINE uint32_t horizontal_long_add_u16x8(const uint16x8_t vec_lo,
+ const uint16x8_t vec_hi) {
+#if defined(__aarch64__)
+ return vaddlvq_u16(vec_lo) + vaddlvq_u16(vec_hi);
+#else
+ const uint32x4_t vec_l_lo =
+ vaddl_u16(vget_low_u16(vec_lo), vget_high_u16(vec_lo));
+ const uint32x4_t vec_l_hi =
+ vaddl_u16(vget_low_u16(vec_hi), vget_high_u16(vec_hi));
+ const uint32x4_t a = vaddq_u32(vec_l_lo, vec_l_hi);
+ const uint64x2_t b = vpaddlq_u32(a);
+ const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
+ vreinterpret_u32_u64(vget_high_u64(b)));
+ return vget_lane_u32(c, 0);
+#endif
+}
+
+static INLINE uint32_t horizontal_add_u16x8(const uint16x8_t a) {
+#if defined(__aarch64__)
+ return vaddlvq_u16(a);
+#else
const uint32x4_t b = vpaddlq_u16(a);
const uint64x2_t c = vpaddlq_u32(b);
- return vadd_u32(vreinterpret_u32_u64(vget_low_u64(c)),
- vreinterpret_u32_u64(vget_high_u64(c)));
+ const uint32x2_t d = vadd_u32(vreinterpret_u32_u64(vget_low_u64(c)),
+ vreinterpret_u32_u64(vget_high_u64(c)));
+ return vget_lane_u32(d, 0);
+#endif
+}
+
+static INLINE uint32_t horizontal_add_u32x2(const uint32x2_t a) {
+#if defined(__aarch64__)
+ return vaddv_u32(a);
+#else
+ const uint64x1_t b = vpaddl_u32(a);
+ return vget_lane_u32(vreinterpret_u32_u64(b), 0);
+#endif
+}
+
+static INLINE uint32_t horizontal_add_u16x4(const uint16x4_t a) {
+#if defined(__aarch64__)
+ return vaddlv_u16(a);
+#else
+ const uint32x2_t b = vpaddl_u16(a);
+ const uint64x1_t c = vpaddl_u32(b);
+ return vget_lane_u32(vreinterpret_u32_u64(c), 0);
+#endif
}
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/sum_squares_neon.c b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/sum_squares_neon.c
index 0b7337a941c..bf212a926f7 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/sum_squares_neon.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/sum_squares_neon.c
@@ -13,111 +13,83 @@
#include <assert.h>
#include "aom_dsp/arm/mem_neon.h"
+#include "aom_dsp/arm/sum_neon.h"
#include "config/aom_dsp_rtcd.h"
-static INLINE uint32x4_t sum_squares_i16_4x4_neon(const int16_t *src,
- int stride) {
- const int16x4_t v_val_01_lo = vld1_s16(src + 0 * stride);
- const int16x4_t v_val_01_hi = vld1_s16(src + 1 * stride);
- const int16x4_t v_val_23_lo = vld1_s16(src + 2 * stride);
- const int16x4_t v_val_23_hi = vld1_s16(src + 3 * stride);
- int32x4_t v_sq_01_d = vmull_s16(v_val_01_lo, v_val_01_lo);
- v_sq_01_d = vmlal_s16(v_sq_01_d, v_val_01_hi, v_val_01_hi);
- int32x4_t v_sq_23_d = vmull_s16(v_val_23_lo, v_val_23_lo);
- v_sq_23_d = vmlal_s16(v_sq_23_d, v_val_23_hi, v_val_23_hi);
-#if defined(__aarch64__)
- return vreinterpretq_u32_s32(vpaddq_s32(v_sq_01_d, v_sq_23_d));
-#else
- return vreinterpretq_u32_s32(vcombine_s32(
- vqmovn_s64(vpaddlq_s32(v_sq_01_d)), vqmovn_s64(vpaddlq_s32(v_sq_23_d))));
-#endif
-}
+static INLINE uint64_t aom_sum_squares_2d_i16_4x4_neon(const int16_t *src,
+ int stride) {
+ int16x4_t s0 = vld1_s16(src + 0 * stride);
+ int16x4_t s1 = vld1_s16(src + 1 * stride);
+ int16x4_t s2 = vld1_s16(src + 2 * stride);
+ int16x4_t s3 = vld1_s16(src + 3 * stride);
+
+ int32x4_t sum_squares = vmull_s16(s0, s0);
+ sum_squares = vmlal_s16(sum_squares, s1, s1);
+ sum_squares = vmlal_s16(sum_squares, s2, s2);
+ sum_squares = vmlal_s16(sum_squares, s3, s3);
-uint64_t aom_sum_squares_2d_i16_4x4_neon(const int16_t *src, int stride) {
- const uint32x4_t v_sum_0123_d = sum_squares_i16_4x4_neon(src, stride);
-#if defined(__aarch64__)
- return (uint64_t)vaddvq_u32(v_sum_0123_d);
-#else
- uint64x2_t v_sum_d = vpaddlq_u32(v_sum_0123_d);
- v_sum_d = vaddq_u64(v_sum_d, vextq_u64(v_sum_d, v_sum_d, 1));
- return vgetq_lane_u64(v_sum_d, 0);
-#endif
+ return horizontal_long_add_u32x4(vreinterpretq_u32_s32(sum_squares));
}
-uint64_t aom_sum_squares_2d_i16_4xn_neon(const int16_t *src, int stride,
- int height) {
- int r = 0;
- uint32x4_t v_acc_q = vdupq_n_u32(0);
+static INLINE uint64_t aom_sum_squares_2d_i16_4xn_neon(const int16_t *src,
+ int stride, int height) {
+ int32x4_t sum_squares[2] = { vdupq_n_s32(0), vdupq_n_s32(0) };
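+  // Two accumulators so the multiply-accumulate chains can execute in
+  // parallel on Neon implementations with more than one pipe.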
+
+ int h = 0;
do {
- const uint32x4_t v_acc_d = sum_squares_i16_4x4_neon(src, stride);
- v_acc_q = vaddq_u32(v_acc_q, v_acc_d);
- src += stride << 2;
- r += 4;
- } while (r < height);
-
- uint64x2_t v_acc_64 = vpaddlq_u32(v_acc_q);
-#if defined(__aarch64__)
- return vaddvq_u64(v_acc_64);
-#else
- v_acc_64 = vaddq_u64(v_acc_64, vextq_u64(v_acc_64, v_acc_64, 1));
- return vgetq_lane_u64(v_acc_64, 0);
-#endif
+ int16x4_t s0 = vld1_s16(src + 0 * stride);
+ int16x4_t s1 = vld1_s16(src + 1 * stride);
+ int16x4_t s2 = vld1_s16(src + 2 * stride);
+ int16x4_t s3 = vld1_s16(src + 3 * stride);
+
+ sum_squares[0] = vmlal_s16(sum_squares[0], s0, s0);
+ sum_squares[0] = vmlal_s16(sum_squares[0], s1, s1);
+ sum_squares[1] = vmlal_s16(sum_squares[1], s2, s2);
+ sum_squares[1] = vmlal_s16(sum_squares[1], s3, s3);
+
+ src += 4 * stride;
+ h += 4;
+ } while (h < height);
+
+ return horizontal_long_add_u32x4(
+ vreinterpretq_u32_s32(vaddq_s32(sum_squares[0], sum_squares[1])));
}
-uint64_t aom_sum_squares_2d_i16_nxn_neon(const int16_t *src, int stride,
- int width, int height) {
- int r = 0;
- const int32x4_t zero = vdupq_n_s32(0);
- uint64x2_t v_acc_q = vreinterpretq_u64_s32(zero);
+static INLINE uint64_t aom_sum_squares_2d_i16_nxn_neon(const int16_t *src,
+ int stride, int width,
+ int height) {
+ uint64x2_t sum_squares = vdupq_n_u64(0);
+
+ int h = 0;
do {
- int32x4_t v_sum = zero;
- int c = 0;
+ int32x4_t ss_row[2] = { vdupq_n_s32(0), vdupq_n_s32(0) };
+ int w = 0;
do {
- const int16_t *b = src + c;
- const int16x8_t v_val_0 = vld1q_s16(b + 0 * stride);
- const int16x8_t v_val_1 = vld1q_s16(b + 1 * stride);
- const int16x8_t v_val_2 = vld1q_s16(b + 2 * stride);
- const int16x8_t v_val_3 = vld1q_s16(b + 3 * stride);
- const int16x4_t v_val_0_lo = vget_low_s16(v_val_0);
- const int16x4_t v_val_1_lo = vget_low_s16(v_val_1);
- const int16x4_t v_val_2_lo = vget_low_s16(v_val_2);
- const int16x4_t v_val_3_lo = vget_low_s16(v_val_3);
- int32x4_t v_sum_01 = vmull_s16(v_val_0_lo, v_val_0_lo);
- v_sum_01 = vmlal_s16(v_sum_01, v_val_1_lo, v_val_1_lo);
- int32x4_t v_sum_23 = vmull_s16(v_val_2_lo, v_val_2_lo);
- v_sum_23 = vmlal_s16(v_sum_23, v_val_3_lo, v_val_3_lo);
-#if defined(__aarch64__)
- v_sum_01 = vmlal_high_s16(v_sum_01, v_val_0, v_val_0);
- v_sum_01 = vmlal_high_s16(v_sum_01, v_val_1, v_val_1);
- v_sum_23 = vmlal_high_s16(v_sum_23, v_val_2, v_val_2);
- v_sum_23 = vmlal_high_s16(v_sum_23, v_val_3, v_val_3);
- v_sum = vaddq_s32(v_sum, vpaddq_s32(v_sum_01, v_sum_23));
-#else
- const int16x4_t v_val_0_hi = vget_high_s16(v_val_0);
- const int16x4_t v_val_1_hi = vget_high_s16(v_val_1);
- const int16x4_t v_val_2_hi = vget_high_s16(v_val_2);
- const int16x4_t v_val_3_hi = vget_high_s16(v_val_3);
- v_sum_01 = vmlal_s16(v_sum_01, v_val_0_hi, v_val_0_hi);
- v_sum_01 = vmlal_s16(v_sum_01, v_val_1_hi, v_val_1_hi);
- v_sum_23 = vmlal_s16(v_sum_23, v_val_2_hi, v_val_2_hi);
- v_sum_23 = vmlal_s16(v_sum_23, v_val_3_hi, v_val_3_hi);
- v_sum = vaddq_s32(v_sum, vcombine_s32(vqmovn_s64(vpaddlq_s32(v_sum_01)),
- vqmovn_s64(vpaddlq_s32(v_sum_23))));
-#endif
- c += 8;
- } while (c < width);
-
- v_acc_q = vpadalq_u32(v_acc_q, vreinterpretq_u32_s32(v_sum));
+ const int16_t *s = src + w;
+ int16x8_t s0 = vld1q_s16(s + 0 * stride);
+ int16x8_t s1 = vld1q_s16(s + 1 * stride);
+ int16x8_t s2 = vld1q_s16(s + 2 * stride);
+ int16x8_t s3 = vld1q_s16(s + 3 * stride);
+
+ ss_row[0] = vmlal_s16(ss_row[0], vget_low_s16(s0), vget_low_s16(s0));
+ ss_row[0] = vmlal_s16(ss_row[0], vget_low_s16(s1), vget_low_s16(s1));
+ ss_row[0] = vmlal_s16(ss_row[0], vget_low_s16(s2), vget_low_s16(s2));
+ ss_row[0] = vmlal_s16(ss_row[0], vget_low_s16(s3), vget_low_s16(s3));
+ ss_row[1] = vmlal_s16(ss_row[1], vget_high_s16(s0), vget_high_s16(s0));
+ ss_row[1] = vmlal_s16(ss_row[1], vget_high_s16(s1), vget_high_s16(s1));
+ ss_row[1] = vmlal_s16(ss_row[1], vget_high_s16(s2), vget_high_s16(s2));
+ ss_row[1] = vmlal_s16(ss_row[1], vget_high_s16(s3), vget_high_s16(s3));
+ w += 8;
+ } while (w < width);
+
+ sum_squares = vpadalq_u32(
+ sum_squares, vreinterpretq_u32_s32(vaddq_s32(ss_row[0], ss_row[1])));
src += 4 * stride;
- r += 4;
- } while (r < height);
-#if defined(__aarch64__)
- return vaddvq_u64(v_acc_q);
-#else
- v_acc_q = vaddq_u64(v_acc_q, vextq_u64(v_acc_q, v_acc_q, 1));
- return vgetq_lane_u64(v_acc_q, 0);
-#endif
+ h += 4;
+ } while (h < height);
+
+ return horizontal_add_u64x2(sum_squares);
}
uint64_t aom_sum_squares_2d_i16_neon(const int16_t *src, int stride, int width,
@@ -136,3 +108,118 @@ uint64_t aom_sum_squares_2d_i16_neon(const int16_t *src, int stride, int width,
return aom_sum_squares_2d_i16_c(src, stride, width, height);
}
}
+
+static INLINE uint64_t aom_sum_sse_2d_i16_4x4_neon(const int16_t *src,
+ int stride, int *sum) {
+ int16x4_t s0 = vld1_s16(src + 0 * stride);
+ int16x4_t s1 = vld1_s16(src + 1 * stride);
+ int16x4_t s2 = vld1_s16(src + 2 * stride);
+ int16x4_t s3 = vld1_s16(src + 3 * stride);
+
+ int32x4_t sse = vmull_s16(s0, s0);
+ sse = vmlal_s16(sse, s1, s1);
+ sse = vmlal_s16(sse, s2, s2);
+ sse = vmlal_s16(sse, s3, s3);
+
+ int32x4_t sum_01 = vaddl_s16(s0, s1);
+ int32x4_t sum_23 = vaddl_s16(s2, s3);
+ *sum += horizontal_add_s32x4(vaddq_s32(sum_01, sum_23));
+
+ return horizontal_long_add_u32x4(vreinterpretq_u32_s32(sse));
+}
+
+static INLINE uint64_t aom_sum_sse_2d_i16_4xn_neon(const int16_t *src,
+ int stride, int height,
+ int *sum) {
+ int32x4_t sse[2] = { vdupq_n_s32(0), vdupq_n_s32(0) };
+ int32x2_t sum_acc[2] = { vdup_n_s32(0), vdup_n_s32(0) };
+
+ int h = 0;
+ do {
+ int16x4_t s0 = vld1_s16(src + 0 * stride);
+ int16x4_t s1 = vld1_s16(src + 1 * stride);
+ int16x4_t s2 = vld1_s16(src + 2 * stride);
+ int16x4_t s3 = vld1_s16(src + 3 * stride);
+
+ sse[0] = vmlal_s16(sse[0], s0, s0);
+ sse[0] = vmlal_s16(sse[0], s1, s1);
+ sse[1] = vmlal_s16(sse[1], s2, s2);
+ sse[1] = vmlal_s16(sse[1], s3, s3);
+
+ sum_acc[0] = vpadal_s16(sum_acc[0], s0);
+ sum_acc[0] = vpadal_s16(sum_acc[0], s1);
+ sum_acc[1] = vpadal_s16(sum_acc[1], s2);
+ sum_acc[1] = vpadal_s16(sum_acc[1], s3);
+
+ src += 4 * stride;
+ h += 4;
+ } while (h < height);
+
+ *sum += horizontal_add_s32x4(vcombine_s32(sum_acc[0], sum_acc[1]));
+ return horizontal_long_add_u32x4(
+ vreinterpretq_u32_s32(vaddq_s32(sse[0], sse[1])));
+}
+
+static INLINE uint64_t aom_sum_sse_2d_i16_nxn_neon(const int16_t *src,
+ int stride, int width,
+ int height, int *sum) {
+ uint64x2_t sse = vdupq_n_u64(0);
+ int32x4_t sum_acc = vdupq_n_s32(0);
+
+ int h = 0;
+ do {
+ int32x4_t sse_row[2] = { vdupq_n_s32(0), vdupq_n_s32(0) };
+ int w = 0;
+ do {
+ const int16_t *s = src + w;
+ int16x8_t s0 = vld1q_s16(s + 0 * stride);
+ int16x8_t s1 = vld1q_s16(s + 1 * stride);
+ int16x8_t s2 = vld1q_s16(s + 2 * stride);
+ int16x8_t s3 = vld1q_s16(s + 3 * stride);
+
+ sse_row[0] = vmlal_s16(sse_row[0], vget_low_s16(s0), vget_low_s16(s0));
+ sse_row[0] = vmlal_s16(sse_row[0], vget_low_s16(s1), vget_low_s16(s1));
+ sse_row[0] = vmlal_s16(sse_row[0], vget_low_s16(s2), vget_low_s16(s2));
+ sse_row[0] = vmlal_s16(sse_row[0], vget_low_s16(s3), vget_low_s16(s3));
+ sse_row[1] = vmlal_s16(sse_row[1], vget_high_s16(s0), vget_high_s16(s0));
+ sse_row[1] = vmlal_s16(sse_row[1], vget_high_s16(s1), vget_high_s16(s1));
+ sse_row[1] = vmlal_s16(sse_row[1], vget_high_s16(s2), vget_high_s16(s2));
+ sse_row[1] = vmlal_s16(sse_row[1], vget_high_s16(s3), vget_high_s16(s3));
+
+ sum_acc = vpadalq_s16(sum_acc, s0);
+ sum_acc = vpadalq_s16(sum_acc, s1);
+ sum_acc = vpadalq_s16(sum_acc, s2);
+ sum_acc = vpadalq_s16(sum_acc, s3);
+
+ w += 8;
+ } while (w < width);
+
+ sse = vpadalq_u32(sse,
+ vreinterpretq_u32_s32(vaddq_s32(sse_row[0], sse_row[1])));
+
+ src += 4 * stride;
+ h += 4;
+ } while (h < height);
+
+ *sum += horizontal_add_s32x4(sum_acc);
+ return horizontal_add_u64x2(sse);
+}
+
+uint64_t aom_sum_sse_2d_i16_neon(const int16_t *src, int stride, int width,
+ int height, int *sum) {
+ uint64_t sse;
+
+ if (LIKELY(width == 4 && height == 4)) {
+ sse = aom_sum_sse_2d_i16_4x4_neon(src, stride, sum);
+ } else if (LIKELY(width == 4 && (height & 3) == 0)) {
+ // width = 4, height is a multiple of 4.
+ sse = aom_sum_sse_2d_i16_4xn_neon(src, stride, height, sum);
+ } else if (LIKELY((width & 7) == 0 && (height & 3) == 0)) {
+    // Generic case - width is a multiple of 8, height is a multiple of 4.
+ sse = aom_sum_sse_2d_i16_nxn_neon(src, stride, width, height, sum);
+ } else {
+ sse = aom_sum_sse_2d_i16_c(src, stride, width, height, sum);
+ }
+
+ return sse;
+}
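
A note on the sum-of-squares hunk above: every kernel accumulates d^2 into 32-bit lanes with vmlal_s16 and widens to 64 bits only in the final horizontal reduction. As a reading aid, here is a minimal scalar sketch of the contract the NEON paths share with the aom_sum_squares_2d_i16_c / aom_sum_sse_2d_i16_c fallbacks the dispatchers call; the function name is illustrative, not from libaom.

#include <stdint.h>

/* Sketch: scalar model of the sum / sum-of-squares reduction. Note that,
 * as in the NEON code, *sum is accumulated into rather than overwritten. */
static uint64_t sum_sse_2d_i16_model(const int16_t *src, int stride,
                                     int width, int height, int *sum) {
  uint64_t sse = 0;
  int32_t s = 0;
  for (int i = 0; i < height; ++i) {
    for (int j = 0; j < width; ++j) {
      const int32_t v = src[i * stride + j];
      s += v;                   /* plain sum of samples */
      sse += (uint64_t)(v * v); /* sum of squared samples */
    }
  }
  *sum += s;
  return sse;
}
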
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/variance_neon.c b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/variance_neon.c
index e840f1307ec..f078705afe5 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/arm/variance_neon.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/arm/variance_neon.c
@@ -13,655 +13,604 @@
#include "config/aom_dsp_rtcd.h"
#include "config/aom_config.h"
+#include "aom_dsp/arm/mem_neon.h"
#include "aom_dsp/arm/sum_neon.h"
#include "aom/aom_integer.h"
#include "aom_ports/mem.h"
-// w * h must be less than 2048 or local variable v_sum may overflow.
-static void variance_neon_w8(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int w, int h, uint32_t *sse,
- int *sum) {
- int i, j;
- int16x8_t v_sum = vdupq_n_s16(0);
- int32x4_t v_sse_lo = vdupq_n_s32(0);
- int32x4_t v_sse_hi = vdupq_n_s32(0);
-
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 8) {
- const uint8x8_t v_a = vld1_u8(&a[j]);
- const uint8x8_t v_b = vld1_u8(&b[j]);
- const uint16x8_t v_diff = vsubl_u8(v_a, v_b);
- const int16x8_t sv_diff = vreinterpretq_s16_u16(v_diff);
- v_sum = vaddq_s16(v_sum, sv_diff);
- v_sse_lo =
- vmlal_s16(v_sse_lo, vget_low_s16(sv_diff), vget_low_s16(sv_diff));
- v_sse_hi =
- vmlal_s16(v_sse_hi, vget_high_s16(sv_diff), vget_high_s16(sv_diff));
- }
- a += a_stride;
- b += b_stride;
- }
+#if defined(__ARM_FEATURE_DOTPROD)
- *sum = horizontal_add_s16x8(v_sum);
- *sse = (unsigned int)horizontal_add_s32x4(vaddq_s32(v_sse_lo, v_sse_hi));
-}
+static INLINE void variance_4xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride, int h,
+ uint32_t *sse, int *sum) {
+ uint32x4_t src_sum = vdupq_n_u32(0);
+ uint32x4_t ref_sum = vdupq_n_u32(0);
+ uint32x4_t sse_u32 = vdupq_n_u32(0);
+
+ int i = 0;
+ do {
+ uint8x16_t s = load_unaligned_u8q(src, src_stride);
+ uint8x16_t r = load_unaligned_u8q(ref, ref_stride);
+
+ src_sum = vdotq_u32(src_sum, s, vdupq_n_u8(1));
+ ref_sum = vdotq_u32(ref_sum, r, vdupq_n_u8(1));
+
+ uint8x16_t abs_diff = vabdq_u8(s, r);
+ sse_u32 = vdotq_u32(sse_u32, abs_diff, abs_diff);
+
+ src += 4 * src_stride;
+ ref += 4 * ref_stride;
+ i += 4;
+ } while (i < h);
-void aom_get8x8var_neon(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, unsigned int *sse, int *sum) {
- variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, sum);
+ int32x4_t sum_diff =
+ vsubq_s32(vreinterpretq_s32_u32(src_sum), vreinterpretq_s32_u32(ref_sum));
+ *sum = horizontal_add_s32x4(sum_diff);
+ *sse = horizontal_add_u32x4(sse_u32);
}
-void aom_get16x16var_neon(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, unsigned int *sse, int *sum) {
- variance_neon_w8(a, a_stride, b, b_stride, 16, 16, sse, sum);
+static INLINE void variance_8xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride, int h,
+ uint32_t *sse, int *sum) {
+ uint32x4_t src_sum = vdupq_n_u32(0);
+ uint32x4_t ref_sum = vdupq_n_u32(0);
+ uint32x4_t sse_u32 = vdupq_n_u32(0);
+
+ int i = 0;
+ do {
+ uint8x16_t s = vcombine_u8(vld1_u8(src), vld1_u8(src + src_stride));
+ uint8x16_t r = vcombine_u8(vld1_u8(ref), vld1_u8(ref + ref_stride));
+
+ src_sum = vdotq_u32(src_sum, s, vdupq_n_u8(1));
+ ref_sum = vdotq_u32(ref_sum, r, vdupq_n_u8(1));
+
+ uint8x16_t abs_diff = vabdq_u8(s, r);
+ sse_u32 = vdotq_u32(sse_u32, abs_diff, abs_diff);
+
+ src += 2 * src_stride;
+ ref += 2 * ref_stride;
+ i += 2;
+ } while (i < h);
+
+ int32x4_t sum_diff =
+ vsubq_s32(vreinterpretq_s32_u32(src_sum), vreinterpretq_s32_u32(ref_sum));
+ *sum = horizontal_add_s32x4(sum_diff);
+ *sse = horizontal_add_u32x4(sse_u32);
}
-// TODO(yunqingwang): Perform variance of two/four 8x8 blocks similar to that of
-// AVX2.
-void aom_get_sse_sum_8x8_quad_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse, int *sum) {
- // Loop over 4 8x8 blocks. Process one 8x32 block.
- for (int k = 0; k < 4; k++) {
- variance_neon_w8(a + (k * 8), a_stride, b + (k * 8), b_stride, 8, 8,
- &sse[k], &sum[k]);
- }
+static INLINE void variance_16xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride, int h,
+ uint32_t *sse, int *sum) {
+ uint32x4_t src_sum = vdupq_n_u32(0);
+ uint32x4_t ref_sum = vdupq_n_u32(0);
+ uint32x4_t sse_u32 = vdupq_n_u32(0);
+
+ int i = 0;
+ do {
+ uint8x16_t s = vld1q_u8(src);
+ uint8x16_t r = vld1q_u8(ref);
+
+ src_sum = vdotq_u32(src_sum, s, vdupq_n_u8(1));
+ ref_sum = vdotq_u32(ref_sum, r, vdupq_n_u8(1));
+
+ uint8x16_t abs_diff = vabdq_u8(s, r);
+ sse_u32 = vdotq_u32(sse_u32, abs_diff, abs_diff);
+
+ src += src_stride;
+ ref += ref_stride;
+ i++;
+ } while (i < h);
+
+ int32x4_t sum_diff =
+ vsubq_s32(vreinterpretq_s32_u32(src_sum), vreinterpretq_s32_u32(ref_sum));
+ *sum = horizontal_add_s32x4(sum_diff);
+ *sse = horizontal_add_u32x4(sse_u32);
}
-unsigned int aom_variance8x8_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse) {
- int sum;
- variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, &sum);
- return *sse - ((sum * sum) >> 6);
+static INLINE void variance_large_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ int w, int h, uint32_t *sse, int *sum) {
+ uint32x4_t src_sum = vdupq_n_u32(0);
+ uint32x4_t ref_sum = vdupq_n_u32(0);
+ uint32x4_t sse_u32 = vdupq_n_u32(0);
+
+ int i = 0;
+ do {
+ int j = 0;
+ do {
+ uint8x16_t s = vld1q_u8(src + j);
+ uint8x16_t r = vld1q_u8(ref + j);
+
+ src_sum = vdotq_u32(src_sum, s, vdupq_n_u8(1));
+ ref_sum = vdotq_u32(ref_sum, r, vdupq_n_u8(1));
+
+ uint8x16_t abs_diff = vabdq_u8(s, r);
+ sse_u32 = vdotq_u32(sse_u32, abs_diff, abs_diff);
+
+ j += 16;
+ } while (j < w);
+
+ src += src_stride;
+ ref += ref_stride;
+ i++;
+ } while (i < h);
+
+ int32x4_t sum_diff =
+ vsubq_s32(vreinterpretq_s32_u32(src_sum), vreinterpretq_s32_u32(ref_sum));
+ *sum = horizontal_add_s32x4(sum_diff);
+ *sse = horizontal_add_u32x4(sse_u32);
}
-unsigned int aom_variance16x16_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse) {
- int sum;
- variance_neon_w8(a, a_stride, b, b_stride, 16, 16, sse, &sum);
- return *sse - (((unsigned int)((int64_t)sum * sum)) >> 8);
+static INLINE void variance_32xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride, int h,
+ uint32_t *sse, int *sum) {
+ variance_large_neon(src, src_stride, ref, ref_stride, 32, h, sse, sum);
}
-unsigned int aom_variance32x32_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse) {
- int sum;
- variance_neon_w8(a, a_stride, b, b_stride, 32, 32, sse, &sum);
- return *sse - (unsigned int)(((int64_t)sum * sum) >> 10);
+static INLINE void variance_64xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride, int h,
+ uint32_t *sse, int *sum) {
+ variance_large_neon(src, src_stride, ref, ref_stride, 64, h, sse, sum);
}
-unsigned int aom_variance32x64_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse) {
- int sum1, sum2;
- uint32_t sse1, sse2;
- variance_neon_w8(a, a_stride, b, b_stride, 32, 32, &sse1, &sum1);
- variance_neon_w8(a + (32 * a_stride), a_stride, b + (32 * b_stride), b_stride,
- 32, 32, &sse2, &sum2);
- *sse = sse1 + sse2;
- sum1 += sum2;
- return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 11);
+static INLINE void variance_128xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ int h, uint32_t *sse, int *sum) {
+ variance_large_neon(src, src_stride, ref, ref_stride, 128, h, sse, sum);
}
-unsigned int aom_variance64x32_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse) {
- int sum1, sum2;
- uint32_t sse1, sse2;
- variance_neon_w8(a, a_stride, b, b_stride, 64, 16, &sse1, &sum1);
- variance_neon_w8(a + (16 * a_stride), a_stride, b + (16 * b_stride), b_stride,
- 64, 16, &sse2, &sum2);
- *sse = sse1 + sse2;
- sum1 += sum2;
- return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 11);
+#else // !defined(__ARM_FEATURE_DOTPROD)
+
+static INLINE void variance_4xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride, int h,
+ uint32_t *sse, int *sum) {
+ int16x8_t sum_s16 = vdupq_n_s16(0);
+ int32x4_t sse_s32 = vdupq_n_s32(0);
+
+ // Number of rows we can process before 'sum_s16' overflows:
+  // 32767 / 255 ~= 128; each 4-wide row fills only half of the 8 lanes, so 256 rows.
+ assert(h <= 256);
+
+ int i = 0;
+ do {
+ uint8x8_t s = load_unaligned_u8(src, src_stride);
+ uint8x8_t r = load_unaligned_u8(ref, ref_stride);
+ int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(s, r));
+
+ sum_s16 = vaddq_s16(sum_s16, diff);
+
+ sse_s32 = vmlal_s16(sse_s32, vget_low_s16(diff), vget_low_s16(diff));
+ sse_s32 = vmlal_s16(sse_s32, vget_high_s16(diff), vget_high_s16(diff));
+
+ src += 2 * src_stride;
+ ref += 2 * ref_stride;
+ i += 2;
+ } while (i < h);
+
+ *sum = horizontal_add_s16x8(sum_s16);
+ *sse = (uint32_t)horizontal_add_s32x4(sse_s32);
}
-unsigned int aom_variance64x64_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse) {
- int sum1, sum2;
- uint32_t sse1, sse2;
-
- variance_neon_w8(a, a_stride, b, b_stride, 64, 16, &sse1, &sum1);
- variance_neon_w8(a + (16 * a_stride), a_stride, b + (16 * b_stride), b_stride,
- 64, 16, &sse2, &sum2);
- sse1 += sse2;
- sum1 += sum2;
-
- variance_neon_w8(a + (16 * 2 * a_stride), a_stride, b + (16 * 2 * b_stride),
- b_stride, 64, 16, &sse2, &sum2);
- sse1 += sse2;
- sum1 += sum2;
-
- variance_neon_w8(a + (16 * 3 * a_stride), a_stride, b + (16 * 3 * b_stride),
- b_stride, 64, 16, &sse2, &sum2);
- *sse = sse1 + sse2;
- sum1 += sum2;
- return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 12);
+static INLINE void variance_8xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride, int h,
+ uint32_t *sse, int *sum) {
+ int16x8_t sum_s16 = vdupq_n_s16(0);
+ int32x4_t sse_s32[2] = { vdupq_n_s32(0), vdupq_n_s32(0) };
+
+ // Number of rows we can process before 'sum_s16' overflows:
+ // 32767 / 255 ~= 128
+ assert(h <= 128);
+
+ int i = 0;
+ do {
+ uint8x8_t s = vld1_u8(src);
+ uint8x8_t r = vld1_u8(ref);
+ int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(s, r));
+
+ sum_s16 = vaddq_s16(sum_s16, diff);
+
+ sse_s32[0] = vmlal_s16(sse_s32[0], vget_low_s16(diff), vget_low_s16(diff));
+ sse_s32[1] =
+ vmlal_s16(sse_s32[1], vget_high_s16(diff), vget_high_s16(diff));
+
+ src += src_stride;
+ ref += ref_stride;
+ i++;
+ } while (i < h);
+
+ *sum = horizontal_add_s16x8(sum_s16);
+ *sse = (uint32_t)horizontal_add_s32x4(vaddq_s32(sse_s32[0], sse_s32[1]));
}
-unsigned int aom_variance128x128_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse) {
- int sum1, sum2;
- uint32_t sse1, sse2;
- sum1 = sse1 = 0;
- for (int i = 0; i < 16; i++) {
- variance_neon_w8(a + (8 * i * a_stride), a_stride, b + (8 * i * b_stride),
- b_stride, 128, 8, &sse2, &sum2);
- sse1 += sse2;
- sum1 += sum2;
- }
+static INLINE void variance_16xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride, int h,
+ uint32_t *sse, int *sum) {
+ int16x8_t sum_s16[2] = { vdupq_n_s16(0), vdupq_n_s16(0) };
+ int32x4_t sse_s32[2] = { vdupq_n_s32(0), vdupq_n_s32(0) };
- *sse = sse1;
+ // Number of rows we can process before 'sum_s16' accumulators overflow:
+ // 32767 / 255 ~= 128, so 128 16-wide rows.
+ assert(h <= 128);
- return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 14);
+ int i = 0;
+ do {
+ uint8x16_t s = vld1q_u8(src);
+ uint8x16_t r = vld1q_u8(ref);
+
+ int16x8_t diff_l =
+ vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(s), vget_low_u8(r)));
+ int16x8_t diff_h =
+ vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(s), vget_high_u8(r)));
+
+ sum_s16[0] = vaddq_s16(sum_s16[0], diff_l);
+ sum_s16[1] = vaddq_s16(sum_s16[1], diff_h);
+
+ sse_s32[0] =
+ vmlal_s16(sse_s32[0], vget_low_s16(diff_l), vget_low_s16(diff_l));
+ sse_s32[1] =
+ vmlal_s16(sse_s32[1], vget_high_s16(diff_l), vget_high_s16(diff_l));
+ sse_s32[0] =
+ vmlal_s16(sse_s32[0], vget_low_s16(diff_h), vget_low_s16(diff_h));
+ sse_s32[1] =
+ vmlal_s16(sse_s32[1], vget_high_s16(diff_h), vget_high_s16(diff_h));
+
+ src += src_stride;
+ ref += ref_stride;
+ i++;
+ } while (i < h);
+
+ *sum = horizontal_add_s16x8(vaddq_s16(sum_s16[0], sum_s16[1]));
+ *sse = (uint32_t)horizontal_add_s32x4(vaddq_s32(sse_s32[0], sse_s32[1]));
}
-unsigned int aom_variance16x8_neon(const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride, unsigned int *sse) {
- int i;
- int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
- uint32x2_t d0u32, d10u32;
- int64x1_t d0s64, d1s64;
- uint8x16_t q0u8, q1u8, q2u8, q3u8;
- uint16x8_t q11u16, q12u16, q13u16, q14u16;
- int32x4_t q8s32, q9s32, q10s32;
- int64x2_t q0s64, q1s64, q5s64;
-
- q8s32 = vdupq_n_s32(0);
- q9s32 = vdupq_n_s32(0);
- q10s32 = vdupq_n_s32(0);
-
- for (i = 0; i < 4; i++) {
- q0u8 = vld1q_u8(src_ptr);
- src_ptr += source_stride;
- q1u8 = vld1q_u8(src_ptr);
- src_ptr += source_stride;
- __builtin_prefetch(src_ptr);
-
- q2u8 = vld1q_u8(ref_ptr);
- ref_ptr += recon_stride;
- q3u8 = vld1q_u8(ref_ptr);
- ref_ptr += recon_stride;
- __builtin_prefetch(ref_ptr);
-
- q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
- q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
- q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
- q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
-
- d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
- d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
- q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
- q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
-
- d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
- d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
- q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
- q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
-
- d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
- d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
- q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
- q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
-
- d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
- d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
- q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
- q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
- }
+static INLINE void variance_large_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ int w, int h, int h_limit, uint32_t *sse,
+ int *sum) {
+ int32x4_t sum_s32 = vdupq_n_s32(0);
+ int32x4_t sse_s32[2] = { vdupq_n_s32(0), vdupq_n_s32(0) };
- q10s32 = vaddq_s32(q10s32, q9s32);
- q0s64 = vpaddlq_s32(q8s32);
- q1s64 = vpaddlq_s32(q10s32);
+ // 'h_limit' is the number of 'w'-width rows we can process before our 16-bit
+ // accumulator overflows. After hitting this limit we accumulate into 32-bit
+ // elements.
+ int h_tmp = h > h_limit ? h_limit : h;
- d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
- d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
+ int i = 0;
+ do {
+ int16x8_t sum_s16[2] = { vdupq_n_s16(0), vdupq_n_s16(0) };
+ do {
+ int j = 0;
+ do {
+ uint8x16_t s = vld1q_u8(src + j);
+ uint8x16_t r = vld1q_u8(ref + j);
+
+ int16x8_t diff_l =
+ vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(s), vget_low_u8(r)));
+ int16x8_t diff_h =
+ vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(s), vget_high_u8(r)));
+
+ sum_s16[0] = vaddq_s16(sum_s16[0], diff_l);
+ sum_s16[1] = vaddq_s16(sum_s16[1], diff_h);
+
+ sse_s32[0] =
+ vmlal_s16(sse_s32[0], vget_low_s16(diff_l), vget_low_s16(diff_l));
+ sse_s32[1] =
+ vmlal_s16(sse_s32[1], vget_high_s16(diff_l), vget_high_s16(diff_l));
+ sse_s32[0] =
+ vmlal_s16(sse_s32[0], vget_low_s16(diff_h), vget_low_s16(diff_h));
+ sse_s32[1] =
+ vmlal_s16(sse_s32[1], vget_high_s16(diff_h), vget_high_s16(diff_h));
+
+ j += 16;
+ } while (j < w);
+
+ src += src_stride;
+ ref += ref_stride;
+ i++;
+ } while (i < h_tmp);
+
+ sum_s32 = vpadalq_s16(sum_s32, sum_s16[0]);
+ sum_s32 = vpadalq_s16(sum_s32, sum_s16[1]);
+
+ h_tmp += h_limit;
+ } while (i < h);
- q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), vreinterpret_s32_s64(d0s64));
- vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
+ *sum = horizontal_add_s32x4(sum_s32);
+ *sse = (uint32_t)horizontal_add_s32x4(vaddq_s32(sse_s32[0], sse_s32[1]));
+}
- d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
- d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
+static INLINE void variance_32xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride, int h,
+ uint32_t *sse, int *sum) {
+ variance_large_neon(src, src_stride, ref, ref_stride, 32, h, 64, sse, sum);
+}
+
+static INLINE void variance_64xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride, int h,
+ uint32_t *sse, int *sum) {
+ variance_large_neon(src, src_stride, ref, ref_stride, 64, h, 32, sse, sum);
+}
- return vget_lane_u32(d0u32, 0);
+static INLINE void variance_128xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ int h, uint32_t *sse, int *sum) {
+ variance_large_neon(src, src_stride, ref, ref_stride, 128, h, 16, sse, sum);
}
-unsigned int aom_variance8x16_neon(const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride, unsigned int *sse) {
- int i;
- uint8x8_t d0u8, d2u8, d4u8, d6u8;
- int16x4_t d22s16, d23s16, d24s16, d25s16;
- uint32x2_t d0u32, d10u32;
- int64x1_t d0s64, d1s64;
- uint16x8_t q11u16, q12u16;
- int32x4_t q8s32, q9s32, q10s32;
- int64x2_t q0s64, q1s64, q5s64;
-
- q8s32 = vdupq_n_s32(0);
- q9s32 = vdupq_n_s32(0);
- q10s32 = vdupq_n_s32(0);
-
- for (i = 0; i < 8; i++) {
- d0u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
- d2u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
- __builtin_prefetch(src_ptr);
-
- d4u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
- d6u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
- __builtin_prefetch(ref_ptr);
-
- q11u16 = vsubl_u8(d0u8, d4u8);
- q12u16 = vsubl_u8(d2u8, d6u8);
-
- d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
- d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
- q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
- q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
-
- d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
- d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
- q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
- q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
+#endif // defined(__ARM_FEATURE_DOTPROD)
+
+#define VARIANCE_WXH_NEON(w, h, shift) \
+ unsigned int aom_variance##w##x##h##_neon( \
+ const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
+ unsigned int *sse) { \
+ int sum; \
+ variance_##w##xh_neon(src, src_stride, ref, ref_stride, h, sse, &sum); \
+ return *sse - (uint32_t)(((int64_t)sum * sum) >> shift); \
}
- q10s32 = vaddq_s32(q10s32, q9s32);
- q0s64 = vpaddlq_s32(q8s32);
- q1s64 = vpaddlq_s32(q10s32);
+VARIANCE_WXH_NEON(4, 4, 4)
+VARIANCE_WXH_NEON(4, 8, 5)
+VARIANCE_WXH_NEON(4, 16, 6)
- d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
- d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
+VARIANCE_WXH_NEON(8, 4, 5)
+VARIANCE_WXH_NEON(8, 8, 6)
+VARIANCE_WXH_NEON(8, 16, 7)
+VARIANCE_WXH_NEON(8, 32, 8)
- q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64), vreinterpret_s32_s64(d0s64));
- vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
+VARIANCE_WXH_NEON(16, 4, 6)
+VARIANCE_WXH_NEON(16, 8, 7)
+VARIANCE_WXH_NEON(16, 16, 8)
+VARIANCE_WXH_NEON(16, 32, 9)
+VARIANCE_WXH_NEON(16, 64, 10)
- d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
- d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
+VARIANCE_WXH_NEON(32, 8, 8)
+VARIANCE_WXH_NEON(32, 16, 9)
+VARIANCE_WXH_NEON(32, 32, 10)
+VARIANCE_WXH_NEON(32, 64, 11)
- return vget_lane_u32(d0u32, 0);
-}
+VARIANCE_WXH_NEON(64, 16, 10)
+VARIANCE_WXH_NEON(64, 32, 11)
+VARIANCE_WXH_NEON(64, 64, 12)
+VARIANCE_WXH_NEON(64, 128, 13)
-unsigned int aom_mse16x16_neon(const unsigned char *src_ptr, int source_stride,
- const unsigned char *ref_ptr, int recon_stride,
- unsigned int *sse) {
- int i;
- int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
- int64x1_t d0s64;
- uint8x16_t q0u8, q1u8, q2u8, q3u8;
- int32x4_t q7s32, q8s32, q9s32, q10s32;
- uint16x8_t q11u16, q12u16, q13u16, q14u16;
- int64x2_t q1s64;
-
- q7s32 = vdupq_n_s32(0);
- q8s32 = vdupq_n_s32(0);
- q9s32 = vdupq_n_s32(0);
- q10s32 = vdupq_n_s32(0);
-
- for (i = 0; i < 8; i++) { // mse16x16_neon_loop
- q0u8 = vld1q_u8(src_ptr);
- src_ptr += source_stride;
- q1u8 = vld1q_u8(src_ptr);
- src_ptr += source_stride;
- q2u8 = vld1q_u8(ref_ptr);
- ref_ptr += recon_stride;
- q3u8 = vld1q_u8(ref_ptr);
- ref_ptr += recon_stride;
-
- q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
- q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
- q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
- q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
-
- d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
- d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
- q7s32 = vmlal_s16(q7s32, d22s16, d22s16);
- q8s32 = vmlal_s16(q8s32, d23s16, d23s16);
-
- d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
- d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
- q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
- q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
-
- d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
- d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
- q7s32 = vmlal_s16(q7s32, d26s16, d26s16);
- q8s32 = vmlal_s16(q8s32, d27s16, d27s16);
-
- d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
- d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
- q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
- q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
- }
+VARIANCE_WXH_NEON(128, 64, 13)
+VARIANCE_WXH_NEON(128, 128, 14)
- q7s32 = vaddq_s32(q7s32, q8s32);
- q9s32 = vaddq_s32(q9s32, q10s32);
- q10s32 = vaddq_s32(q7s32, q9s32);
+#undef VARIANCE_WXH_NEON
- q1s64 = vpaddlq_s32(q10s32);
- d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
+void aom_get8x8var_neon(const uint8_t *src, int src_stride, const uint8_t *ref,
+ int ref_stride, unsigned int *sse, int *sum) {
+ variance_8xh_neon(src, src_stride, ref, ref_stride, 8, sse, sum);
+}
- vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d0s64), 0);
- return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
+void aom_get16x16var_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride, unsigned int *sse,
+ int *sum) {
+ variance_16xh_neon(src, src_stride, ref, ref_stride, 16, sse, sum);
}
-unsigned int aom_get4x4sse_cs_neon(const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride) {
- int16x4_t d22s16, d24s16, d26s16, d28s16;
- int64x1_t d0s64;
- uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
- int32x4_t q7s32, q8s32, q9s32, q10s32;
- uint16x8_t q11u16, q12u16, q13u16, q14u16;
- int64x2_t q1s64;
-
- d0u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
- d4u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
- d1u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
- d5u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
- d2u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
- d6u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
- d3u8 = vld1_u8(src_ptr);
- d7u8 = vld1_u8(ref_ptr);
-
- q11u16 = vsubl_u8(d0u8, d4u8);
- q12u16 = vsubl_u8(d1u8, d5u8);
- q13u16 = vsubl_u8(d2u8, d6u8);
- q14u16 = vsubl_u8(d3u8, d7u8);
-
- d22s16 = vget_low_s16(vreinterpretq_s16_u16(q11u16));
- d24s16 = vget_low_s16(vreinterpretq_s16_u16(q12u16));
- d26s16 = vget_low_s16(vreinterpretq_s16_u16(q13u16));
- d28s16 = vget_low_s16(vreinterpretq_s16_u16(q14u16));
-
- q7s32 = vmull_s16(d22s16, d22s16);
- q8s32 = vmull_s16(d24s16, d24s16);
- q9s32 = vmull_s16(d26s16, d26s16);
- q10s32 = vmull_s16(d28s16, d28s16);
-
- q7s32 = vaddq_s32(q7s32, q8s32);
- q9s32 = vaddq_s32(q9s32, q10s32);
- q9s32 = vaddq_s32(q7s32, q9s32);
-
- q1s64 = vpaddlq_s32(q9s32);
- d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
-
- return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
+// TODO(yunqingwang): Perform variance of two/four 8x8 blocks similar to that of
+// AVX2.
+void aom_get_sse_sum_8x8_quad_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse, int *sum) {
+  // Loop over four horizontally adjacent 8x8 blocks (an 8-row x 32-col region).
+ for (int k = 0; k < 4; k++) {
+ variance_8xh_neon(src + (k * 8), src_stride, ref + (k * 8), ref_stride, 8,
+ &sse[k], &sum[k]);
+ }
}
-// Load 4 sets of 4 bytes when alignment is not guaranteed.
-static INLINE uint8x16_t load_unaligned_u8q(const uint8_t *buf, int stride) {
- uint32_t a;
- uint32x4_t a_u32 = vdupq_n_u32(0);
- if (stride == 4) return vld1q_u8(buf);
- memcpy(&a, buf, 4);
- buf += stride;
- a_u32 = vld1q_lane_u32(&a, a_u32, 0);
- memcpy(&a, buf, 4);
- buf += stride;
- a_u32 = vld1q_lane_u32(&a, a_u32, 1);
- memcpy(&a, buf, 4);
- buf += stride;
- a_u32 = vld1q_lane_u32(&a, a_u32, 2);
- memcpy(&a, buf, 4);
- buf += stride;
- a_u32 = vld1q_lane_u32(&a, a_u32, 3);
- return vreinterpretq_u8_u32(a_u32);
+#if defined(__ARM_FEATURE_DOTPROD)
+
+static INLINE unsigned int mse8xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse, int h) {
+ uint32x4_t sse_u32 = vdupq_n_u32(0);
+
+ int i = 0;
+ do {
+ uint8x16_t s = vcombine_u8(vld1_u8(src), vld1_u8(src + src_stride));
+ uint8x16_t r = vcombine_u8(vld1_u8(ref), vld1_u8(ref + ref_stride));
+
+ uint8x16_t abs_diff = vabdq_u8(s, r);
+
+ sse_u32 = vdotq_u32(sse_u32, abs_diff, abs_diff);
+
+ src += 2 * src_stride;
+ ref += 2 * ref_stride;
+ i += 2;
+ } while (i < h);
+
+ *sse = horizontal_add_u32x4(sse_u32);
+  return *sse;
}
-// The variance helper functions use int16_t for sum. 8 values are accumulated
-// and then added (at which point they expand up to int32_t). To avoid overflow,
-// there can be no more than 32767 / 255 ~= 128 values accumulated in each
-// column. For a 32x32 buffer, this results in 32 / 8 = 4 values per row * 32
-// rows = 128. Asserts have been added to each function to warn against reaching
-// this limit.
-
-// Process a block of width 4 four rows at a time.
-static void variance_neon_w4x4(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int h, uint32_t *sse, int *sum) {
- const int32x4_t zero = vdupq_n_s32(0);
- int16x8_t sum_s16 = vreinterpretq_s16_s32(zero);
- int32x4_t sse_s32 = zero;
-
- // Since width is only 4, sum_s16 only loads a half row per loop.
- assert(h <= 256);
+static INLINE unsigned int mse16xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse, int h) {
+ uint32x4_t sse_u32[2] = { vdupq_n_u32(0), vdupq_n_u32(0) };
- int i;
- for (i = 0; i < h; i += 4) {
- const uint8x16_t a_u8 = load_unaligned_u8q(a, a_stride);
- const uint8x16_t b_u8 = load_unaligned_u8q(b, b_stride);
- const int16x8_t diff_lo_s16 =
- vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(a_u8), vget_low_u8(b_u8)));
- const int16x8_t diff_hi_s16 =
- vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(a_u8), vget_high_u8(b_u8)));
-
- sum_s16 = vaddq_s16(sum_s16, diff_lo_s16);
- sum_s16 = vaddq_s16(sum_s16, diff_hi_s16);
-
- sse_s32 = vmlal_s16(sse_s32, vget_low_s16(diff_lo_s16),
- vget_low_s16(diff_lo_s16));
- sse_s32 = vmlal_s16(sse_s32, vget_high_s16(diff_lo_s16),
- vget_high_s16(diff_lo_s16));
-
- sse_s32 = vmlal_s16(sse_s32, vget_low_s16(diff_hi_s16),
- vget_low_s16(diff_hi_s16));
- sse_s32 = vmlal_s16(sse_s32, vget_high_s16(diff_hi_s16),
- vget_high_s16(diff_hi_s16));
-
- a += 4 * a_stride;
- b += 4 * b_stride;
- }
+ int i = 0;
+ do {
+ uint8x16_t s0 = vld1q_u8(src);
+ uint8x16_t s1 = vld1q_u8(src + src_stride);
+ uint8x16_t r0 = vld1q_u8(ref);
+ uint8x16_t r1 = vld1q_u8(ref + ref_stride);
-#if defined(__aarch64__)
- *sum = vaddvq_s32(vpaddlq_s16(sum_s16));
- *sse = (uint32_t)vaddvq_s32(sse_s32);
-#else
- *sum = horizontal_add_s16x8(sum_s16);
- *sse = (uint32_t)horizontal_add_s32x4(sse_s32);
-#endif
+ uint8x16_t abs_diff0 = vabdq_u8(s0, r0);
+ uint8x16_t abs_diff1 = vabdq_u8(s1, r1);
+
+ sse_u32[0] = vdotq_u32(sse_u32[0], abs_diff0, abs_diff0);
+ sse_u32[1] = vdotq_u32(sse_u32[1], abs_diff1, abs_diff1);
+
+ src += 2 * src_stride;
+ ref += 2 * ref_stride;
+ i += 2;
+ } while (i < h);
+
+ *sse = horizontal_add_u32x4(vaddq_u32(sse_u32[0], sse_u32[1]));
+  return *sse;
}
-// Process a block of any size where the width is divisible by 16.
-static void variance_neon_w16(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int w, int h, uint32_t *sse,
- int *sum) {
- const int32x4_t zero = vdupq_n_s32(0);
- int16x8_t sum_s16 = vreinterpretq_s16_s32(zero);
- int32x4_t sse_s32 = zero;
-
- // The loop loads 16 values at a time but doubles them up when accumulating
- // into sum_s16.
- assert(w / 8 * h <= 128);
-
- int i, j;
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 16) {
- const uint8x16_t a_u8 = vld1q_u8(a + j);
- const uint8x16_t b_u8 = vld1q_u8(b + j);
-
- const int16x8_t diff_lo_s16 =
- vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(a_u8), vget_low_u8(b_u8)));
- const int16x8_t diff_hi_s16 = vreinterpretq_s16_u16(
- vsubl_u8(vget_high_u8(a_u8), vget_high_u8(b_u8)));
-
- sum_s16 = vaddq_s16(sum_s16, diff_lo_s16);
- sum_s16 = vaddq_s16(sum_s16, diff_hi_s16);
-
- sse_s32 = vmlal_s16(sse_s32, vget_low_s16(diff_lo_s16),
- vget_low_s16(diff_lo_s16));
- sse_s32 = vmlal_s16(sse_s32, vget_high_s16(diff_lo_s16),
- vget_high_s16(diff_lo_s16));
-
- sse_s32 = vmlal_s16(sse_s32, vget_low_s16(diff_hi_s16),
- vget_low_s16(diff_hi_s16));
- sse_s32 = vmlal_s16(sse_s32, vget_high_s16(diff_hi_s16),
- vget_high_s16(diff_hi_s16));
- }
- a += a_stride;
- b += b_stride;
- }
+unsigned int aom_get4x4sse_cs_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride) {
+ uint8x16_t s = load_unaligned_u8q(src, src_stride);
+ uint8x16_t r = load_unaligned_u8q(ref, ref_stride);
-#if defined(__aarch64__)
- *sum = vaddvq_s32(vpaddlq_s16(sum_s16));
- *sse = (uint32_t)vaddvq_s32(sse_s32);
-#else
- *sum = horizontal_add_s16x8(sum_s16);
- *sse = (uint32_t)horizontal_add_s32x4(sse_s32);
-#endif
+ uint8x16_t abs_diff = vabdq_u8(s, r);
+
+ uint32x4_t sse = vdotq_u32(vdupq_n_u32(0), abs_diff, abs_diff);
+
+ return horizontal_add_u32x4(sse);
}
-// Process a block of width 8 two rows at a time.
-static void variance_neon_w8x2(const uint8_t *a, int a_stride, const uint8_t *b,
- int b_stride, int h, uint32_t *sse, int *sum) {
- const int32x4_t zero = vdupq_n_s32(0);
- int16x8_t sum_s16 = vreinterpretq_s16_s32(zero);
- int32x4_t sse_s32 = zero;
+#else // !defined(__ARM_FEATURE_DOTPROD)
- // Each column has it's own accumulator entry in sum_s16.
- assert(h <= 128);
+static INLINE unsigned int mse8xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse, int h) {
+ uint8x8_t s[2], r[2];
+ int16x4_t diff_lo[2], diff_hi[2];
+ uint16x8_t diff[2];
+ int32x4_t sse_s32[2] = { vdupq_n_s32(0), vdupq_n_s32(0) };
int i = 0;
do {
- const uint8x8_t a_0_u8 = vld1_u8(a);
- const uint8x8_t a_1_u8 = vld1_u8(a + a_stride);
- const uint8x8_t b_0_u8 = vld1_u8(b);
- const uint8x8_t b_1_u8 = vld1_u8(b + b_stride);
- const int16x8_t diff_0_s16 =
- vreinterpretq_s16_u16(vsubl_u8(a_0_u8, b_0_u8));
- const int16x8_t diff_1_s16 =
- vreinterpretq_s16_u16(vsubl_u8(a_1_u8, b_1_u8));
- sum_s16 = vaddq_s16(sum_s16, diff_0_s16);
- sum_s16 = vaddq_s16(sum_s16, diff_1_s16);
- sse_s32 =
- vmlal_s16(sse_s32, vget_low_s16(diff_0_s16), vget_low_s16(diff_0_s16));
- sse_s32 =
- vmlal_s16(sse_s32, vget_low_s16(diff_1_s16), vget_low_s16(diff_1_s16));
- sse_s32 = vmlal_s16(sse_s32, vget_high_s16(diff_0_s16),
- vget_high_s16(diff_0_s16));
- sse_s32 = vmlal_s16(sse_s32, vget_high_s16(diff_1_s16),
- vget_high_s16(diff_1_s16));
- a += a_stride + a_stride;
- b += b_stride + b_stride;
+ s[0] = vld1_u8(src);
+ src += src_stride;
+ s[1] = vld1_u8(src);
+ src += src_stride;
+ r[0] = vld1_u8(ref);
+ ref += ref_stride;
+ r[1] = vld1_u8(ref);
+ ref += ref_stride;
+
+ diff[0] = vsubl_u8(s[0], r[0]);
+ diff[1] = vsubl_u8(s[1], r[1]);
+
+ diff_lo[0] = vreinterpret_s16_u16(vget_low_u16(diff[0]));
+ diff_lo[1] = vreinterpret_s16_u16(vget_low_u16(diff[1]));
+ sse_s32[0] = vmlal_s16(sse_s32[0], diff_lo[0], diff_lo[0]);
+ sse_s32[1] = vmlal_s16(sse_s32[1], diff_lo[1], diff_lo[1]);
+
+ diff_hi[0] = vreinterpret_s16_u16(vget_high_u16(diff[0]));
+ diff_hi[1] = vreinterpret_s16_u16(vget_high_u16(diff[1]));
+ sse_s32[0] = vmlal_s16(sse_s32[0], diff_hi[0], diff_hi[0]);
+ sse_s32[1] = vmlal_s16(sse_s32[1], diff_hi[1], diff_hi[1]);
+
i += 2;
} while (i < h);
-#if defined(__aarch64__)
- *sum = vaddvq_s32(vpaddlq_s16(sum_s16));
- *sse = (uint32_t)vaddvq_s32(sse_s32);
-#else
- *sum = horizontal_add_s16x8(sum_s16);
- *sse = (uint32_t)horizontal_add_s32x4(sse_s32);
-#endif
+ sse_s32[0] = vaddq_s32(sse_s32[0], sse_s32[1]);
+
+ *sse = horizontal_add_u32x4(vreinterpretq_u32_s32(sse_s32[0]));
+  return *sse;
}
-#define VARIANCE_NXM(n, m, shift) \
- unsigned int aom_variance##n##x##m##_neon(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- unsigned int *sse) { \
- int sum; \
- if (n == 4) \
- variance_neon_w4x4(a, a_stride, b, b_stride, m, sse, &sum); \
- else if (n == 8) \
- variance_neon_w8x2(a, a_stride, b, b_stride, m, sse, &sum); \
- else \
- variance_neon_w16(a, a_stride, b, b_stride, n, m, sse, &sum); \
- if (n * m < 16 * 16) \
- return *sse - ((sum * sum) >> shift); \
- else \
- return *sse - (uint32_t)(((int64_t)sum * sum) >> shift); \
- }
+static INLINE unsigned int mse16xh_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse, int h) {
+ uint8x16_t s[2], r[2];
+ int16x4_t diff_lo[4], diff_hi[4];
+ uint16x8_t diff[4];
+ int32x4_t sse_s32[4] = { vdupq_n_s32(0), vdupq_n_s32(0), vdupq_n_s32(0),
+ vdupq_n_s32(0) };
-static void variance_neon_wide_block(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride, int w,
- int h, uint32_t *sse, int *sum) {
- const int32x4_t zero = vdupq_n_s32(0);
- int32x4_t v_diff = zero;
- int64x2_t v_sse = vreinterpretq_s64_s32(zero);
-
- int s, i, j;
- for (s = 0; s < 16; s++) {
- int32x4_t sse_s32 = zero;
- int16x8_t sum_s16 = vreinterpretq_s16_s32(zero);
- for (i = (s * h) >> 4; i < (((s + 1) * h) >> 4); ++i) {
- for (j = 0; j < w; j += 16) {
- const uint8x16_t a_u8 = vld1q_u8(a + j);
- const uint8x16_t b_u8 = vld1q_u8(b + j);
-
- const int16x8_t diff_lo_s16 = vreinterpretq_s16_u16(
- vsubl_u8(vget_low_u8(a_u8), vget_low_u8(b_u8)));
- const int16x8_t diff_hi_s16 = vreinterpretq_s16_u16(
- vsubl_u8(vget_high_u8(a_u8), vget_high_u8(b_u8)));
-
- sum_s16 = vaddq_s16(sum_s16, diff_lo_s16);
- sum_s16 = vaddq_s16(sum_s16, diff_hi_s16);
-
- sse_s32 = vmlal_s16(sse_s32, vget_low_s16(diff_lo_s16),
- vget_low_s16(diff_lo_s16));
- sse_s32 = vmlal_s16(sse_s32, vget_high_s16(diff_lo_s16),
- vget_high_s16(diff_lo_s16));
- sse_s32 = vmlal_s16(sse_s32, vget_low_s16(diff_hi_s16),
- vget_low_s16(diff_hi_s16));
- sse_s32 = vmlal_s16(sse_s32, vget_high_s16(diff_hi_s16),
- vget_high_s16(diff_hi_s16));
- }
-
- a += a_stride;
- b += b_stride;
- }
-
- v_diff = vpadalq_s16(v_diff, sum_s16);
- v_sse = vpadalq_s32(v_sse, sse_s32);
- }
-#if defined(__aarch64__)
- int diff = vaddvq_s32(v_diff);
- uint32_t sq = (uint32_t)vaddvq_u64(vreinterpretq_u64_s64(v_sse));
-#else
- int diff = horizontal_add_s32x4(v_diff);
- uint32_t sq = vget_lane_u32(
- vreinterpret_u32_s64(vadd_s64(vget_low_s64(v_sse), vget_high_s64(v_sse))),
- 0);
-#endif
-
- *sum = diff;
- *sse = sq;
+ int i = 0;
+ do {
+ s[0] = vld1q_u8(src);
+ src += src_stride;
+ s[1] = vld1q_u8(src);
+ src += src_stride;
+ r[0] = vld1q_u8(ref);
+ ref += ref_stride;
+ r[1] = vld1q_u8(ref);
+ ref += ref_stride;
+
+ diff[0] = vsubl_u8(vget_low_u8(s[0]), vget_low_u8(r[0]));
+ diff[1] = vsubl_u8(vget_high_u8(s[0]), vget_high_u8(r[0]));
+ diff[2] = vsubl_u8(vget_low_u8(s[1]), vget_low_u8(r[1]));
+ diff[3] = vsubl_u8(vget_high_u8(s[1]), vget_high_u8(r[1]));
+
+ diff_lo[0] = vreinterpret_s16_u16(vget_low_u16(diff[0]));
+ diff_lo[1] = vreinterpret_s16_u16(vget_low_u16(diff[1]));
+ sse_s32[0] = vmlal_s16(sse_s32[0], diff_lo[0], diff_lo[0]);
+ sse_s32[1] = vmlal_s16(sse_s32[1], diff_lo[1], diff_lo[1]);
+
+ diff_lo[2] = vreinterpret_s16_u16(vget_low_u16(diff[2]));
+ diff_lo[3] = vreinterpret_s16_u16(vget_low_u16(diff[3]));
+ sse_s32[2] = vmlal_s16(sse_s32[2], diff_lo[2], diff_lo[2]);
+ sse_s32[3] = vmlal_s16(sse_s32[3], diff_lo[3], diff_lo[3]);
+
+ diff_hi[0] = vreinterpret_s16_u16(vget_high_u16(diff[0]));
+ diff_hi[1] = vreinterpret_s16_u16(vget_high_u16(diff[1]));
+ sse_s32[0] = vmlal_s16(sse_s32[0], diff_hi[0], diff_hi[0]);
+ sse_s32[1] = vmlal_s16(sse_s32[1], diff_hi[1], diff_hi[1]);
+
+ diff_hi[2] = vreinterpret_s16_u16(vget_high_u16(diff[2]));
+ diff_hi[3] = vreinterpret_s16_u16(vget_high_u16(diff[3]));
+ sse_s32[2] = vmlal_s16(sse_s32[2], diff_hi[2], diff_hi[2]);
+ sse_s32[3] = vmlal_s16(sse_s32[3], diff_hi[3], diff_hi[3]);
+
+ i += 2;
+ } while (i < h);
+
+ sse_s32[0] = vaddq_s32(sse_s32[0], sse_s32[1]);
+ sse_s32[2] = vaddq_s32(sse_s32[2], sse_s32[3]);
+ sse_s32[0] = vaddq_s32(sse_s32[0], sse_s32[2]);
+
+ *sse = horizontal_add_u32x4(vreinterpretq_u32_s32(sse_s32[0]));
+  return *sse;
+}
+
+unsigned int aom_get4x4sse_cs_neon(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride) {
+ uint8x8_t s[4], r[4];
+ int16x4_t diff[4];
+ int32x4_t sse;
+
+ s[0] = vld1_u8(src);
+ src += src_stride;
+ r[0] = vld1_u8(ref);
+ ref += ref_stride;
+ s[1] = vld1_u8(src);
+ src += src_stride;
+ r[1] = vld1_u8(ref);
+ ref += ref_stride;
+ s[2] = vld1_u8(src);
+ src += src_stride;
+ r[2] = vld1_u8(ref);
+ ref += ref_stride;
+ s[3] = vld1_u8(src);
+ r[3] = vld1_u8(ref);
+
+ diff[0] = vget_low_s16(vreinterpretq_s16_u16(vsubl_u8(s[0], r[0])));
+ diff[1] = vget_low_s16(vreinterpretq_s16_u16(vsubl_u8(s[1], r[1])));
+ diff[2] = vget_low_s16(vreinterpretq_s16_u16(vsubl_u8(s[2], r[2])));
+ diff[3] = vget_low_s16(vreinterpretq_s16_u16(vsubl_u8(s[3], r[3])));
+
+ sse = vmull_s16(diff[0], diff[0]);
+ sse = vmlal_s16(sse, diff[1], diff[1]);
+ sse = vmlal_s16(sse, diff[2], diff[2]);
+ sse = vmlal_s16(sse, diff[3], diff[3]);
+
+ return horizontal_add_u32x4(vreinterpretq_u32_s32(sse));
}
-#define VARIANCE_NXM_WIDE(W, H) \
- unsigned int aom_variance##W##x##H##_neon(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- uint32_t *sse) { \
- int sum; \
- variance_neon_wide_block(a, a_stride, b, b_stride, W, H, sse, &sum); \
- return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \
+#endif // defined(__ARM_FEATURE_DOTPROD)
+
+#define MSE_WXH_NEON(w, h) \
+ unsigned int aom_mse##w##x##h##_neon(const uint8_t *src, int src_stride, \
+ const uint8_t *ref, int ref_stride, \
+ unsigned int *sse) { \
+ return mse##w##xh_neon(src, src_stride, ref, ref_stride, sse, h); \
}
-VARIANCE_NXM(4, 4, 4)
-VARIANCE_NXM(4, 8, 5)
-VARIANCE_NXM(8, 4, 5)
-VARIANCE_NXM(16, 32, 9)
-VARIANCE_NXM(32, 16, 9)
-VARIANCE_NXM_WIDE(128, 64)
-VARIANCE_NXM_WIDE(64, 128)
+MSE_WXH_NEON(8, 8)
+MSE_WXH_NEON(8, 16)
+
+MSE_WXH_NEON(16, 8)
+MSE_WXH_NEON(16, 16)
+
+#undef MSE_WXH_NEON
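
Two ideas recur throughout the rewritten variance file above. On targets with __ARM_FEATURE_DOTPROD, vdotq_u32 against vdupq_n_u8(1) sums each group of four bytes, so src and ref are summed separately and subtracted to obtain the sum of differences, while dotting the absolute difference with itself yields the SSE directly. The VARIANCE_WXH_NEON wrappers then apply the textbook identity sketched below; the shift argument is log2(w * h), which is why, for example, the 16x16 instantiation passes 8. The helper name here is illustrative, not from libaom.

#include <stdint.h>

/* Sketch: the reduction performed by the VARIANCE_WXH_NEON macro.
 * variance = SSE - sum^2 / N, with N = w * h a power of two, so the
 * division is a right shift by log2(N). */
static uint32_t variance_from_sse_sum(uint32_t sse, int sum, int w, int h) {
  int shift = 0;
  while ((w * h) >> shift > 1) ++shift; /* shift = log2(w * h) */
  return sse - (uint32_t)(((int64_t)sum * sum) >> shift);
}
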
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/avg.c b/chromium/third_party/libaom/source/libaom/aom_dsp/avg.c
index 05a5c1e9022..ceb102679e3 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/avg.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/avg.c
@@ -51,19 +51,11 @@ unsigned int aom_avg_8x8_c(const uint8_t *s, int p) {
void aom_avg_8x8_quad_c(const uint8_t *s, int p, int x16_idx, int y16_idx,
int *avg) {
- const uint8_t *s_tmp = s;
for (int k = 0; k < 4; k++) {
- int sum = 0;
const int x8_idx = x16_idx + ((k & 1) << 3);
const int y8_idx = y16_idx + ((k >> 1) << 3);
- s_tmp = (s + y8_idx * p + x8_idx);
- for (int i = 0; i < 8; i++) {
- for (int j = 0; j < 8; j++) {
- sum += s_tmp[j];
- }
- s_tmp += p;
- }
- avg[k] = (sum + 32) >> 6;
+ const uint8_t *s_tmp = s + y8_idx * p + x8_idx;
+ avg[k] = aom_avg_8x8_c(s_tmp, p);
}
}
@@ -150,7 +142,12 @@ void aom_hadamard_4x4_c(const int16_t *src_diff, ptrdiff_t src_stride,
++tmp_buf;
}
- for (idx = 0; idx < 16; ++idx) coeff[idx] = (tran_low_t)buffer2[idx];
+  // Extra transpose to match SSE2 behavior (i.e., aom_hadamard_4x4_sse2).
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 4; j++) {
+ coeff[i * 4 + j] = (tran_low_t)buffer2[j * 4 + i];
+ }
+ }
}
// src_diff: first pass, 9 bit, dynamic range [-255, 255]
@@ -185,8 +182,6 @@ static void hadamard_col8(const int16_t *src_diff, ptrdiff_t src_stride,
coeff[5] = c3 - c7;
}
-// The order of the output coeff of the hadamard is not important. For
-// optimization purposes the final transpose may be skipped.
void aom_hadamard_8x8_c(const int16_t *src_diff, ptrdiff_t src_stride,
tran_low_t *coeff) {
int idx;
@@ -209,7 +204,12 @@ void aom_hadamard_8x8_c(const int16_t *src_diff, ptrdiff_t src_stride,
++tmp_buf;
}
- for (idx = 0; idx < 64; ++idx) coeff[idx] = (tran_low_t)buffer2[idx];
+  // Extra transpose to match SSE2 behavior (i.e., aom_hadamard_8x8_sse2).
+ for (int i = 0; i < 8; i++) {
+ for (int j = 0; j < 8; j++) {
+ coeff[i * 8 + j] = (tran_low_t)buffer2[j * 8 + i];
+ }
+ }
}
void aom_hadamard_lp_8x8_c(const int16_t *src_diff, ptrdiff_t src_stride,
@@ -234,10 +234,17 @@ void aom_hadamard_lp_8x8_c(const int16_t *src_diff, ptrdiff_t src_stride,
}
for (int idx = 0; idx < 64; ++idx) coeff[idx] = buffer2[idx];
+
+  // Extra transpose to match SSE2 behavior (i.e., aom_hadamard_lp_8x8_sse2).
+ for (int i = 0; i < 8; i++) {
+ for (int j = 0; j < 8; j++) {
+ coeff[i * 8 + j] = buffer2[j * 8 + i];
+ }
+ }
}
-void aom_hadamard_8x8_dual_c(const int16_t *src_diff, ptrdiff_t src_stride,
- int16_t *coeff) {
+void aom_hadamard_lp_8x8_dual_c(const int16_t *src_diff, ptrdiff_t src_stride,
+ int16_t *coeff) {
for (int i = 0; i < 2; i++) {
aom_hadamard_lp_8x8_c(src_diff + (i * 8), src_stride,
(int16_t *)coeff + (i * 64));
@@ -274,6 +281,17 @@ void aom_hadamard_16x16_c(const int16_t *src_diff, ptrdiff_t src_stride,
++coeff;
}
+
+ coeff -= 64;
+  // Extra swap of the middle coefficient groups to match AVX2 output (i.e.,
+  // aom_hadamard_16x16_avx2). This step is not needed to match SSE2 output.
+ for (int i = 0; i < 16; i++) {
+ for (int j = 0; j < 4; j++) {
+ tran_low_t temp = coeff[i * 16 + 4 + j];
+ coeff[i * 16 + 4 + j] = coeff[i * 16 + 8 + j];
+ coeff[i * 16 + 8 + j] = temp;
+ }
+ }
}
void aom_hadamard_lp_16x16_c(const int16_t *src_diff, ptrdiff_t src_stride,
@@ -515,35 +533,35 @@ int aom_satd_lp_c(const int16_t *coeff, int length) {
// Integer projection onto row vectors.
// height: value range {16, 32, 64, 128}.
-void aom_int_pro_row_c(int16_t hbuf[16], const uint8_t *ref,
- const int ref_stride, const int height) {
- int idx;
- const int norm_factor = height >> 1;
+void aom_int_pro_row_c(int16_t *hbuf, const uint8_t *ref, const int ref_stride,
+ const int width, const int height, int norm_factor) {
assert(height >= 2);
- for (idx = 0; idx < 16; ++idx) {
- int i;
+ for (int idx = 0; idx < width; ++idx) {
hbuf[idx] = 0;
// hbuf[idx]: 14 bit, dynamic range [0, 32640].
- for (i = 0; i < height; ++i) hbuf[idx] += ref[i * ref_stride];
+ for (int i = 0; i < height; ++i) hbuf[idx] += ref[i * ref_stride];
// hbuf[idx]: 9 bit, dynamic range [0, 1020].
- hbuf[idx] /= norm_factor;
+ hbuf[idx] >>= norm_factor;
++ref;
}
}
// width: value range {16, 32, 64, 128}.
-int16_t aom_int_pro_col_c(const uint8_t *ref, const int width) {
- int idx;
- int16_t sum = 0;
- // sum: 14 bit, dynamic range [0, 32640]
- for (idx = 0; idx < width; ++idx) sum += ref[idx];
- return sum;
+void aom_int_pro_col_c(int16_t *vbuf, const uint8_t *ref, const int ref_stride,
+ const int width, const int height, int norm_factor) {
+ for (int ht = 0; ht < height; ++ht) {
+ int16_t sum = 0;
+ // sum: 14 bit, dynamic range [0, 32640]
+ for (int idx = 0; idx < width; ++idx) sum += ref[idx];
+ vbuf[ht] = sum >> norm_factor;
+ ref += ref_stride;
+ }
}
// ref: [0 - 510]
// src: [0 - 510]
// bwl: {2, 3, 4, 5}
-int aom_vector_var_c(const int16_t *ref, const int16_t *src, const int bwl) {
+int aom_vector_var_c(const int16_t *ref, const int16_t *src, int bwl) {
int i;
int width = 4 << bwl;
int sse = 0, mean = 0, var;
@@ -555,6 +573,9 @@ int aom_vector_var_c(const int16_t *ref, const int16_t *src, const int bwl) {
}
// (mean * mean): dynamic range 31 bits.
- var = sse - ((mean * mean) >> (bwl + 2));
+  // If width == 128, the mean can be 510 * 128 = 65280, and log2(65280^2) ~=
+  // 31.99, so it must be cast to unsigned int to compute its square.
+ const unsigned int mean_abs = abs(mean);
+ var = sse - ((mean_abs * mean_abs) >> (bwl + 2));
return var;
}
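
The cast added to aom_vector_var_c above is load-bearing: for bwl == 5 the vector is 128 wide and samples reach 510, so |mean| can reach 510 * 128 = 65280 and its square (about 4.26e9) overflows a signed 32-bit int (undefined behaviour in C) while still fitting in an unsigned one. A standalone sketch of the fixed arithmetic:

#include <stdio.h>
#include <stdlib.h>

/* Sketch: 65280^2 does not fit in int32_t but does fit in uint32_t,
 * so the square is taken on an unsigned value. */
int main(void) {
  const int bwl = 5;          /* width = 4 << 5 = 128 */
  const int mean = 510 * 128; /* worst-case mean */
  const unsigned int mean_abs = (unsigned int)abs(mean);
  printf("%u\n", (mean_abs * mean_abs) >> (bwl + 2)); /* well-defined */
  return 0;
}
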
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/fastssim.c b/chromium/third_party/libaom/source/libaom/aom_dsp/fastssim.c
index ea58048f1d4..0ef0590e893 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/fastssim.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/fastssim.c
@@ -49,7 +49,7 @@ struct fs_ctx {
unsigned *col_buf;
};
-static void fs_ctx_init(fs_ctx *_ctx, int _w, int _h, int _nlevels) {
+static int fs_ctx_init(fs_ctx *_ctx, int _w, int _h, int _nlevels) {
unsigned char *data;
size_t data_size;
int lw;
@@ -73,6 +73,7 @@ static void fs_ctx_init(fs_ctx *_ctx, int _w, int _h, int _nlevels) {
lh = (lh + 1) >> 1;
}
data = (unsigned char *)malloc(data_size);
+ if (!data) return -1;
_ctx->level = (fs_level *)data;
_ctx->nlevels = _nlevels;
data += _nlevels * sizeof(*_ctx->level);
@@ -97,6 +98,7 @@ static void fs_ctx_init(fs_ctx *_ctx, int _w, int _h, int _nlevels) {
lh = (lh + 1) >> 1;
}
_ctx->col_buf = (unsigned *)data;
+ return 0;
}
static void fs_ctx_clear(fs_ctx *_ctx) { free(_ctx->level); }
@@ -446,7 +448,7 @@ static double calc_ssim(const uint8_t *_src, int _systride, const uint8_t *_dst,
double ret;
int l;
ret = 1;
- fs_ctx_init(&ctx, _w, _h, FS_NLEVELS);
+ if (fs_ctx_init(&ctx, _w, _h, FS_NLEVELS)) return 99.0;
fs_downsample_level0(&ctx, _src, _systride, _dst, _dystride, _w, _h, _shift,
buf_is_hbd);
for (l = 0; l < FS_NLEVELS - 1; l++) {
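
The fastssim change above converts an unchecked malloc into an init routine that reports failure, letting calc_ssim return a sentinel score instead of writing through a null pointer. A minimal model of the pattern, with illustrative names rather than libaom's:

#include <stdlib.h>

/* Sketch: init reports allocation failure; the caller substitutes a
 * sentinel result rather than dereferencing NULL. */
struct ctx { unsigned char *data; };

static int ctx_init(struct ctx *c, size_t size) {
  c->data = (unsigned char *)malloc(size);
  return c->data ? 0 : -1;
}

static double checked_compute(size_t size) {
  struct ctx c;
  if (ctx_init(&c, size)) return 99.0; /* sentinel, as in calc_ssim */
  /* ... real work ... */
  free(c.data);
  return 0.0;
}
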
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/grain_table.c b/chromium/third_party/libaom/source/libaom/aom_dsp/grain_table.c
index 03b25c81f2e..3505f9f2c8b 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/grain_table.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/grain_table.c
@@ -191,11 +191,14 @@ static void grain_table_entry_write(FILE *file,
}
}
+// TODO(https://crbug.com/aomedia/3228): Update this function to return an
+// integer status.
void aom_film_grain_table_append(aom_film_grain_table_t *t, int64_t time_stamp,
int64_t end_time,
const aom_film_grain_t *grain) {
if (!t->tail || memcmp(grain, &t->tail->params, sizeof(*grain))) {
aom_film_grain_table_entry_t *new_tail = aom_malloc(sizeof(*new_tail));
+ if (!new_tail) return;
memset(new_tail, 0, sizeof(*new_tail));
if (t->tail) t->tail->next = new_tail;
if (!t->head) t->head = new_tail;
@@ -245,6 +248,7 @@ int aom_film_grain_table_lookup(aom_film_grain_table_t *t, int64_t time_stamp,
} else {
aom_film_grain_table_entry_t *new_entry =
aom_malloc(sizeof(*new_entry));
+ if (!new_entry) return 0;
new_entry->next = entry->next;
new_entry->start_time = end_time;
new_entry->end_time = entry->end_time;
@@ -256,7 +260,10 @@ int aom_film_grain_table_lookup(aom_film_grain_table_t *t, int64_t time_stamp,
// If segments aren't aligned, delete from the beginning of subsequent
// segments
if (end_time > entry_end_time) {
- aom_film_grain_table_lookup(t, entry_end_time, end_time, 1, 0);
+ // Ignoring the return value here is safe since we're erasing from the
+ // beginning of subsequent entries.
+ aom_film_grain_table_lookup(t, entry_end_time, end_time, /*erase=*/1,
+ NULL);
}
return 1;
}
@@ -290,6 +297,11 @@ aom_codec_err_t aom_film_grain_table_read(
aom_film_grain_table_entry_t *prev_entry = NULL;
while (!feof(file)) {
aom_film_grain_table_entry_t *entry = aom_malloc(sizeof(*entry));
+ if (!entry) {
+ aom_internal_error(error_info, AOM_CODEC_MEM_ERROR,
+ "Unable to allocate grain table entry");
+ break;
+ }
memset(entry, 0, sizeof(*entry));
grain_table_entry_read(file, error_info, entry);
entry->next = NULL;
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/intrapred.c b/chromium/third_party/libaom/source/libaom/aom_dsp/intrapred.c
index 2e435383810..6ec091f5f33 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/intrapred.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/intrapred.c
@@ -52,9 +52,9 @@ static INLINE uint16_t paeth_predictor_single(uint16_t left, uint16_t top,
const int p_top_left = abs_diff(base, top_left);
// Return nearest to base of left, top and top_left.
- return (p_left <= p_top && p_left <= p_top_left)
- ? left
- : (p_top <= p_top_left) ? top : top_left;
+ return (p_left <= p_top && p_left <= p_top_left) ? left
+ : (p_top <= p_top_left) ? top
+ : top_left;
}
static INLINE void paeth_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
@@ -86,11 +86,11 @@ static INLINE void smooth_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
const uint8_t *left) {
const uint8_t below_pred = left[bh - 1]; // estimated by bottom-left pixel
const uint8_t right_pred = above[bw - 1]; // estimated by top-right pixel
- const uint8_t *const sm_weights_w = sm_weight_arrays + bw;
- const uint8_t *const sm_weights_h = sm_weight_arrays + bh;
- // scale = 2 * 2^sm_weight_log2_scale
- const int log2_scale = 1 + sm_weight_log2_scale;
- const uint16_t scale = (1 << sm_weight_log2_scale);
+ const uint8_t *const sm_weights_w = smooth_weights + bw - 4;
+ const uint8_t *const sm_weights_h = smooth_weights + bh - 4;
+ // scale = 2 * 2^SMOOTH_WEIGHT_LOG2_SCALE
+ const int log2_scale = 1 + SMOOTH_WEIGHT_LOG2_SCALE;
+ const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
sm_weights_sanity_checks(sm_weights_w, sm_weights_h, scale,
log2_scale + sizeof(*dst));
int r;
@@ -116,10 +116,10 @@ static INLINE void smooth_v_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
int bh, const uint8_t *above,
const uint8_t *left) {
const uint8_t below_pred = left[bh - 1]; // estimated by bottom-left pixel
- const uint8_t *const sm_weights = sm_weight_arrays + bh;
- // scale = 2^sm_weight_log2_scale
- const int log2_scale = sm_weight_log2_scale;
- const uint16_t scale = (1 << sm_weight_log2_scale);
+ const uint8_t *const sm_weights = smooth_weights + bh - 4;
+ // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
+ const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
+ const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
sm_weights_sanity_checks(sm_weights, sm_weights, scale,
log2_scale + sizeof(*dst));
@@ -145,10 +145,10 @@ static INLINE void smooth_h_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
int bh, const uint8_t *above,
const uint8_t *left) {
const uint8_t right_pred = above[bw - 1]; // estimated by top-right pixel
- const uint8_t *const sm_weights = sm_weight_arrays + bw;
- // scale = 2^sm_weight_log2_scale
- const int log2_scale = sm_weight_log2_scale;
- const uint16_t scale = (1 << sm_weight_log2_scale);
+ const uint8_t *const sm_weights = smooth_weights + bw - 4;
+ // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
+ const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
+ const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
sm_weights_sanity_checks(sm_weights, sm_weights, scale,
log2_scale + sizeof(*dst));
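
All three SMOOTH variants share the same blend; SMOOTH_H, shown above, interpolates each pixel between the row's left neighbor and the top-right estimate using a per-column quadratic weight. A scalar sketch of one pixel, assuming scale = 1 << SMOOTH_WEIGHT_LOG2_SCALE:

    #include <stdint.h>

    static uint8_t smooth_h_pixel(uint8_t left_px, uint8_t right_pred,
                                  uint8_t w, int log2_scale) {
      const uint32_t scale = 1u << log2_scale;
      const uint32_t sum = left_px * w + right_pred * (scale - w);
      return (uint8_t)((sum + (scale >> 1)) >> log2_scale);  /* round */
    }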
@@ -405,11 +405,11 @@ static INLINE void highbd_smooth_predictor(uint16_t *dst, ptrdiff_t stride,
(void)bd;
const uint16_t below_pred = left[bh - 1]; // estimated by bottom-left pixel
const uint16_t right_pred = above[bw - 1]; // estimated by top-right pixel
- const uint8_t *const sm_weights_w = sm_weight_arrays + bw;
- const uint8_t *const sm_weights_h = sm_weight_arrays + bh;
- // scale = 2 * 2^sm_weight_log2_scale
- const int log2_scale = 1 + sm_weight_log2_scale;
- const uint16_t scale = (1 << sm_weight_log2_scale);
+ const uint8_t *const sm_weights_w = smooth_weights + bw - 4;
+ const uint8_t *const sm_weights_h = smooth_weights + bh - 4;
+ // scale = 2 * 2^SMOOTH_WEIGHT_LOG2_SCALE
+ const int log2_scale = 1 + SMOOTH_WEIGHT_LOG2_SCALE;
+ const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
sm_weights_sanity_checks(sm_weights_w, sm_weights_h, scale,
log2_scale + sizeof(*dst));
int r;
@@ -437,10 +437,10 @@ static INLINE void highbd_smooth_v_predictor(uint16_t *dst, ptrdiff_t stride,
const uint16_t *left, int bd) {
(void)bd;
const uint16_t below_pred = left[bh - 1]; // estimated by bottom-left pixel
- const uint8_t *const sm_weights = sm_weight_arrays + bh;
- // scale = 2^sm_weight_log2_scale
- const int log2_scale = sm_weight_log2_scale;
- const uint16_t scale = (1 << sm_weight_log2_scale);
+ const uint8_t *const sm_weights = smooth_weights + bh - 4;
+ // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
+ const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
+ const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
sm_weights_sanity_checks(sm_weights, sm_weights, scale,
log2_scale + sizeof(*dst));
@@ -468,10 +468,10 @@ static INLINE void highbd_smooth_h_predictor(uint16_t *dst, ptrdiff_t stride,
const uint16_t *left, int bd) {
(void)bd;
const uint16_t right_pred = above[bw - 1]; // estimated by top-right pixel
- const uint8_t *const sm_weights = sm_weight_arrays + bw;
- // scale = 2^sm_weight_log2_scale
- const int log2_scale = sm_weight_log2_scale;
- const uint16_t scale = (1 << sm_weight_log2_scale);
+ const uint8_t *const sm_weights = smooth_weights + bw - 4;
+ // scale = 2^SMOOTH_WEIGHT_LOG2_SCALE
+ const int log2_scale = SMOOTH_WEIGHT_LOG2_SCALE;
+ const uint16_t scale = (1 << SMOOTH_WEIGHT_LOG2_SCALE);
sm_weights_sanity_checks(sm_weights, sm_weights, scale,
log2_scale + sizeof(*dst));
@@ -723,25 +723,6 @@ void aom_highbd_dc_predictor_64x32_c(uint16_t *dst, ptrdiff_t stride,
}
/* clang-format off */
-#if CONFIG_REALTIME_ONLY
-#define intra_pred_rectangular(type) \
- intra_pred_sized(type, 4, 8) \
- intra_pred_sized(type, 8, 4) \
- intra_pred_sized(type, 8, 16) \
- intra_pred_sized(type, 16, 8) \
- intra_pred_sized(type, 16, 32) \
- intra_pred_sized(type, 32, 16) \
- intra_pred_sized(type, 32, 64) \
- intra_pred_sized(type, 64, 32) \
- intra_pred_highbd_sized(type, 4, 8) \
- intra_pred_highbd_sized(type, 8, 4) \
- intra_pred_highbd_sized(type, 8, 16) \
- intra_pred_highbd_sized(type, 16, 8) \
- intra_pred_highbd_sized(type, 16, 32) \
- intra_pred_highbd_sized(type, 32, 16) \
- intra_pred_highbd_sized(type, 32, 64) \
- intra_pred_highbd_sized(type, 64, 32)
-#else
#define intra_pred_rectangular(type) \
intra_pred_sized(type, 4, 8) \
intra_pred_sized(type, 8, 4) \
@@ -771,7 +752,6 @@ void aom_highbd_dc_predictor_64x32_c(uint16_t *dst, ptrdiff_t stride,
intra_pred_highbd_sized(type, 32, 8) \
intra_pred_highbd_sized(type, 16, 64) \
intra_pred_highbd_sized(type, 64, 16)
-#endif
#define intra_pred_above_4x4(type) \
intra_pred_sized(type, 8, 8) \
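
With the CONFIG_REALTIME_ONLY branch gone, all rectangular sizes are generated unconditionally. intra_pred_sized() presumably expands to a per-size wrapper (e.g. aom_<type>_predictor_<w>x<h>_c); a simplified analogue of the X-macro pattern, with hypothetical names:

    /* Each invocation pastes a sized declaration; FOR_EACH_RECT applies it
       to every (w, h) pair in one place. */
    #define DECLARE_SIZED(name, w, h) \
      void name##_##w##x##h(unsigned char *dst, int stride);

    #define FOR_EACH_RECT(name) \
      DECLARE_SIZED(name, 4, 8) \
      DECLARE_SIZED(name, 8, 4)

    FOR_EACH_RECT(demo_predictor) /* declares demo_predictor_4x8, _8x4 */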
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/intrapred_common.h b/chromium/third_party/libaom/source/libaom/aom_dsp/intrapred_common.h
index 3ec62a86ef1..6172224be14 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/intrapred_common.h
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/intrapred_common.h
@@ -15,18 +15,14 @@
#include "config/aom_config.h"
// Weights are quadratic from '1' to '1 / block_size', scaled by
-// 2^sm_weight_log2_scale.
-static const int sm_weight_log2_scale = 8;
+// 2^SMOOTH_WEIGHT_LOG2_SCALE.
+#define SMOOTH_WEIGHT_LOG2_SCALE 8
-// max(block_size_wide[BLOCK_LARGEST], block_size_high[BLOCK_LARGEST])
-#define MAX_BLOCK_DIM 64
-
-/* clang-format off */
-static const uint8_t sm_weight_arrays[2 * MAX_BLOCK_DIM] = {
- // Unused, because we always offset by bs, which is at least 2.
- 0, 0,
- // bs = 2
- 255, 128,
+// Note these arrays are aligned to ensure NEON loads using a cast to uint32_t*
+// have sufficient alignment. Using 8 preserves the potential for an alignment
+// hint in load_weight_w8(). For that case, this could be increased to 16 to
+// allow an aligned load in x86.
+DECLARE_ALIGNED(8, static const uint8_t, smooth_weights[]) = {
// bs = 4
255, 149, 85, 64,
// bs = 8
@@ -40,8 +36,24 @@ static const uint8_t sm_weight_arrays[2 * MAX_BLOCK_DIM] = {
255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169, 163, 156,
150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96, 91, 86, 82, 77, 73, 69,
65, 61, 57, 54, 50, 47, 44, 41, 38, 35, 32, 29, 27, 25, 22, 20, 18, 16, 15,
- 13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4,
+ 13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4
+};
+
+DECLARE_ALIGNED(8, static const uint16_t, smooth_weights_u16[]) = {
+ // block dimension = 4
+ 255, 149, 85, 64,
+ // block dimension = 8
+ 255, 197, 146, 105, 73, 50, 37, 32,
+ // block dimension = 16
+ 255, 225, 196, 170, 145, 123, 102, 84, 68, 54, 43, 33, 26, 20, 17, 16,
+ // block dimension = 32
+ 255, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122, 111, 101, 92, 83, 74,
+ 66, 59, 52, 45, 39, 34, 29, 25, 21, 17, 14, 12, 10, 9, 8, 8,
+ // block dimension = 64
+ 255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169, 163, 156,
+ 150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96, 91, 86, 82, 77, 73, 69,
+ 65, 61, 57, 54, 50, 47, 44, 41, 38, 35, 32, 29, 27, 25, 22, 20, 18, 16, 15,
+ 13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4
};
-/* clang-format on */
#endif // AOM_AOM_DSP_INTRAPRED_COMMON_H_
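
DECLARE_ALIGNED is libaom's portable alignment wrapper; the 8-byte alignment lets NEON code load the byte weights through a uint32_t pointer, and the parallel smooth_weights_u16 table holds the same values widened to 16 bits, presumably so high-bit-depth SIMD kernels can load 16-bit lanes without widening. A minimal analogue of the macro (a sketch, not libaom's exact definition):

    #if defined(_MSC_VER)
    #define MY_ALIGNED(n, typ, val) __declspec(align(n)) typ val
    #else
    #define MY_ALIGNED(n, typ, val) typ val __attribute__((aligned(n)))
    #endif

    MY_ALIGNED(8, static const unsigned char, demo_weights[]) = {
      255, 149, 85, 64  /* bs = 4 row, as in the table above */
    };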
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mathutils.h b/chromium/third_party/libaom/source/libaom/aom_dsp/mathutils.h
index a52a2df494f..3ffca8a17ef 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mathutils.h
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/mathutils.h
@@ -70,6 +70,7 @@ static INLINE int least_squares(int n, double *A, int rows, int stride,
double *AtA, *Atb;
if (!scratch) {
scratch_ = (double *)aom_malloc(sizeof(*scratch) * n * (n + 1));
+ if (!scratch_) return 0;
scratch = scratch_;
}
AtA = scratch;
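
least_squares() solves the normal equations (A^T A)x = A^T b; the scratch block packs A^T A (n*n doubles) followed by A^T b (n doubles), which is why n*(n+1) doubles are allocated and why the new guard returns 0 on failure. A sketch of the allocation and packing:

    #include <stdlib.h>

    static int solve_normal_equations_stub(int n) {
      double *scratch = malloc(sizeof(*scratch) * n * (n + 1));
      if (!scratch) return 0;         /* the new OOM guard */
      double *AtA = scratch;          /* n*n entries for A^T A */
      double *Atb = scratch + n * n;  /* n entries for A^T b */
      (void)AtA; (void)Atb;           /* ... fill and solve here ... */
      free(scratch);
      return 1;
    }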
@@ -138,6 +139,7 @@ static INLINE int svdcmp(double **u, int m, int n, double w[], double **v) {
int flag, i, its, j, jj, k, l, nm;
double anorm, c, f, g, h, s, scale, x, y, z;
double *rv1 = (double *)aom_malloc(sizeof(*rv1) * (n + 1));
+ if (!rv1) return 0;
g = scale = anorm = 0.0;
for (i = 0; i < n; i++) {
l = i + 1;
@@ -333,8 +335,8 @@ static INLINE int SVD(double *U, double *W, double *V, double *matx, int M,
nrV[i] = &V[i * N];
}
} else {
- if (nrU) aom_free(nrU);
- if (nrV) aom_free(nrV);
+ aom_free(nrU);
+ aom_free(nrV);
return 1;
}
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve8_horiz_msa.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve8_horiz_msa.c
deleted file mode 100644
index c8ab61249a1..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve8_horiz_msa.c
+++ /dev/null
@@ -1,693 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/aom_convolve_msa.h"
-
-static void common_hz_8t_4x4_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter) {
- v16u8 mask0, mask1, mask2, mask3, out;
- v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
- v8i16 filt, out0, out1;
-
- mask0 = LD_UB(&mc_filt_mask_arr[16]);
- src -= 3;
-
- /* rearranging filter */
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- mask1 = mask0 + 2;
- mask2 = mask0 + 4;
- mask3 = mask0 + 6;
-
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- XORI_B4_128_SB(src0, src1, src2, src3);
- HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3,
- filt0, filt1, filt2, filt3, out0, out1);
- SRARI_H2_SH(out0, out1, FILTER_BITS);
- SAT_SH2_SH(out0, out1, 7);
- out = PCKEV_XORI128_UB(out0, out1);
- ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
-}
-
-static void common_hz_8t_4x8_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter) {
- v16i8 filt0, filt1, filt2, filt3;
- v16i8 src0, src1, src2, src3;
- v16u8 mask0, mask1, mask2, mask3, out;
- v8i16 filt, out0, out1, out2, out3;
-
- mask0 = LD_UB(&mc_filt_mask_arr[16]);
- src -= 3;
-
- /* rearranging filter */
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- mask1 = mask0 + 2;
- mask2 = mask0 + 4;
- mask3 = mask0 + 6;
-
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- XORI_B4_128_SB(src0, src1, src2, src3);
- src += (4 * src_stride);
- HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3,
- filt0, filt1, filt2, filt3, out0, out1);
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- XORI_B4_128_SB(src0, src1, src2, src3);
- HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3,
- filt0, filt1, filt2, filt3, out2, out3);
- SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
- SAT_SH4_SH(out0, out1, out2, out3, 7);
- out = PCKEV_XORI128_UB(out0, out1);
- ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
- dst += (4 * dst_stride);
- out = PCKEV_XORI128_UB(out2, out3);
- ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
-}
-
-static void common_hz_8t_4w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- if (4 == height) {
- common_hz_8t_4x4_msa(src, src_stride, dst, dst_stride, filter);
- } else if (8 == height) {
- common_hz_8t_4x8_msa(src, src_stride, dst, dst_stride, filter);
- }
-}
-
-static void common_hz_8t_8x4_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter) {
- v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
- v16u8 mask0, mask1, mask2, mask3, tmp0, tmp1;
- v8i16 filt, out0, out1, out2, out3;
-
- mask0 = LD_UB(&mc_filt_mask_arr[0]);
- src -= 3;
-
- /* rearranging filter */
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- mask1 = mask0 + 2;
- mask2 = mask0 + 4;
- mask3 = mask0 + 6;
-
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- XORI_B4_128_SB(src0, src1, src2, src3);
- HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3,
- filt0, filt1, filt2, filt3, out0, out1, out2,
- out3);
- SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
- SAT_SH4_SH(out0, out1, out2, out3, 7);
- tmp0 = PCKEV_XORI128_UB(out0, out1);
- tmp1 = PCKEV_XORI128_UB(out2, out3);
- ST8x4_UB(tmp0, tmp1, dst, dst_stride);
-}
-
-static void common_hz_8t_8x8mult_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
- v16u8 mask0, mask1, mask2, mask3, tmp0, tmp1;
- v8i16 filt, out0, out1, out2, out3;
-
- mask0 = LD_UB(&mc_filt_mask_arr[0]);
- src -= 3;
-
- /* rearranging filter */
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- mask1 = mask0 + 2;
- mask2 = mask0 + 4;
- mask3 = mask0 + 6;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- XORI_B4_128_SB(src0, src1, src2, src3);
- src += (4 * src_stride);
- HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
- mask3, filt0, filt1, filt2, filt3, out0, out1,
- out2, out3);
- SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
- SAT_SH4_SH(out0, out1, out2, out3, 7);
- tmp0 = PCKEV_XORI128_UB(out0, out1);
- tmp1 = PCKEV_XORI128_UB(out2, out3);
- ST8x4_UB(tmp0, tmp1, dst, dst_stride);
- dst += (4 * dst_stride);
- }
-}
-
-static void common_hz_8t_8w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- if (4 == height) {
- common_hz_8t_8x4_msa(src, src_stride, dst, dst_stride, filter);
- } else {
- common_hz_8t_8x8mult_msa(src, src_stride, dst, dst_stride, filter, height);
- }
-}
-
-static void common_hz_8t_16w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
- v16u8 mask0, mask1, mask2, mask3, out;
- v8i16 filt, out0, out1, out2, out3;
-
- mask0 = LD_UB(&mc_filt_mask_arr[0]);
- src -= 3;
-
- /* rearranging filter */
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- mask1 = mask0 + 2;
- mask2 = mask0 + 4;
- mask3 = mask0 + 6;
-
- for (loop_cnt = (height >> 1); loop_cnt--;) {
- LD_SB2(src, src_stride, src0, src2);
- LD_SB2(src + 8, src_stride, src1, src3);
- XORI_B4_128_SB(src0, src1, src2, src3);
- src += (2 * src_stride);
- HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
- mask3, filt0, filt1, filt2, filt3, out0, out1,
- out2, out3);
- SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
- SAT_SH4_SH(out0, out1, out2, out3, 7);
- out = PCKEV_XORI128_UB(out0, out1);
- ST_UB(out, dst);
- dst += dst_stride;
- out = PCKEV_XORI128_UB(out2, out3);
- ST_UB(out, dst);
- dst += dst_stride;
- }
-}
-
-static void common_hz_8t_32w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
- v16u8 mask0, mask1, mask2, mask3, out;
- v8i16 filt, out0, out1, out2, out3;
-
- mask0 = LD_UB(&mc_filt_mask_arr[0]);
- src -= 3;
-
- /* rearranging filter */
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- mask1 = mask0 + 2;
- mask2 = mask0 + 4;
- mask3 = mask0 + 6;
-
- for (loop_cnt = (height >> 1); loop_cnt--;) {
- src0 = LD_SB(src);
- src2 = LD_SB(src + 16);
- src3 = LD_SB(src + 24);
- src1 = __msa_sldi_b(src2, src0, 8);
- src += src_stride;
- XORI_B4_128_SB(src0, src1, src2, src3);
- HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
- mask3, filt0, filt1, filt2, filt3, out0, out1,
- out2, out3);
- SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
- SAT_SH4_SH(out0, out1, out2, out3, 7);
-
- src0 = LD_SB(src);
- src2 = LD_SB(src + 16);
- src3 = LD_SB(src + 24);
- src1 = __msa_sldi_b(src2, src0, 8);
- src += src_stride;
-
- out = PCKEV_XORI128_UB(out0, out1);
- ST_UB(out, dst);
- out = PCKEV_XORI128_UB(out2, out3);
- ST_UB(out, dst + 16);
- dst += dst_stride;
-
- XORI_B4_128_SB(src0, src1, src2, src3);
- HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
- mask3, filt0, filt1, filt2, filt3, out0, out1,
- out2, out3);
- SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
- SAT_SH4_SH(out0, out1, out2, out3, 7);
- out = PCKEV_XORI128_UB(out0, out1);
- ST_UB(out, dst);
- out = PCKEV_XORI128_UB(out2, out3);
- ST_UB(out, dst + 16);
- dst += dst_stride;
- }
-}
-
-static void common_hz_8t_64w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- int32_t loop_cnt;
- v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
- v16u8 mask0, mask1, mask2, mask3, out;
- v8i16 filt, out0, out1, out2, out3;
-
- mask0 = LD_UB(&mc_filt_mask_arr[0]);
- src -= 3;
-
- /* rearranging filter */
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- mask1 = mask0 + 2;
- mask2 = mask0 + 4;
- mask3 = mask0 + 6;
-
- for (loop_cnt = height; loop_cnt--;) {
- src0 = LD_SB(src);
- src2 = LD_SB(src + 16);
- src3 = LD_SB(src + 24);
- src1 = __msa_sldi_b(src2, src0, 8);
-
- XORI_B4_128_SB(src0, src1, src2, src3);
- HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
- mask3, filt0, filt1, filt2, filt3, out0, out1,
- out2, out3);
- SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
- SAT_SH4_SH(out0, out1, out2, out3, 7);
- out = PCKEV_XORI128_UB(out0, out1);
- ST_UB(out, dst);
- out = PCKEV_XORI128_UB(out2, out3);
- ST_UB(out, dst + 16);
-
- src0 = LD_SB(src + 32);
- src2 = LD_SB(src + 48);
- src3 = LD_SB(src + 56);
- src1 = __msa_sldi_b(src2, src0, 8);
- src += src_stride;
-
- XORI_B4_128_SB(src0, src1, src2, src3);
- HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
- mask3, filt0, filt1, filt2, filt3, out0, out1,
- out2, out3);
- SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
- SAT_SH4_SH(out0, out1, out2, out3, 7);
- out = PCKEV_XORI128_UB(out0, out1);
- ST_UB(out, dst + 32);
- out = PCKEV_XORI128_UB(out2, out3);
- ST_UB(out, dst + 48);
- dst += dst_stride;
- }
-}
-
-static void common_hz_2t_4x4_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter) {
- v16i8 src0, src1, src2, src3, mask;
- v16u8 filt0, vec0, vec1, res0, res1;
- v8u16 vec2, vec3, filt;
-
- mask = LD_SB(&mc_filt_mask_arr[16]);
-
- /* rearranging filter */
- filt = LD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
-
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt0, filt0, vec2, vec3);
- SRARI_H2_UH(vec2, vec3, FILTER_BITS);
- PCKEV_B2_UB(vec2, vec2, vec3, vec3, res0, res1);
- ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
-}
-
-static void common_hz_2t_4x8_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter) {
- v16u8 vec0, vec1, vec2, vec3, filt0;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
- v16i8 res0, res1, res2, res3;
- v8u16 vec4, vec5, vec6, vec7, filt;
-
- mask = LD_SB(&mc_filt_mask_arr[16]);
-
- /* rearranging filter */
- filt = LD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
-
- LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
- VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1);
- VSHF_B2_UB(src4, src5, src6, src7, mask, mask, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec4, vec5,
- vec6, vec7);
- SRARI_H4_UH(vec4, vec5, vec6, vec7, FILTER_BITS);
- PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7, res0, res1, res2,
- res3);
- ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
- dst += (4 * dst_stride);
- ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride);
-}
-
-static void common_hz_2t_4w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- if (4 == height) {
- common_hz_2t_4x4_msa(src, src_stride, dst, dst_stride, filter);
- } else if (8 == height) {
- common_hz_2t_4x8_msa(src, src_stride, dst, dst_stride, filter);
- }
-}
-
-static void common_hz_2t_8x4_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter) {
- v16u8 filt0;
- v16i8 src0, src1, src2, src3, mask;
- v8u16 vec0, vec1, vec2, vec3, filt;
-
- mask = LD_SB(&mc_filt_mask_arr[0]);
-
- /* rearranging filter */
- filt = LD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
-
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
- vec2, vec3);
- SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
- PCKEV_B2_SB(vec1, vec0, vec3, vec2, src0, src1);
- ST8x4_UB(src0, src1, dst, dst_stride);
-}
-
-static void common_hz_2t_8x8mult_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- v16u8 filt0;
- v16i8 src0, src1, src2, src3, mask, out0, out1;
- v8u16 vec0, vec1, vec2, vec3, filt;
-
- mask = LD_SB(&mc_filt_mask_arr[0]);
-
- /* rearranging filter */
- filt = LD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
-
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
-
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
- vec2, vec3);
- SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
-
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
-
- PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
- ST8x4_UB(out0, out1, dst, dst_stride);
- dst += (4 * dst_stride);
-
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
- vec2, vec3);
- SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
- PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
- ST8x4_UB(out0, out1, dst, dst_stride);
- dst += (4 * dst_stride);
-
- if (16 == height) {
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
-
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
- vec2, vec3);
- SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
- LD_SB4(src, src_stride, src0, src1, src2, src3);
-
- PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
- ST8x4_UB(out0, out1, dst, dst_stride);
-
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
- vec2, vec3);
- SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
- PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
- ST8x4_UB(out0, out1, dst + 4 * dst_stride, dst_stride);
- }
-}
-
-static void common_hz_2t_8w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- if (4 == height) {
- common_hz_2t_8x4_msa(src, src_stride, dst, dst_stride, filter);
- } else {
- common_hz_2t_8x8mult_msa(src, src_stride, dst, dst_stride, filter, height);
- }
-}
-
-static void common_hz_2t_16w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
- v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
- v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt;
-
- mask = LD_SB(&mc_filt_mask_arr[0]);
-
- loop_cnt = (height >> 2) - 1;
-
- /* rearranging filter */
- filt = LD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
-
- LD_SB4(src, src_stride, src0, src2, src4, src6);
- LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
- src += (4 * src_stride);
-
- VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
- VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
- VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1,
- out2, out3);
- DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5,
- out6, out7);
- SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS);
- SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS);
- PCKEV_ST_SB(out0, out1, dst);
- dst += dst_stride;
- PCKEV_ST_SB(out2, out3, dst);
- dst += dst_stride;
- PCKEV_ST_SB(out4, out5, dst);
- dst += dst_stride;
- PCKEV_ST_SB(out6, out7, dst);
- dst += dst_stride;
-
- for (; loop_cnt--;) {
- LD_SB4(src, src_stride, src0, src2, src4, src6);
- LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
- src += (4 * src_stride);
-
- VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
- VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
- VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1,
- out2, out3);
- DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5,
- out6, out7);
- SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS);
- SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS);
- PCKEV_ST_SB(out0, out1, dst);
- dst += dst_stride;
- PCKEV_ST_SB(out2, out3, dst);
- dst += dst_stride;
- PCKEV_ST_SB(out4, out5, dst);
- dst += dst_stride;
- PCKEV_ST_SB(out6, out7, dst);
- dst += dst_stride;
- }
-}
-
-static void common_hz_2t_32w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
- v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
- v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt;
-
- mask = LD_SB(&mc_filt_mask_arr[0]);
-
- /* rearranging filter */
- filt = LD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
-
- for (loop_cnt = height >> 1; loop_cnt--;) {
- src0 = LD_SB(src);
- src2 = LD_SB(src + 16);
- src3 = LD_SB(src + 24);
- src1 = __msa_sldi_b(src2, src0, 8);
- src += src_stride;
- src4 = LD_SB(src);
- src6 = LD_SB(src + 16);
- src7 = LD_SB(src + 24);
- src5 = __msa_sldi_b(src6, src4, 8);
- src += src_stride;
-
- VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
- VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
- VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1,
- out2, out3);
- DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5,
- out6, out7);
- SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS);
- SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS);
- PCKEV_ST_SB(out0, out1, dst);
- PCKEV_ST_SB(out2, out3, dst + 16);
- dst += dst_stride;
- PCKEV_ST_SB(out4, out5, dst);
- PCKEV_ST_SB(out6, out7, dst + 16);
- dst += dst_stride;
- }
-}
-
-static void common_hz_2t_64w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
- v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
- v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt;
-
- mask = LD_SB(&mc_filt_mask_arr[0]);
-
- /* rearranging filter */
- filt = LD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
-
- for (loop_cnt = height; loop_cnt--;) {
- src0 = LD_SB(src);
- src2 = LD_SB(src + 16);
- src4 = LD_SB(src + 32);
- src6 = LD_SB(src + 48);
- src7 = LD_SB(src + 56);
- SLDI_B3_SB(src2, src4, src6, src0, src2, src4, src1, src3, src5, 8);
- src += src_stride;
-
- VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
- VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
- VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1,
- out2, out3);
- DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5,
- out6, out7);
- SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS);
- SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS);
- PCKEV_ST_SB(out0, out1, dst);
- PCKEV_ST_SB(out2, out3, dst + 16);
- PCKEV_ST_SB(out4, out5, dst + 32);
- PCKEV_ST_SB(out6, out7, dst + 48);
- dst += dst_stride;
- }
-}
-
-void aom_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h) {
- int8_t cnt, filt_hor[8];
-
- assert(x_step_q4 == 16);
- assert(((const int32_t *)filter_x)[1] != 0x800000);
-
- for (cnt = 0; cnt < 8; ++cnt) {
- filt_hor[cnt] = filter_x[cnt];
- }
-
- if (((const int32_t *)filter_x)[0] == 0) {
- switch (w) {
- case 4:
- common_hz_2t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- &filt_hor[3], h);
- break;
- case 8:
- common_hz_2t_8w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- &filt_hor[3], h);
- break;
- case 16:
- common_hz_2t_16w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- &filt_hor[3], h);
- break;
- case 32:
- common_hz_2t_32w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- &filt_hor[3], h);
- break;
- case 64:
- common_hz_2t_64w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- &filt_hor[3], h);
- break;
- default:
- aom_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
- break;
- }
- } else {
- switch (w) {
- case 4:
- common_hz_8t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- filt_hor, h);
- break;
- case 8:
- common_hz_8t_8w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- filt_hor, h);
- break;
- case 16:
- common_hz_8t_16w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- filt_hor, h);
- break;
- case 32:
- common_hz_8t_32w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- filt_hor, h);
- break;
- case 64:
- common_hz_8t_64w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- filt_hor, h);
- break;
- default:
- aom_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
- break;
- }
- }
-}
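
This file (and the rest of the MIPS MSA/DSPr2 code below) is deleted outright by the libaom update. Its wrapper chose a kernel by filter shape: when the first 32-bit word of the eight int16 taps is zero (taps 0 and 1), the filter was treated as 2-tap bilinear and the fast path read from tap 3. A scalar sketch of that test, using memcpy instead of the aliasing cast:

    #include <stdint.h>
    #include <string.h>

    static int uses_bilinear_path(const int16_t taps[8]) {
      int32_t lo;
      memcpy(&lo, taps, sizeof(lo));  /* same bytes the int32 cast read */
      return lo == 0;
    }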
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve8_vert_msa.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve8_vert_msa.c
deleted file mode 100644
index 2c3bc084cca..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve8_vert_msa.c
+++ /dev/null
@@ -1,699 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/aom_convolve_msa.h"
-
-static void common_vt_8t_4w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
- v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
- v16i8 src65_r, src87_r, src109_r, src2110, src4332, src6554, src8776;
- v16i8 src10998, filt0, filt1, filt2, filt3;
- v16u8 out;
- v8i16 filt, out10, out32;
-
- src -= (3 * src_stride);
-
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
- src += (7 * src_stride);
-
- ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r,
- src54_r, src21_r);
- ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
- ILVR_D3_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r, src2110,
- src4332, src6554);
- XORI_B3_128_SB(src2110, src4332, src6554);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src, src_stride, src7, src8, src9, src10);
- src += (4 * src_stride);
-
- ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
- src87_r, src98_r, src109_r);
- ILVR_D2_SB(src87_r, src76_r, src109_r, src98_r, src8776, src10998);
- XORI_B2_128_SB(src8776, src10998);
- out10 = FILT_8TAP_DPADD_S_H(src2110, src4332, src6554, src8776, filt0,
- filt1, filt2, filt3);
- out32 = FILT_8TAP_DPADD_S_H(src4332, src6554, src8776, src10998, filt0,
- filt1, filt2, filt3);
- SRARI_H2_SH(out10, out32, FILTER_BITS);
- SAT_SH2_SH(out10, out32, 7);
- out = PCKEV_XORI128_UB(out10, out32);
- ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
- dst += (4 * dst_stride);
-
- src2110 = src6554;
- src4332 = src8776;
- src6554 = src10998;
- src6 = src10;
- }
-}
-
-static void common_vt_8t_8w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
- v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
- v16i8 src65_r, src87_r, src109_r, filt0, filt1, filt2, filt3;
- v16u8 tmp0, tmp1;
- v8i16 filt, out0_r, out1_r, out2_r, out3_r;
-
- src -= (3 * src_stride);
-
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
- XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
- src += (7 * src_stride);
- ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r,
- src54_r, src21_r);
- ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src, src_stride, src7, src8, src9, src10);
- XORI_B4_128_SB(src7, src8, src9, src10);
- src += (4 * src_stride);
-
- ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
- src87_r, src98_r, src109_r);
- out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0,
- filt1, filt2, filt3);
- out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0,
- filt1, filt2, filt3);
- out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0,
- filt1, filt2, filt3);
- out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0,
- filt1, filt2, filt3);
- SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, FILTER_BITS);
- SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
- tmp0 = PCKEV_XORI128_UB(out0_r, out1_r);
- tmp1 = PCKEV_XORI128_UB(out2_r, out3_r);
- ST8x4_UB(tmp0, tmp1, dst, dst_stride);
- dst += (4 * dst_stride);
-
- src10_r = src54_r;
- src32_r = src76_r;
- src54_r = src98_r;
- src21_r = src65_r;
- src43_r = src87_r;
- src65_r = src109_r;
- src6 = src10;
- }
-}
-
-static void common_vt_8t_16w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
- v16i8 filt0, filt1, filt2, filt3;
- v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
- v16i8 src65_r, src87_r, src109_r, src10_l, src32_l, src54_l, src76_l;
- v16i8 src98_l, src21_l, src43_l, src65_l, src87_l, src109_l;
- v16u8 tmp0, tmp1, tmp2, tmp3;
- v8i16 filt, out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
-
- src -= (3 * src_stride);
-
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
- XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
- src += (7 * src_stride);
- ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r,
- src54_r, src21_r);
- ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
- ILVL_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_l, src32_l,
- src54_l, src21_l);
- ILVL_B2_SB(src4, src3, src6, src5, src43_l, src65_l);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src, src_stride, src7, src8, src9, src10);
- XORI_B4_128_SB(src7, src8, src9, src10);
- src += (4 * src_stride);
-
- ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
- src87_r, src98_r, src109_r);
- ILVL_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_l,
- src87_l, src98_l, src109_l);
- out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0,
- filt1, filt2, filt3);
- out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0,
- filt1, filt2, filt3);
- out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0,
- filt1, filt2, filt3);
- out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0,
- filt1, filt2, filt3);
- out0_l = FILT_8TAP_DPADD_S_H(src10_l, src32_l, src54_l, src76_l, filt0,
- filt1, filt2, filt3);
- out1_l = FILT_8TAP_DPADD_S_H(src21_l, src43_l, src65_l, src87_l, filt0,
- filt1, filt2, filt3);
- out2_l = FILT_8TAP_DPADD_S_H(src32_l, src54_l, src76_l, src98_l, filt0,
- filt1, filt2, filt3);
- out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l, filt0,
- filt1, filt2, filt3);
- SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, FILTER_BITS);
- SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, FILTER_BITS);
- SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
- SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
- PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l, out3_r,
- tmp0, tmp1, tmp2, tmp3);
- XORI_B4_128_UB(tmp0, tmp1, tmp2, tmp3);
- ST_UB4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride);
- dst += (4 * dst_stride);
-
- src10_r = src54_r;
- src32_r = src76_r;
- src54_r = src98_r;
- src21_r = src65_r;
- src43_r = src87_r;
- src65_r = src109_r;
- src10_l = src54_l;
- src32_l = src76_l;
- src54_l = src98_l;
- src21_l = src65_l;
- src43_l = src87_l;
- src65_l = src109_l;
- src6 = src10;
- }
-}
-
-static void common_vt_8t_16w_mult_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height,
- int32_t width) {
- const uint8_t *src_tmp;
- uint8_t *dst_tmp;
- uint32_t loop_cnt, cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
- v16i8 filt0, filt1, filt2, filt3;
- v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
- v16i8 src65_r, src87_r, src109_r, src10_l, src32_l, src54_l, src76_l;
- v16i8 src98_l, src21_l, src43_l, src65_l, src87_l, src109_l;
- v16u8 tmp0, tmp1, tmp2, tmp3;
- v8i16 filt, out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
-
- src -= (3 * src_stride);
-
- filt = LD_SH(filter);
- SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
-
- for (cnt = (width >> 4); cnt--;) {
- src_tmp = src;
- dst_tmp = dst;
-
- LD_SB7(src_tmp, src_stride, src0, src1, src2, src3, src4, src5, src6);
- XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
- src_tmp += (7 * src_stride);
- ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r,
- src54_r, src21_r);
- ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
- ILVL_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_l, src32_l,
- src54_l, src21_l);
- ILVL_B2_SB(src4, src3, src6, src5, src43_l, src65_l);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src_tmp, src_stride, src7, src8, src9, src10);
- XORI_B4_128_SB(src7, src8, src9, src10);
- src_tmp += (4 * src_stride);
- ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
- src87_r, src98_r, src109_r);
- ILVL_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_l,
- src87_l, src98_l, src109_l);
- out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0,
- filt1, filt2, filt3);
- out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0,
- filt1, filt2, filt3);
- out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0,
- filt1, filt2, filt3);
- out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0,
- filt1, filt2, filt3);
- out0_l = FILT_8TAP_DPADD_S_H(src10_l, src32_l, src54_l, src76_l, filt0,
- filt1, filt2, filt3);
- out1_l = FILT_8TAP_DPADD_S_H(src21_l, src43_l, src65_l, src87_l, filt0,
- filt1, filt2, filt3);
- out2_l = FILT_8TAP_DPADD_S_H(src32_l, src54_l, src76_l, src98_l, filt0,
- filt1, filt2, filt3);
- out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l, filt0,
- filt1, filt2, filt3);
- SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, FILTER_BITS);
- SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, FILTER_BITS);
- SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
- SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
- PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
- out3_r, tmp0, tmp1, tmp2, tmp3);
- XORI_B4_128_UB(tmp0, tmp1, tmp2, tmp3);
- ST_UB4(tmp0, tmp1, tmp2, tmp3, dst_tmp, dst_stride);
- dst_tmp += (4 * dst_stride);
-
- src10_r = src54_r;
- src32_r = src76_r;
- src54_r = src98_r;
- src21_r = src65_r;
- src43_r = src87_r;
- src65_r = src109_r;
- src10_l = src54_l;
- src32_l = src76_l;
- src54_l = src98_l;
- src21_l = src65_l;
- src43_l = src87_l;
- src65_l = src109_l;
- src6 = src10;
- }
-
- src += 16;
- dst += 16;
- }
-}
-
-static void common_vt_8t_32w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride, filter, height,
- 32);
-}
-
-static void common_vt_8t_64w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride, filter, height,
- 64);
-}
-
-static void common_vt_2t_4x4_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter) {
- v16i8 src0, src1, src2, src3, src4;
- v16i8 src10_r, src32_r, src21_r, src43_r, src2110, src4332;
- v16u8 filt0;
- v8i16 filt;
- v8u16 tmp0, tmp1;
-
- filt = LD_SH(filter);
- filt0 = (v16u8)__msa_splati_h(filt, 0);
-
- LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
-
- ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
- src32_r, src43_r);
- ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
- DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- src2110 = __msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
- ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride);
-}
-
-static void common_vt_2t_4x8_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter) {
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
- v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r;
- v16i8 src65_r, src87_r, src2110, src4332, src6554, src8776;
- v8u16 tmp0, tmp1, tmp2, tmp3;
- v16u8 filt0;
- v8i16 filt;
-
- filt = LD_SH(filter);
- filt0 = (v16u8)__msa_splati_h(filt, 0);
-
- LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
- src += (8 * src_stride);
-
- src8 = LD_SB(src);
-
- ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
- src32_r, src43_r);
- ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r,
- src76_r, src87_r);
- ILVR_D4_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r, src87_r,
- src76_r, src2110, src4332, src6554, src8776);
- DOTP_UB4_UH(src2110, src4332, src6554, src8776, filt0, filt0, filt0, filt0,
- tmp0, tmp1, tmp2, tmp3);
- SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
- PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, src2110, src4332);
- ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride);
- ST4x4_UB(src4332, src4332, 0, 1, 2, 3, dst + 4 * dst_stride, dst_stride);
-}
-
-static void common_vt_2t_4w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- if (4 == height) {
- common_vt_2t_4x4_msa(src, src_stride, dst, dst_stride, filter);
- } else if (8 == height) {
- common_vt_2t_4x8_msa(src, src_stride, dst, dst_stride, filter);
- }
-}
-
-static void common_vt_2t_8x4_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter) {
- v16u8 src0, src1, src2, src3, src4, vec0, vec1, vec2, vec3, filt0;
- v16i8 out0, out1;
- v8u16 tmp0, tmp1, tmp2, tmp3;
- v8i16 filt;
-
- /* rearranging filter_y */
- filt = LD_SH(filter);
- filt0 = (v16u8)__msa_splati_h(filt, 0);
-
- LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
- ILVR_B2_UB(src1, src0, src2, src1, vec0, vec1);
- ILVR_B2_UB(src3, src2, src4, src3, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1,
- tmp2, tmp3);
- SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
- PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
- ST8x4_UB(out0, out1, dst, dst_stride);
-}
-
-static void common_vt_2t_8x8mult_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
- v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
- v16i8 out0, out1;
- v8u16 tmp0, tmp1, tmp2, tmp3;
- v8i16 filt;
-
- /* rearranging filter_y */
- filt = LD_SH(filter);
- filt0 = (v16u8)__msa_splati_h(filt, 0);
-
- src0 = LD_UB(src);
- src += src_stride;
-
- for (loop_cnt = (height >> 3); loop_cnt--;) {
- LD_UB8(src, src_stride, src1, src2, src3, src4, src5, src6, src7, src8);
- src += (8 * src_stride);
-
- ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3, vec0, vec1, vec2,
- vec3);
- ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7, vec4, vec5, vec6,
- vec7);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1,
- tmp2, tmp3);
- SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
- PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
- ST8x4_UB(out0, out1, dst, dst_stride);
- dst += (4 * dst_stride);
-
- DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, tmp0, tmp1,
- tmp2, tmp3);
- SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
- PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
- ST8x4_UB(out0, out1, dst, dst_stride);
- dst += (4 * dst_stride);
-
- src0 = src8;
- }
-}
-
-static void common_vt_2t_8w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- if (4 == height) {
- common_vt_2t_8x4_msa(src, src_stride, dst, dst_stride, filter);
- } else {
- common_vt_2t_8x8mult_msa(src, src_stride, dst, dst_stride, filter, height);
- }
-}
-
-static void common_vt_2t_16w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16u8 src0, src1, src2, src3, src4;
- v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
- v8u16 tmp0, tmp1, tmp2, tmp3;
- v8i16 filt;
-
- /* rearranging filter_y */
- filt = LD_SH(filter);
- filt0 = (v16u8)__msa_splati_h(filt, 0);
-
- src0 = LD_UB(src);
- src += src_stride;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
-
- ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
- ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
- DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- PCKEV_ST_SB(tmp0, tmp1, dst);
- dst += dst_stride;
-
- ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6);
- ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7);
- DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- PCKEV_ST_SB(tmp2, tmp3, dst);
- dst += dst_stride;
-
- DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- PCKEV_ST_SB(tmp0, tmp1, dst);
- dst += dst_stride;
-
- DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- PCKEV_ST_SB(tmp2, tmp3, dst);
- dst += dst_stride;
-
- src0 = src4;
- }
-}
-
-static void common_vt_2t_32w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9;
- v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
- v8u16 tmp0, tmp1, tmp2, tmp3;
- v8i16 filt;
-
- /* rearranging filter_y */
- filt = LD_SH(filter);
- filt0 = (v16u8)__msa_splati_h(filt, 0);
-
- src0 = LD_UB(src);
- src5 = LD_UB(src + 16);
- src += src_stride;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
- ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
-
- LD_UB4(src + 16, src_stride, src6, src7, src8, src9);
- src += (4 * src_stride);
-
- DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- PCKEV_ST_SB(tmp0, tmp1, dst);
- DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- PCKEV_ST_SB(tmp2, tmp3, dst + dst_stride);
-
- ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6);
- ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7);
- DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- PCKEV_ST_SB(tmp0, tmp1, dst + 2 * dst_stride);
-
- DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- PCKEV_ST_SB(tmp2, tmp3, dst + 3 * dst_stride);
-
- ILVR_B2_UB(src6, src5, src7, src6, vec0, vec2);
- ILVL_B2_UB(src6, src5, src7, src6, vec1, vec3);
- DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- PCKEV_ST_SB(tmp0, tmp1, dst + 16);
-
- DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- PCKEV_ST_SB(tmp2, tmp3, dst + 16 + dst_stride);
-
- ILVR_B2_UB(src8, src7, src9, src8, vec4, vec6);
- ILVL_B2_UB(src8, src7, src9, src8, vec5, vec7);
- DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- PCKEV_ST_SB(tmp0, tmp1, dst + 16 + 2 * dst_stride);
-
- DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- PCKEV_ST_SB(tmp2, tmp3, dst + 16 + 3 * dst_stride);
- dst += (4 * dst_stride);
-
- src0 = src4;
- src5 = src9;
- }
-}
-
-static void common_vt_2t_64w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- uint32_t loop_cnt;
- v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
- v16u8 src11, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
- v8u16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
- v8i16 filt;
-
- /* rearranging filter_y */
- filt = LD_SH(filter);
- filt0 = (v16u8)__msa_splati_h(filt, 0);
-
- LD_UB4(src, 16, src0, src3, src6, src9);
- src += src_stride;
-
- for (loop_cnt = (height >> 1); loop_cnt--;) {
- LD_UB2(src, src_stride, src1, src2);
- LD_UB2(src + 16, src_stride, src4, src5);
- LD_UB2(src + 32, src_stride, src7, src8);
- LD_UB2(src + 48, src_stride, src10, src11);
- src += (2 * src_stride);
-
- ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
- ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
- DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- PCKEV_ST_SB(tmp0, tmp1, dst);
-
- DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- PCKEV_ST_SB(tmp2, tmp3, dst + dst_stride);
-
- ILVR_B2_UB(src4, src3, src5, src4, vec4, vec6);
- ILVL_B2_UB(src4, src3, src5, src4, vec5, vec7);
- DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5);
- SRARI_H2_UH(tmp4, tmp5, FILTER_BITS);
- PCKEV_ST_SB(tmp4, tmp5, dst + 16);
-
- DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7);
- SRARI_H2_UH(tmp6, tmp7, FILTER_BITS);
- PCKEV_ST_SB(tmp6, tmp7, dst + 16 + dst_stride);
-
- ILVR_B2_UB(src7, src6, src8, src7, vec0, vec2);
- ILVL_B2_UB(src7, src6, src8, src7, vec1, vec3);
- DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- PCKEV_ST_SB(tmp0, tmp1, dst + 32);
-
- DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- PCKEV_ST_SB(tmp2, tmp3, dst + 32 + dst_stride);
-
- ILVR_B2_UB(src10, src9, src11, src10, vec4, vec6);
- ILVL_B2_UB(src10, src9, src11, src10, vec5, vec7);
- DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5);
- SRARI_H2_UH(tmp4, tmp5, FILTER_BITS);
- PCKEV_ST_SB(tmp4, tmp5, dst + 48);
-
- DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7);
- SRARI_H2_UH(tmp6, tmp7, FILTER_BITS);
- PCKEV_ST_SB(tmp6, tmp7, dst + 48 + dst_stride);
- dst += (2 * dst_stride);
-
- src0 = src2;
- src3 = src5;
- src6 = src8;
- src9 = src11;
- }
-}
-
-void aom_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h) {
- int8_t cnt, filt_ver[8];
-
- assert(y_step_q4 == 16);
- assert(((const int32_t *)filter_y)[1] != 0x800000);
-
- for (cnt = 8; cnt--;) {
- filt_ver[cnt] = filter_y[cnt];
- }
-
- if (((const int32_t *)filter_y)[0] == 0) {
- switch (w) {
- case 4:
- common_vt_2t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- &filt_ver[3], h);
- break;
- case 8:
- common_vt_2t_8w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- &filt_ver[3], h);
- break;
- case 16:
- common_vt_2t_16w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- &filt_ver[3], h);
- break;
- case 32:
- common_vt_2t_32w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- &filt_ver[3], h);
- break;
- case 64:
- common_vt_2t_64w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- &filt_ver[3], h);
- break;
- default:
- aom_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
- break;
- }
- } else {
- switch (w) {
- case 4:
- common_vt_8t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- filt_ver, h);
- break;
- case 8:
- common_vt_8t_8w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- filt_ver, h);
- break;
- case 16:
- common_vt_8t_16w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- filt_ver, h);
- break;
- case 32:
- common_vt_8t_32w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- filt_ver, h);
- break;
- case 64:
- common_vt_8t_64w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride,
- filt_ver, h);
- break;
- default:
- aom_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
- break;
- }
- }
-}
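
The deleted vertical wrapper mirrored the horizontal one. Its 2-tap path reduces, per pixel, to a rounded blend of two vertically adjacent source pixels; FILTER_BITS is 7 in libaom, so the two weights sum to 128. Scalar reference:

    #include <stdint.h>

    #define FILTER_BITS 7  /* as in libaom's aom_filter.h */

    static uint8_t blend2(uint8_t a, uint8_t b, int w0, int w1) {
      const int sum = a * w0 + b * w1;  /* w0 + w1 == 1 << FILTER_BITS */
      return (uint8_t)((sum + (1 << (FILTER_BITS - 1))) >> FILTER_BITS);
    }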
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve_copy_dspr2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve_copy_dspr2.c
deleted file mode 100644
index 12a213eaa36..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve_copy_dspr2.c
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdio.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/convolve_common_dspr2.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_ports/mem.h"
-
-#if HAVE_DSPR2
-void aom_convolve_copy_dspr2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride, int w, int h) {
- int x, y;
-
- /* prefetch data to cache memory */
- prefetch_load(src);
- prefetch_load(src + 32);
- prefetch_store(dst);
-
- switch (w) {
- case 4: {
- uint32_t tp1;
-
- /* 1 word storage */
- for (y = h; y--;) {
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
- prefetch_store(dst + dst_stride);
-
- __asm__ __volatile__(
- "ulw %[tp1], (%[src]) \n\t"
- "sw %[tp1], (%[dst]) \n\t" /* store */
-
- : [tp1] "=&r"(tp1)
- : [src] "r"(src), [dst] "r"(dst));
-
- src += src_stride;
- dst += dst_stride;
- }
- } break;
- case 8: {
- uint32_t tp1, tp2;
-
- /* 2 word storage */
- for (y = h; y--;) {
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
- prefetch_store(dst + dst_stride);
-
- __asm__ __volatile__(
- "ulw %[tp1], 0(%[src]) \n\t"
- "ulw %[tp2], 4(%[src]) \n\t"
- "sw %[tp1], 0(%[dst]) \n\t" /* store */
- "sw %[tp2], 4(%[dst]) \n\t" /* store */
-
- : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2)
- : [src] "r"(src), [dst] "r"(dst));
-
- src += src_stride;
- dst += dst_stride;
- }
- } break;
- case 16: {
- uint32_t tp1, tp2, tp3, tp4;
-
- /* 4 word storage */
- for (y = h; y--;) {
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
- prefetch_store(dst + dst_stride);
-
- __asm__ __volatile__(
- "ulw %[tp1], 0(%[src]) \n\t"
- "ulw %[tp2], 4(%[src]) \n\t"
- "ulw %[tp3], 8(%[src]) \n\t"
- "ulw %[tp4], 12(%[src]) \n\t"
-
- "sw %[tp1], 0(%[dst]) \n\t" /* store */
- "sw %[tp2], 4(%[dst]) \n\t" /* store */
- "sw %[tp3], 8(%[dst]) \n\t" /* store */
- "sw %[tp4], 12(%[dst]) \n\t" /* store */
-
- : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3),
- [tp4] "=&r"(tp4)
- : [src] "r"(src), [dst] "r"(dst));
-
- src += src_stride;
- dst += dst_stride;
- }
- } break;
- case 32: {
- uint32_t tp1, tp2, tp3, tp4;
- uint32_t tp5, tp6, tp7, tp8;
-
- /* 8 word storage */
- for (y = h; y--;) {
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
- prefetch_store(dst + dst_stride);
-
- __asm__ __volatile__(
- "ulw %[tp1], 0(%[src]) \n\t"
- "ulw %[tp2], 4(%[src]) \n\t"
- "ulw %[tp3], 8(%[src]) \n\t"
- "ulw %[tp4], 12(%[src]) \n\t"
- "ulw %[tp5], 16(%[src]) \n\t"
- "ulw %[tp6], 20(%[src]) \n\t"
- "ulw %[tp7], 24(%[src]) \n\t"
- "ulw %[tp8], 28(%[src]) \n\t"
-
- "sw %[tp1], 0(%[dst]) \n\t" /* store */
- "sw %[tp2], 4(%[dst]) \n\t" /* store */
- "sw %[tp3], 8(%[dst]) \n\t" /* store */
- "sw %[tp4], 12(%[dst]) \n\t" /* store */
- "sw %[tp5], 16(%[dst]) \n\t" /* store */
- "sw %[tp6], 20(%[dst]) \n\t" /* store */
- "sw %[tp7], 24(%[dst]) \n\t" /* store */
- "sw %[tp8], 28(%[dst]) \n\t" /* store */
-
- : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3),
- [tp4] "=&r"(tp4), [tp5] "=&r"(tp5), [tp6] "=&r"(tp6),
- [tp7] "=&r"(tp7), [tp8] "=&r"(tp8)
- : [src] "r"(src), [dst] "r"(dst));
-
- src += src_stride;
- dst += dst_stride;
- }
- } break;
- case 64: {
- uint32_t tp1, tp2, tp3, tp4;
- uint32_t tp5, tp6, tp7, tp8;
-
- prefetch_load(src + 64);
- prefetch_store(dst + 32);
-
- /* 16 word storage */
- for (y = h; y--;) {
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
- prefetch_load(src + src_stride + 64);
- prefetch_store(dst + dst_stride);
- prefetch_store(dst + dst_stride + 32);
-
- __asm__ __volatile__(
- "ulw %[tp1], 0(%[src]) \n\t"
- "ulw %[tp2], 4(%[src]) \n\t"
- "ulw %[tp3], 8(%[src]) \n\t"
- "ulw %[tp4], 12(%[src]) \n\t"
- "ulw %[tp5], 16(%[src]) \n\t"
- "ulw %[tp6], 20(%[src]) \n\t"
- "ulw %[tp7], 24(%[src]) \n\t"
- "ulw %[tp8], 28(%[src]) \n\t"
-
- "sw %[tp1], 0(%[dst]) \n\t" /* store */
- "sw %[tp2], 4(%[dst]) \n\t" /* store */
- "sw %[tp3], 8(%[dst]) \n\t" /* store */
- "sw %[tp4], 12(%[dst]) \n\t" /* store */
- "sw %[tp5], 16(%[dst]) \n\t" /* store */
- "sw %[tp6], 20(%[dst]) \n\t" /* store */
- "sw %[tp7], 24(%[dst]) \n\t" /* store */
- "sw %[tp8], 28(%[dst]) \n\t" /* store */
-
- "ulw %[tp1], 32(%[src]) \n\t"
- "ulw %[tp2], 36(%[src]) \n\t"
- "ulw %[tp3], 40(%[src]) \n\t"
- "ulw %[tp4], 44(%[src]) \n\t"
- "ulw %[tp5], 48(%[src]) \n\t"
- "ulw %[tp6], 52(%[src]) \n\t"
- "ulw %[tp7], 56(%[src]) \n\t"
- "ulw %[tp8], 60(%[src]) \n\t"
-
- "sw %[tp1], 32(%[dst]) \n\t" /* store */
- "sw %[tp2], 36(%[dst]) \n\t" /* store */
- "sw %[tp3], 40(%[dst]) \n\t" /* store */
- "sw %[tp4], 44(%[dst]) \n\t" /* store */
- "sw %[tp5], 48(%[dst]) \n\t" /* store */
- "sw %[tp6], 52(%[dst]) \n\t" /* store */
- "sw %[tp7], 56(%[dst]) \n\t" /* store */
- "sw %[tp8], 60(%[dst]) \n\t" /* store */
-
- : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3),
- [tp4] "=&r"(tp4), [tp5] "=&r"(tp5), [tp6] "=&r"(tp6),
- [tp7] "=&r"(tp7), [tp8] "=&r"(tp8)
- : [src] "r"(src), [dst] "r"(dst));
-
- src += src_stride;
- dst += dst_stride;
- }
- } break;
- default:
- for (y = h; y--;) {
- for (x = 0; x < w; ++x) {
- dst[x] = src[x];
- }
-
- src += src_stride;
- dst += dst_stride;
- }
- break;
- }
-}
-#endif
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve_copy_msa.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve_copy_msa.c
deleted file mode 100644
index 12e7d9539ac..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve_copy_msa.c
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <string.h>
-#include "aom_dsp/mips/macros_msa.h"
-
-static void copy_width8_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride, int32_t height) {
- int32_t cnt;
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
- v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
-
- if (0 == height % 12) {
- for (cnt = (height / 12); cnt--;) {
- LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
- src += (8 * src_stride);
-
- out0 = __msa_copy_u_d((v2i64)src0, 0);
- out1 = __msa_copy_u_d((v2i64)src1, 0);
- out2 = __msa_copy_u_d((v2i64)src2, 0);
- out3 = __msa_copy_u_d((v2i64)src3, 0);
- out4 = __msa_copy_u_d((v2i64)src4, 0);
- out5 = __msa_copy_u_d((v2i64)src5, 0);
- out6 = __msa_copy_u_d((v2i64)src6, 0);
- out7 = __msa_copy_u_d((v2i64)src7, 0);
-
- SD4(out0, out1, out2, out3, dst, dst_stride);
- dst += (4 * dst_stride);
- SD4(out4, out5, out6, out7, dst, dst_stride);
- dst += (4 * dst_stride);
-
- LD_UB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
-
- out0 = __msa_copy_u_d((v2i64)src0, 0);
- out1 = __msa_copy_u_d((v2i64)src1, 0);
- out2 = __msa_copy_u_d((v2i64)src2, 0);
- out3 = __msa_copy_u_d((v2i64)src3, 0);
- SD4(out0, out1, out2, out3, dst, dst_stride);
- dst += (4 * dst_stride);
- }
- } else if (0 == height % 8) {
- for (cnt = height >> 3; cnt--;) {
- LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
- src += (8 * src_stride);
-
- out0 = __msa_copy_u_d((v2i64)src0, 0);
- out1 = __msa_copy_u_d((v2i64)src1, 0);
- out2 = __msa_copy_u_d((v2i64)src2, 0);
- out3 = __msa_copy_u_d((v2i64)src3, 0);
- out4 = __msa_copy_u_d((v2i64)src4, 0);
- out5 = __msa_copy_u_d((v2i64)src5, 0);
- out6 = __msa_copy_u_d((v2i64)src6, 0);
- out7 = __msa_copy_u_d((v2i64)src7, 0);
-
- SD4(out0, out1, out2, out3, dst, dst_stride);
- dst += (4 * dst_stride);
- SD4(out4, out5, out6, out7, dst, dst_stride);
- dst += (4 * dst_stride);
- }
- } else if (0 == height % 4) {
- for (cnt = (height / 4); cnt--;) {
- LD_UB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
- out0 = __msa_copy_u_d((v2i64)src0, 0);
- out1 = __msa_copy_u_d((v2i64)src1, 0);
- out2 = __msa_copy_u_d((v2i64)src2, 0);
- out3 = __msa_copy_u_d((v2i64)src3, 0);
-
- SD4(out0, out1, out2, out3, dst, dst_stride);
- dst += (4 * dst_stride);
- }
- } else if (0 == height % 2) {
- for (cnt = (height / 2); cnt--;) {
- LD_UB2(src, src_stride, src0, src1);
- src += (2 * src_stride);
- out0 = __msa_copy_u_d((v2i64)src0, 0);
- out1 = __msa_copy_u_d((v2i64)src1, 0);
-
- SD(out0, dst);
- dst += dst_stride;
- SD(out1, dst);
- dst += dst_stride;
- }
- }
-}
-
-static void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int32_t height, int32_t width) {
- int32_t cnt, loop_cnt;
- const uint8_t *src_tmp;
- uint8_t *dst_tmp;
- v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
-
- for (cnt = (width >> 4); cnt--;) {
- src_tmp = src;
- dst_tmp = dst;
-
- for (loop_cnt = (height >> 3); loop_cnt--;) {
- LD_UB8(src_tmp, src_stride, src0, src1, src2, src3, src4, src5, src6,
- src7);
- src_tmp += (8 * src_stride);
-
- ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst_tmp,
- dst_stride);
- dst_tmp += (8 * dst_stride);
- }
-
- src += 16;
- dst += 16;
- }
-}
-
-static void copy_width16_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride, int32_t height) {
- int32_t cnt;
- v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
-
- if (0 == height % 12) {
- for (cnt = (height / 12); cnt--;) {
- LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
- src += (8 * src_stride);
- ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst, dst_stride);
- dst += (8 * dst_stride);
-
- LD_UB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
- ST_UB4(src0, src1, src2, src3, dst, dst_stride);
- dst += (4 * dst_stride);
- }
- } else if (0 == height % 8) {
- copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 16);
- } else if (0 == height % 4) {
- for (cnt = (height >> 2); cnt--;) {
- LD_UB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
-
- ST_UB4(src0, src1, src2, src3, dst, dst_stride);
- dst += (4 * dst_stride);
- }
- }
-}
-
-static void copy_width32_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride, int32_t height) {
- int32_t cnt;
- v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
-
- if (0 == height % 12) {
- for (cnt = (height / 12); cnt--;) {
- LD_UB4(src, src_stride, src0, src1, src2, src3);
- LD_UB4(src + 16, src_stride, src4, src5, src6, src7);
- src += (4 * src_stride);
- ST_UB4(src0, src1, src2, src3, dst, dst_stride);
- ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride);
- dst += (4 * dst_stride);
-
- LD_UB4(src, src_stride, src0, src1, src2, src3);
- LD_UB4(src + 16, src_stride, src4, src5, src6, src7);
- src += (4 * src_stride);
- ST_UB4(src0, src1, src2, src3, dst, dst_stride);
- ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride);
- dst += (4 * dst_stride);
-
- LD_UB4(src, src_stride, src0, src1, src2, src3);
- LD_UB4(src + 16, src_stride, src4, src5, src6, src7);
- src += (4 * src_stride);
- ST_UB4(src0, src1, src2, src3, dst, dst_stride);
- ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride);
- dst += (4 * dst_stride);
- }
- } else if (0 == height % 8) {
- copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 32);
- } else if (0 == height % 4) {
- for (cnt = (height >> 2); cnt--;) {
- LD_UB4(src, src_stride, src0, src1, src2, src3);
- LD_UB4(src + 16, src_stride, src4, src5, src6, src7);
- src += (4 * src_stride);
- ST_UB4(src0, src1, src2, src3, dst, dst_stride);
- ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride);
- dst += (4 * dst_stride);
- }
- }
-}
-
-static void copy_width64_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride, int32_t height) {
- copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 64);
-}
-
-void aom_convolve_copy_msa(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride, int32_t w,
- int32_t h) {
- switch (w) {
- case 4: {
- uint32_t cnt, tmp;
- /* 1 word storage */
- for (cnt = h; cnt--;) {
- tmp = LW(src);
- SW(tmp, dst);
- src += src_stride;
- dst += dst_stride;
- }
- break;
- }
- case 8: {
- copy_width8_msa(src, src_stride, dst, dst_stride, h);
- break;
- }
- case 16: {
- copy_width16_msa(src, src_stride, dst, dst_stride, h);
- break;
- }
- case 32: {
- copy_width32_msa(src, src_stride, dst, dst_stride, h);
- break;
- }
- case 64: {
- copy_width64_msa(src, src_stride, dst, dst_stride, h);
- break;
- }
- default: {
- uint32_t cnt;
- for (cnt = h; cnt--;) {
- memmove(dst, src, w);
- src += src_stride;
- dst += dst_stride;
- }
- break;
- }
- }
-}
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve_msa.h b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve_msa.h
deleted file mode 100644
index 852415c2019..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/aom_convolve_msa.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_MIPS_AOM_CONVOLVE_MSA_H_
-#define AOM_AOM_DSP_MIPS_AOM_CONVOLVE_MSA_H_
-
-#include "aom_dsp/mips/macros_msa.h"
-#include "aom_dsp/aom_filter.h"
-
-extern const uint8_t mc_filt_mask_arr[16 * 3];
-
-#define FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, filt0, filt1, filt2, \
- filt3) \
- ({ \
- v8i16 tmp_dpadd_0, tmp_dpadd_1; \
- \
- tmp_dpadd_0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)filt0); \
- tmp_dpadd_0 = __msa_dpadd_s_h(tmp_dpadd_0, (v16i8)vec1, (v16i8)filt1); \
- tmp_dpadd_1 = __msa_dotp_s_h((v16i8)vec2, (v16i8)filt2); \
- tmp_dpadd_1 = __msa_dpadd_s_h(tmp_dpadd_1, (v16i8)vec3, (v16i8)filt3); \
- tmp_dpadd_0 = __msa_adds_s_h(tmp_dpadd_0, tmp_dpadd_1); \
- \
- tmp_dpadd_0; \
- })
-
-#define HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \
- mask2, mask3, filt0, filt1, filt2, filt3, \
- out0, out1) \
- { \
- v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
- v8i16 res0_m, res1_m, res2_m, res3_m; \
- \
- VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m); \
- DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, res0_m, res1_m); \
- VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m); \
- DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, res0_m, res1_m); \
- VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4_m, vec5_m); \
- DOTP_SB2_SH(vec4_m, vec5_m, filt2, filt2, res2_m, res3_m); \
- VSHF_B2_SB(src0, src1, src2, src3, mask3, mask3, vec6_m, vec7_m); \
- DPADD_SB2_SH(vec6_m, vec7_m, filt3, filt3, res2_m, res3_m); \
- ADDS_SH2_SH(res0_m, res2_m, res1_m, res3_m, out0, out1); \
- }
-
-#define HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \
- mask2, mask3, filt0, filt1, filt2, filt3, \
- out0, out1, out2, out3) \
- { \
- v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
- v8i16 res0_m, res1_m, res2_m, res3_m, res4_m, res5_m, res6_m, res7_m; \
- \
- VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m); \
- VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m); \
- DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0, \
- res0_m, res1_m, res2_m, res3_m); \
- VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec0_m, vec1_m); \
- VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec2_m, vec3_m); \
- DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt2, filt2, filt2, filt2, \
- res4_m, res5_m, res6_m, res7_m); \
- VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4_m, vec5_m); \
- VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6_m, vec7_m); \
- DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt1, filt1, filt1, filt1, \
- res0_m, res1_m, res2_m, res3_m); \
- VSHF_B2_SB(src0, src0, src1, src1, mask3, mask3, vec4_m, vec5_m); \
- VSHF_B2_SB(src2, src2, src3, src3, mask3, mask3, vec6_m, vec7_m); \
- DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt3, filt3, filt3, filt3, \
- res4_m, res5_m, res6_m, res7_m); \
- ADDS_SH4_SH(res0_m, res4_m, res1_m, res5_m, res2_m, res6_m, res3_m, \
- res7_m, out0, out1, out2, out3); \
- }
-
-#endif // AOM_AOM_DSP_MIPS_AOM_CONVOLVE_MSA_H_
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/common_dspr2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/common_dspr2.c
deleted file mode 100644
index 00ab75dc31d..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/common_dspr2.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/mips/common_dspr2.h"
-
-#if HAVE_DSPR2
-uint8_t aom_ff_cropTbl_a[256 + 2 * CROP_WIDTH];
-uint8_t *aom_ff_cropTbl;
-
-void aom_dsputil_static_init(void) {
- int i;
-
- for (i = 0; i < 256; i++) aom_ff_cropTbl_a[i + CROP_WIDTH] = i;
-
- for (i = 0; i < CROP_WIDTH; i++) {
- aom_ff_cropTbl_a[i] = 0;
- aom_ff_cropTbl_a[i + CROP_WIDTH + 256] = 255;
- }
-
- aom_ff_cropTbl = &aom_ff_cropTbl_a[CROP_WIDTH];
-}
-
-#endif
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/common_dspr2.h b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/common_dspr2.h
deleted file mode 100644
index c42188d62d7..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/common_dspr2.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_MIPS_COMMON_DSPR2_H_
-#define AOM_AOM_DSP_MIPS_COMMON_DSPR2_H_
-
-#include <assert.h>
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if HAVE_DSPR2
-#define CROP_WIDTH 512
-
-extern uint8_t *aom_ff_cropTbl;  // Defined in "aom_dsp/mips/common_dspr2.c"
-
-static INLINE void prefetch_load(const unsigned char *src) {
- __asm__ __volatile__("pref 0, 0(%[src]) \n\t" : : [src] "r"(src));
-}
-
-/* prefetch data for store */
-static INLINE void prefetch_store(unsigned char *dst) {
- __asm__ __volatile__("pref 1, 0(%[dst]) \n\t" : : [dst] "r"(dst));
-}
-
-static INLINE void prefetch_load_streamed(const unsigned char *src) {
- __asm__ __volatile__("pref 4, 0(%[src]) \n\t" : : [src] "r"(src));
-}
-
-/* prefetch data for store */
-static INLINE void prefetch_store_streamed(unsigned char *dst) {
- __asm__ __volatile__("pref 5, 0(%[dst]) \n\t" : : [dst] "r"(dst));
-}
-#endif // #if HAVE_DSPR2
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_MIPS_COMMON_DSPR2_H_
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve2_dspr2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve2_dspr2.c
deleted file mode 100644
index 08bf1ab30f3..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve2_dspr2.c
+++ /dev/null
@@ -1,1031 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdio.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/convolve_common_dspr2.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_ports/mem.h"
-
-#if HAVE_DSPR2
-static void convolve_bi_horiz_4_transposed_dspr2(
- const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride,
- const int16_t *filter_x0, int32_t h) {
- int32_t y;
- uint8_t *cm = aom_ff_cropTbl;
- uint8_t *dst_ptr;
- int32_t Temp1, Temp2;
- uint32_t vector4a = 64;
- uint32_t tp1, tp2;
- uint32_t p1, p2;
- const int16_t *filter = &filter_x0[3];
- uint32_t filter45;
-
- filter45 = ((const int32_t *)filter)[0];
-
- for (y = h; y--;) {
- dst_ptr = dst;
- /* prefetch data to cache memory */
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
-
- __asm__ __volatile__(
- "ulw %[tp1], 0(%[src]) \n\t"
- "ulw %[tp2], 4(%[src]) \n\t"
-
- /* even 1. pixel */
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p1], %[tp1] \n\t"
- "preceu.ph.qbl %[p2], %[tp1] \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] \n\t"
- "extp %[Temp1], $ac3, 31 \n\t"
-
- /* even 2. pixel */
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "balign %[tp2], %[tp1], 3 \n\t"
- "dpa.w.ph $ac2, %[p2], %[filter45] \n\t"
- "extp %[Temp2], $ac2, 31 \n\t"
-
- /* odd 1. pixel */
- "lbux %[tp1], %[Temp1](%[cm]) \n\t"
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p1], %[tp2] \n\t"
- "preceu.ph.qbl %[p2], %[tp2] \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] \n\t"
- "extp %[Temp1], $ac3, 31 \n\t"
-
- /* odd 2. pixel */
- "lbux %[tp2], %[Temp2](%[cm]) \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "dpa.w.ph $ac2, %[p2], %[filter45] \n\t"
- "extp %[Temp2], $ac2, 31 \n\t"
-
- /* clamp */
- "lbux %[p1], %[Temp1](%[cm]) \n\t"
- "lbux %[p2], %[Temp2](%[cm]) \n\t"
-
- /* store bytes */
- "sb %[tp1], 0(%[dst_ptr]) \n\t"
- "addu %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
-
- "sb %[p1], 0(%[dst_ptr]) \n\t"
- "addu %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
-
- "sb %[tp2], 0(%[dst_ptr]) \n\t"
- "addu %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
-
- "sb %[p2], 0(%[dst_ptr]) \n\t"
- "addu %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t"
-
- : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [p1] "=&r"(p1), [p2] "=&r"(p2),
- [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [dst_ptr] "+r"(dst_ptr)
- : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [cm] "r"(cm),
- [src] "r"(src), [dst_stride] "r"(dst_stride));
-
- /* Next row... */
- src += src_stride;
- dst += 1;
- }
-}
-
-static void convolve_bi_horiz_8_transposed_dspr2(
- const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride,
- const int16_t *filter_x0, int32_t h) {
- int32_t y;
- uint8_t *cm = aom_ff_cropTbl;
- uint8_t *dst_ptr;
- uint32_t vector4a = 64;
- int32_t Temp1, Temp2, Temp3;
- uint32_t tp1, tp2, tp3;
- uint32_t p1, p2, p3, p4;
- uint8_t *odd_dst;
- uint32_t dst_pitch_2 = (dst_stride << 1);
- const int16_t *filter = &filter_x0[3];
- uint32_t filter45;
-
- filter45 = ((const int32_t *)filter)[0];
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
-
- dst_ptr = dst;
- odd_dst = (dst_ptr + dst_stride);
-
- __asm__ __volatile__(
- "ulw %[tp1], 0(%[src]) \n\t"
- "ulw %[tp2], 4(%[src]) \n\t"
-
- /* even 1. pixel */
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[tp1] \n\t"
- "preceu.ph.qbl %[p2], %[tp1] \n\t"
- "preceu.ph.qbr %[p3], %[tp2] \n\t"
- "preceu.ph.qbl %[p4], %[tp2] \n\t"
- "ulw %[tp3], 8(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] \n\t"
- "extp %[Temp1], $ac3, 31 \n\t"
-
- /* even 2. pixel */
- "dpa.w.ph $ac2, %[p2], %[filter45] \n\t"
- "extp %[Temp3], $ac2, 31 \n\t"
-
- /* even 3. pixel */
- "lbux %[Temp2], %[Temp1](%[cm]) \n\t"
- "mtlo %[vector4a], $ac1 \n\t"
- "mthi $zero, $ac1 \n\t"
- "balign %[tp3], %[tp2], 3 \n\t"
- "balign %[tp2], %[tp1], 3 \n\t"
- "dpa.w.ph $ac1, %[p3], %[filter45] \n\t"
- "lbux %[tp1], %[Temp3](%[cm]) \n\t"
- "extp %[p3], $ac1, 31 \n\t"
-
- /* even 4. pixel */
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "sb %[Temp2], 0(%[dst_ptr]) \n\t"
- "addu %[dst_ptr], %[dst_ptr], %[dst_pitch_2] \n\t"
- "sb %[tp1], 0(%[dst_ptr]) \n\t"
- "addu %[dst_ptr], %[dst_ptr], %[dst_pitch_2] \n\t"
-
- "dpa.w.ph $ac2, %[p4], %[filter45] \n\t"
- "extp %[Temp3], $ac2, 31 \n\t"
-
- "lbux %[Temp1], %[p3](%[cm]) "
- "\n\t"
-
- /* odd 1. pixel */
- "mtlo %[vector4a], $ac1 \n\t"
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p1], %[tp2] \n\t"
- "preceu.ph.qbl %[p2], %[tp2] \n\t"
- "preceu.ph.qbr %[p3], %[tp3] \n\t"
- "preceu.ph.qbl %[p4], %[tp3] \n\t"
- "sb %[Temp1], 0(%[dst_ptr]) \n\t"
- "addu %[dst_ptr], %[dst_ptr], %[dst_pitch_2] \n\t"
-
- "dpa.w.ph $ac3, %[p1], %[filter45] \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- /* odd 2. pixel */
- "lbux %[tp1], %[Temp3](%[cm]) \n\t"
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "dpa.w.ph $ac1, %[p2], %[filter45] \n\t"
- "sb %[tp1], 0(%[dst_ptr]) \n\t"
- "addu %[dst_ptr], %[dst_ptr], %[dst_pitch_2] \n\t"
- "extp %[Temp3], $ac1, 31 \n\t"
-
- /* odd 3. pixel */
- "lbux %[tp3], %[Temp2](%[cm]) \n\t"
- "dpa.w.ph $ac3, %[p3], %[filter45] \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- /* odd 4. pixel */
- "sb %[tp3], 0(%[odd_dst]) \n\t"
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t"
- "dpa.w.ph $ac2, %[p4], %[filter45] \n\t"
- "extp %[Temp1], $ac2, 31 \n\t"
-
- /* clamp */
- "lbux %[p4], %[Temp3](%[cm]) \n\t"
- "lbux %[p2], %[Temp2](%[cm]) \n\t"
- "lbux %[p1], %[Temp1](%[cm]) \n\t"
-
- /* store bytes */
- "sb %[p4], 0(%[odd_dst]) \n\t"
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t"
-
- "sb %[p2], 0(%[odd_dst]) \n\t"
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t"
-
- "sb %[p1], 0(%[odd_dst]) \n\t"
-
- : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3), [p1] "=&r"(p1),
- [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [dst_ptr] "+r"(dst_ptr),
- [odd_dst] "+r"(odd_dst)
- : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [cm] "r"(cm),
- [src] "r"(src), [dst_pitch_2] "r"(dst_pitch_2));
-
- /* Next row... */
- src += src_stride;
- dst += 1;
- }
-}
-
-static void convolve_bi_horiz_16_transposed_dspr2(
- const uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr,
- int32_t dst_stride, const int16_t *filter_x0, int32_t h, int32_t count) {
- int32_t c, y;
- const uint8_t *src;
- uint8_t *dst;
- uint8_t *cm = aom_ff_cropTbl;
- uint32_t vector_64 = 64;
- int32_t Temp1, Temp2, Temp3;
- uint32_t qload1, qload2;
- uint32_t p1, p2, p3, p4, p5;
- uint32_t st1, st2, st3;
- uint32_t dst_pitch_2 = (dst_stride << 1);
- uint8_t *odd_dst;
- const int16_t *filter = &filter_x0[3];
- uint32_t filter45;
-
- filter45 = ((const int32_t *)filter)[0];
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_load(src_ptr + src_stride);
- prefetch_load(src_ptr + src_stride + 32);
-
- src = src_ptr;
- dst = dst_ptr;
-
- odd_dst = (dst + dst_stride);
-
- for (c = 0; c < count; c++) {
- __asm__ __volatile__(
- "ulw %[qload1], 0(%[src]) "
- "\n\t"
- "ulw %[qload2], 4(%[src]) "
- "\n\t"
-
- /* even 1. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* even 1 */
- "mthi $zero, $ac1 "
- "\n\t"
- "mtlo %[vector_64], $ac2 "
- "\n\t" /* even 2 */
- "mthi $zero, $ac2 "
- "\n\t"
- "preceu.ph.qbr %[p1], %[qload1] "
- "\n\t"
- "preceu.ph.qbl %[p2], %[qload1] "
- "\n\t"
- "preceu.ph.qbr %[p3], %[qload2] "
- "\n\t"
- "preceu.ph.qbl %[p4], %[qload2] "
- "\n\t"
- "ulw %[qload1], 8(%[src]) "
- "\n\t"
- "dpa.w.ph $ac1, %[p1], %[filter45] "
- "\n\t" /* even 1 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* even 1 */
-
- /* even 2. pixel */
- "mtlo %[vector_64], $ac3 "
- "\n\t" /* even 3 */
- "mthi $zero, $ac3 "
- "\n\t"
- "preceu.ph.qbr %[p1], %[qload1] "
- "\n\t"
- "preceu.ph.qbl %[p5], %[qload1] "
- "\n\t"
- "ulw %[qload2], 12(%[src]) "
- "\n\t"
- "dpa.w.ph $ac2, %[p2], %[filter45] "
- "\n\t" /* even 1 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* even 1 */
- "extp %[Temp2], $ac2, 31 "
- "\n\t" /* even 1 */
-
- /* even 3. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* even 4 */
- "mthi $zero, $ac1 "
- "\n\t"
- "preceu.ph.qbr %[p2], %[qload2] "
- "\n\t"
- "sb %[st1], 0(%[dst]) "
- "\n\t" /* even 1 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- " \n\t"
- "dpa.w.ph $ac3, %[p3], %[filter45] "
- "\n\t" /* even 3 */
- "extp %[Temp3], $ac3, 31 "
- "\n\t" /* even 3 */
- "lbux %[st2], %[Temp2](%[cm]) "
- "\n\t" /* even 1 */
-
- /* even 4. pixel */
- "mtlo %[vector_64], $ac2 "
- "\n\t" /* even 5 */
- "mthi $zero, $ac2 "
- "\n\t"
- "preceu.ph.qbl %[p3], %[qload2] "
- "\n\t"
- "sb %[st2], 0(%[dst]) "
- "\n\t" /* even 2 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac1, %[p4], %[filter45] "
- "\n\t" /* even 4 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* even 4 */
- "lbux %[st3], %[Temp3](%[cm]) "
- "\n\t" /* even 3 */
-
- /* even 5. pixel */
- "mtlo %[vector_64], $ac3 "
- "\n\t" /* even 6 */
- "mthi $zero, $ac3 "
- "\n\t"
- "sb %[st3], 0(%[dst]) "
- "\n\t" /* even 3 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac2, %[p1], %[filter45] "
- "\n\t" /* even 5 */
- "extp %[Temp2], $ac2, 31 "
- "\n\t" /* even 5 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* even 4 */
-
- /* even 6. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* even 7 */
- "mthi $zero, $ac1 "
- "\n\t"
- "sb %[st1], 0(%[dst]) "
- "\n\t" /* even 4 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "ulw %[qload1], 20(%[src]) "
- "\n\t"
- "dpa.w.ph $ac3, %[p5], %[filter45] "
- "\n\t" /* even 6 */
- "extp %[Temp3], $ac3, 31 "
- "\n\t" /* even 6 */
- "lbux %[st2], %[Temp2](%[cm]) "
- "\n\t" /* even 5 */
-
- /* even 7. pixel */
- "mtlo %[vector_64], $ac2 "
- "\n\t" /* even 8 */
- "mthi $zero, $ac2 "
- "\n\t"
- "preceu.ph.qbr %[p5], %[qload1] "
- "\n\t"
- "sb %[st2], 0(%[dst]) "
- "\n\t" /* even 5 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac1, %[p2], %[filter45] "
- "\n\t" /* even 7 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* even 7 */
- "lbux %[st3], %[Temp3](%[cm]) "
- "\n\t" /* even 6 */
-
- /* even 8. pixel */
- "mtlo %[vector_64], $ac3 "
- "\n\t" /* odd 1 */
- "mthi $zero, $ac3 "
- "\n\t"
- "dpa.w.ph $ac2, %[p3], %[filter45] "
- "\n\t" /* even 8 */
- "sb %[st3], 0(%[dst]) "
- "\n\t" /* even 6 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "extp %[Temp2], $ac2, 31 "
- "\n\t" /* even 8 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* even 7 */
-
- /* ODD pixels */
- "ulw %[qload1], 1(%[src]) "
- "\n\t"
- "ulw %[qload2], 5(%[src]) "
- "\n\t"
-
- /* odd 1. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* odd 2 */
- "mthi $zero, $ac1 "
- "\n\t"
- "preceu.ph.qbr %[p1], %[qload1] "
- "\n\t"
- "preceu.ph.qbl %[p2], %[qload1] "
- "\n\t"
- "preceu.ph.qbr %[p3], %[qload2] "
- "\n\t"
- "preceu.ph.qbl %[p4], %[qload2] "
- "\n\t"
- "sb %[st1], 0(%[dst]) "
- "\n\t" /* even 7 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "ulw %[qload2], 9(%[src]) "
- "\n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] "
- "\n\t" /* odd 1 */
- "extp %[Temp3], $ac3, 31 "
- "\n\t" /* odd 1 */
- "lbux %[st2], %[Temp2](%[cm]) "
- "\n\t" /* even 8 */
-
- /* odd 2. pixel */
- "mtlo %[vector_64], $ac2 "
- "\n\t" /* odd 3 */
- "mthi $zero, $ac2 "
- "\n\t"
- "preceu.ph.qbr %[p1], %[qload2] "
- "\n\t"
- "preceu.ph.qbl %[p5], %[qload2] "
- "\n\t"
- "sb %[st2], 0(%[dst]) "
- "\n\t" /* even 8 */
- "ulw %[qload1], 13(%[src]) "
- "\n\t"
- "dpa.w.ph $ac1, %[p2], %[filter45] "
- "\n\t" /* odd 2 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* odd 2 */
- "lbux %[st3], %[Temp3](%[cm]) "
- "\n\t" /* odd 1 */
-
- /* odd 3. pixel */
- "mtlo %[vector_64], $ac3 "
- "\n\t" /* odd 4 */
- "mthi $zero, $ac3 "
- "\n\t"
- "preceu.ph.qbr %[p2], %[qload1] "
- "\n\t"
- "sb %[st3], 0(%[odd_dst]) "
- "\n\t" /* odd 1 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac2, %[p3], %[filter45] "
- "\n\t" /* odd 3 */
- "extp %[Temp2], $ac2, 31 "
- "\n\t" /* odd 3 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* odd 2 */
-
- /* odd 4. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* odd 5 */
- "mthi $zero, $ac1 "
- "\n\t"
- "preceu.ph.qbl %[p3], %[qload1] "
- "\n\t"
- "sb %[st1], 0(%[odd_dst]) "
- "\n\t" /* odd 2 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac3, %[p4], %[filter45] "
- "\n\t" /* odd 4 */
- "extp %[Temp3], $ac3, 31 "
- "\n\t" /* odd 4 */
- "lbux %[st2], %[Temp2](%[cm]) "
- "\n\t" /* odd 3 */
-
- /* odd 5. pixel */
- "mtlo %[vector_64], $ac2 "
- "\n\t" /* odd 6 */
- "mthi $zero, $ac2 "
- "\n\t"
- "sb %[st2], 0(%[odd_dst]) "
- "\n\t" /* odd 3 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac1, %[p1], %[filter45] "
- "\n\t" /* odd 5 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* odd 5 */
- "lbux %[st3], %[Temp3](%[cm]) "
- "\n\t" /* odd 4 */
-
- /* odd 6. pixel */
- "mtlo %[vector_64], $ac3 "
- "\n\t" /* odd 7 */
- "mthi $zero, $ac3 "
- "\n\t"
- "sb %[st3], 0(%[odd_dst]) "
- "\n\t" /* odd 4 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
- "ulw %[qload1], 21(%[src]) "
- "\n\t"
- "dpa.w.ph $ac2, %[p5], %[filter45] "
- "\n\t" /* odd 6 */
- "extp %[Temp2], $ac2, 31 "
- "\n\t" /* odd 6 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* odd 5 */
-
- /* odd 7. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* odd 8 */
- "mthi $zero, $ac1 "
- "\n\t"
- "preceu.ph.qbr %[p5], %[qload1] "
- "\n\t"
- "sb %[st1], 0(%[odd_dst]) "
- "\n\t" /* odd 5 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac3, %[p2], %[filter45] "
- "\n\t" /* odd 7 */
- "extp %[Temp3], $ac3, 31 "
- "\n\t" /* odd 7 */
-
- /* odd 8. pixel */
- "dpa.w.ph $ac1, %[p3], %[filter45] "
- "\n\t" /* odd 8 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* odd 8 */
-
- "lbux %[st2], %[Temp2](%[cm]) "
- "\n\t" /* odd 6 */
- "lbux %[st3], %[Temp3](%[cm]) "
- "\n\t" /* odd 7 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* odd 8 */
-
- "sb %[st2], 0(%[odd_dst]) "
- "\n\t" /* odd 6 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
-
- "sb %[st3], 0(%[odd_dst]) "
- "\n\t" /* odd 7 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
-
- "sb %[st1], 0(%[odd_dst]) "
- "\n\t" /* odd 8 */
-
- : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [p5] "=&r"(p5),
- [st1] "=&r"(st1), [st2] "=&r"(st2), [st3] "=&r"(st3),
- [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4),
- [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3),
- [dst] "+r"(dst), [odd_dst] "+r"(odd_dst)
- : [filter45] "r"(filter45), [vector_64] "r"(vector_64), [cm] "r"(cm),
- [src] "r"(src), [dst_pitch_2] "r"(dst_pitch_2));
-
- src += 16;
- dst = (dst_ptr + ((c + 1) * 16 * dst_stride));
- odd_dst = (dst + dst_stride);
- }
-
- /* Next row... */
- src_ptr += src_stride;
- dst_ptr += 1;
- }
-}
-
-static void convolve_bi_horiz_64_transposed_dspr2(
- const uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr,
- int32_t dst_stride, const int16_t *filter_x0, int32_t h) {
- int32_t c, y;
- const uint8_t *src;
- uint8_t *dst;
- uint8_t *cm = aom_ff_cropTbl;
- uint32_t vector_64 = 64;
- int32_t Temp1, Temp2, Temp3;
- uint32_t qload1, qload2;
- uint32_t p1, p2, p3, p4, p5;
- uint32_t st1, st2, st3;
- uint32_t dst_pitch_2 = (dst_stride << 1);
- uint8_t *odd_dst;
- const int16_t *filter = &filter_x0[3];
- uint32_t filter45;
-
- filter45 = ((const int32_t *)filter)[0];
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_load(src_ptr + src_stride);
- prefetch_load(src_ptr + src_stride + 32);
- prefetch_load(src_ptr + src_stride + 64);
-
- src = src_ptr;
- dst = dst_ptr;
-
- odd_dst = (dst + dst_stride);
-
- for (c = 0; c < 4; c++) {
- __asm__ __volatile__(
- "ulw %[qload1], 0(%[src]) "
- "\n\t"
- "ulw %[qload2], 4(%[src]) "
- "\n\t"
-
- /* even 1. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* even 1 */
- "mthi $zero, $ac1 "
- "\n\t"
- "mtlo %[vector_64], $ac2 "
- "\n\t" /* even 2 */
- "mthi $zero, $ac2 "
- "\n\t"
- "preceu.ph.qbr %[p1], %[qload1] "
- "\n\t"
- "preceu.ph.qbl %[p2], %[qload1] "
- "\n\t"
- "preceu.ph.qbr %[p3], %[qload2] "
- "\n\t"
- "preceu.ph.qbl %[p4], %[qload2] "
- "\n\t"
- "ulw %[qload1], 8(%[src]) "
- "\n\t"
- "dpa.w.ph $ac1, %[p1], %[filter45] "
- "\n\t" /* even 1 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* even 1 */
-
- /* even 2. pixel */
- "mtlo %[vector_64], $ac3 "
- "\n\t" /* even 3 */
- "mthi $zero, $ac3 "
- "\n\t"
- "preceu.ph.qbr %[p1], %[qload1] "
- "\n\t"
- "preceu.ph.qbl %[p5], %[qload1] "
- "\n\t"
- "ulw %[qload2], 12(%[src]) "
- "\n\t"
- "dpa.w.ph $ac2, %[p2], %[filter45] "
- "\n\t" /* even 1 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* even 1 */
- "extp %[Temp2], $ac2, 31 "
- "\n\t" /* even 1 */
-
- /* even 3. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* even 4 */
- "mthi $zero, $ac1 "
- "\n\t"
- "preceu.ph.qbr %[p2], %[qload2] "
- "\n\t"
- "sb %[st1], 0(%[dst]) "
- "\n\t" /* even 1 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- " \n\t"
- "dpa.w.ph $ac3, %[p3], %[filter45] "
- "\n\t" /* even 3 */
- "extp %[Temp3], $ac3, 31 "
- "\n\t" /* even 3 */
- "lbux %[st2], %[Temp2](%[cm]) "
- "\n\t" /* even 1 */
-
- /* even 4. pixel */
- "mtlo %[vector_64], $ac2 "
- "\n\t" /* even 5 */
- "mthi $zero, $ac2 "
- "\n\t"
- "preceu.ph.qbl %[p3], %[qload2] "
- "\n\t"
- "sb %[st2], 0(%[dst]) "
- "\n\t" /* even 2 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac1, %[p4], %[filter45] "
- "\n\t" /* even 4 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* even 4 */
- "lbux %[st3], %[Temp3](%[cm]) "
- "\n\t" /* even 3 */
-
- /* even 5. pixel */
- "mtlo %[vector_64], $ac3 "
- "\n\t" /* even 6 */
- "mthi $zero, $ac3 "
- "\n\t"
- "sb %[st3], 0(%[dst]) "
- "\n\t" /* even 3 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac2, %[p1], %[filter45] "
- "\n\t" /* even 5 */
- "extp %[Temp2], $ac2, 31 "
- "\n\t" /* even 5 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* even 4 */
-
- /* even 6. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* even 7 */
- "mthi $zero, $ac1 "
- "\n\t"
- "sb %[st1], 0(%[dst]) "
- "\n\t" /* even 4 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "ulw %[qload1], 20(%[src]) "
- "\n\t"
- "dpa.w.ph $ac3, %[p5], %[filter45] "
- "\n\t" /* even 6 */
- "extp %[Temp3], $ac3, 31 "
- "\n\t" /* even 6 */
- "lbux %[st2], %[Temp2](%[cm]) "
- "\n\t" /* even 5 */
-
- /* even 7. pixel */
- "mtlo %[vector_64], $ac2 "
- "\n\t" /* even 8 */
- "mthi $zero, $ac2 "
- "\n\t"
- "preceu.ph.qbr %[p5], %[qload1] "
- "\n\t"
- "sb %[st2], 0(%[dst]) "
- "\n\t" /* even 5 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac1, %[p2], %[filter45] "
- "\n\t" /* even 7 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* even 7 */
- "lbux %[st3], %[Temp3](%[cm]) "
- "\n\t" /* even 6 */
-
- /* even 8. pixel */
- "mtlo %[vector_64], $ac3 "
- "\n\t" /* odd 1 */
- "mthi $zero, $ac3 "
- "\n\t"
- "dpa.w.ph $ac2, %[p3], %[filter45] "
- "\n\t" /* even 8 */
- "sb %[st3], 0(%[dst]) "
- "\n\t" /* even 6 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "extp %[Temp2], $ac2, 31 "
- "\n\t" /* even 8 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* even 7 */
-
- /* ODD pixels */
- "ulw %[qload1], 1(%[src]) "
- "\n\t"
- "ulw %[qload2], 5(%[src]) "
- "\n\t"
-
- /* odd 1. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* odd 2 */
- "mthi $zero, $ac1 "
- "\n\t"
- "preceu.ph.qbr %[p1], %[qload1] "
- "\n\t"
- "preceu.ph.qbl %[p2], %[qload1] "
- "\n\t"
- "preceu.ph.qbr %[p3], %[qload2] "
- "\n\t"
- "preceu.ph.qbl %[p4], %[qload2] "
- "\n\t"
- "sb %[st1], 0(%[dst]) "
- "\n\t" /* even 7 */
- "addu %[dst], %[dst], %[dst_pitch_2] "
- "\n\t"
- "ulw %[qload2], 9(%[src]) "
- "\n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] "
- "\n\t" /* odd 1 */
- "extp %[Temp3], $ac3, 31 "
- "\n\t" /* odd 1 */
- "lbux %[st2], %[Temp2](%[cm]) "
- "\n\t" /* even 8 */
-
- /* odd 2. pixel */
- "mtlo %[vector_64], $ac2 "
- "\n\t" /* odd 3 */
- "mthi $zero, $ac2 "
- "\n\t"
- "preceu.ph.qbr %[p1], %[qload2] "
- "\n\t"
- "preceu.ph.qbl %[p5], %[qload2] "
- "\n\t"
- "sb %[st2], 0(%[dst]) "
- "\n\t" /* even 8 */
- "ulw %[qload1], 13(%[src]) "
- "\n\t"
- "dpa.w.ph $ac1, %[p2], %[filter45] "
- "\n\t" /* odd 2 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* odd 2 */
- "lbux %[st3], %[Temp3](%[cm]) "
- "\n\t" /* odd 1 */
-
- /* odd 3. pixel */
- "mtlo %[vector_64], $ac3 "
- "\n\t" /* odd 4 */
- "mthi $zero, $ac3 "
- "\n\t"
- "preceu.ph.qbr %[p2], %[qload1] "
- "\n\t"
- "sb %[st3], 0(%[odd_dst]) "
- "\n\t" /* odd 1 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac2, %[p3], %[filter45] "
- "\n\t" /* odd 3 */
- "extp %[Temp2], $ac2, 31 "
- "\n\t" /* odd 3 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* odd 2 */
-
- /* odd 4. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* odd 5 */
- "mthi $zero, $ac1 "
- "\n\t"
- "preceu.ph.qbl %[p3], %[qload1] "
- "\n\t"
- "sb %[st1], 0(%[odd_dst]) "
- "\n\t" /* odd 2 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac3, %[p4], %[filter45] "
- "\n\t" /* odd 4 */
- "extp %[Temp3], $ac3, 31 "
- "\n\t" /* odd 4 */
- "lbux %[st2], %[Temp2](%[cm]) "
- "\n\t" /* odd 3 */
-
- /* odd 5. pixel */
- "mtlo %[vector_64], $ac2 "
- "\n\t" /* odd 6 */
- "mthi $zero, $ac2 "
- "\n\t"
- "sb %[st2], 0(%[odd_dst]) "
- "\n\t" /* odd 3 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac1, %[p1], %[filter45] "
- "\n\t" /* odd 5 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* odd 5 */
- "lbux %[st3], %[Temp3](%[cm]) "
- "\n\t" /* odd 4 */
-
- /* odd 6. pixel */
- "mtlo %[vector_64], $ac3 "
- "\n\t" /* odd 7 */
- "mthi $zero, $ac3 "
- "\n\t"
- "sb %[st3], 0(%[odd_dst]) "
- "\n\t" /* odd 4 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
- "ulw %[qload1], 21(%[src]) "
- "\n\t"
- "dpa.w.ph $ac2, %[p5], %[filter45] "
- "\n\t" /* odd 6 */
- "extp %[Temp2], $ac2, 31 "
- "\n\t" /* odd 6 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* odd 5 */
-
- /* odd 7. pixel */
- "mtlo %[vector_64], $ac1 "
- "\n\t" /* odd 8 */
- "mthi $zero, $ac1 "
- "\n\t"
- "preceu.ph.qbr %[p5], %[qload1] "
- "\n\t"
- "sb %[st1], 0(%[odd_dst]) "
- "\n\t" /* odd 5 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
- "dpa.w.ph $ac3, %[p2], %[filter45] "
- "\n\t" /* odd 7 */
- "extp %[Temp3], $ac3, 31 "
- "\n\t" /* odd 7 */
-
- /* odd 8. pixel */
- "dpa.w.ph $ac1, %[p3], %[filter45] "
- "\n\t" /* odd 8 */
- "extp %[Temp1], $ac1, 31 "
- "\n\t" /* odd 8 */
-
- "lbux %[st2], %[Temp2](%[cm]) "
- "\n\t" /* odd 6 */
- "lbux %[st3], %[Temp3](%[cm]) "
- "\n\t" /* odd 7 */
- "lbux %[st1], %[Temp1](%[cm]) "
- "\n\t" /* odd 8 */
-
- "sb %[st2], 0(%[odd_dst]) "
- "\n\t" /* odd 6 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
-
- "sb %[st3], 0(%[odd_dst]) "
- "\n\t" /* odd 7 */
- "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] "
- "\n\t"
-
- "sb %[st1], 0(%[odd_dst]) "
- "\n\t" /* odd 8 */
-
- : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [p5] "=&r"(p5),
- [st1] "=&r"(st1), [st2] "=&r"(st2), [st3] "=&r"(st3),
- [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4),
- [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3),
- [dst] "+r"(dst), [odd_dst] "+r"(odd_dst)
- : [filter45] "r"(filter45), [vector_64] "r"(vector_64), [cm] "r"(cm),
- [src] "r"(src), [dst_pitch_2] "r"(dst_pitch_2));
-
- src += 16;
- dst = (dst_ptr + ((c + 1) * 16 * dst_stride));
- odd_dst = (dst + dst_stride);
- }
-
- /* Next row... */
- src_ptr += src_stride;
- dst_ptr += 1;
- }
-}
-
-void convolve_bi_horiz_transposed(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter, int w, int h) {
- int x, y;
-
- for (y = 0; y < h; ++y) {
- for (x = 0; x < w; ++x) {
- int sum = 0;
-
- sum += src[x] * filter[3];
- sum += src[x + 1] * filter[4];
-
- dst[x * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
- }
-
- src += src_stride;
- dst += 1;
- }
-}
-
-void aom_convolve2_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
- ptrdiff_t dst_stride, const int16_t *filter, int w,
- int h) {
- uint32_t pos = 38;
-
-  /* bit position for extract from acc */
- __asm__ __volatile__("wrdsp %[pos], 1 \n\t"
- :
- : [pos] "r"(pos));
-
- /* prefetch data to cache memory */
- prefetch_load(src);
- prefetch_load(src + 32);
-
- switch (w) {
- case 4:
- convolve_bi_horiz_4_transposed_dspr2(src, src_stride, dst, dst_stride,
- filter, h);
- break;
- case 8:
- convolve_bi_horiz_8_transposed_dspr2(src, src_stride, dst, dst_stride,
- filter, h);
- break;
- case 16:
- case 32:
- convolve_bi_horiz_16_transposed_dspr2(src, src_stride, dst, dst_stride,
- filter, h, (w / 16));
- break;
- case 64:
- prefetch_load(src + 32);
- convolve_bi_horiz_64_transposed_dspr2(src, src_stride, dst, dst_stride,
- filter, h);
- break;
- default:
- convolve_bi_horiz_transposed(src, src_stride, dst, dst_stride, filter, w,
- h);
- break;
- }
-}
-#endif
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve2_horiz_dspr2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve2_horiz_dspr2.c
deleted file mode 100644
index 097da73ca03..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve2_horiz_dspr2.c
+++ /dev/null
@@ -1,681 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdio.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/convolve_common_dspr2.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/mem.h"
-
-#if HAVE_DSPR2
-static void convolve_bi_horiz_4_dspr2(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- const int16_t *filter_x0, int32_t h) {
- int32_t y;
- uint8_t *cm = aom_ff_cropTbl;
- int32_t Temp1, Temp2, Temp3, Temp4;
- uint32_t vector4a = 64;
- uint32_t tp1, tp2;
- uint32_t p1, p2;
- const int16_t *filter = &filter_x0[3];
- uint32_t filter45;
-
- filter45 = ((const int32_t *)filter)[0];
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
- prefetch_store(dst + dst_stride);
-
- __asm__ __volatile__(
- "ulw %[tp1], 0(%[src]) \n\t"
- "ulw %[tp2], 4(%[src]) \n\t"
-
- /* even 1. pixel */
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p1], %[tp1] \n\t"
- "preceu.ph.qbl %[p2], %[tp1] \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] \n\t"
- "extp %[Temp1], $ac3, 31 \n\t"
-
- /* even 2. pixel */
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "balign %[tp2], %[tp1], 3 \n\t"
- "dpa.w.ph $ac2, %[p2], %[filter45] \n\t"
- "extp %[Temp3], $ac2, 31 \n\t"
-
- /* odd 1. pixel */
- "lbux %[tp1], %[Temp1](%[cm]) \n\t"
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p1], %[tp2] \n\t"
- "preceu.ph.qbl %[p2], %[tp2] \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- /* odd 2. pixel */
- "lbux %[tp2], %[Temp3](%[cm]) \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "dpa.w.ph $ac2, %[p2], %[filter45] \n\t"
- "extp %[Temp4], $ac2, 31 \n\t"
-
- /* clamp */
- "lbux %[p1], %[Temp2](%[cm]) \n\t"
- "lbux %[p2], %[Temp4](%[cm]) \n\t"
-
- /* store bytes */
- "sb %[tp1], 0(%[dst]) \n\t"
- "sb %[p1], 1(%[dst]) \n\t"
- "sb %[tp2], 2(%[dst]) \n\t"
- "sb %[p2], 3(%[dst]) \n\t"
-
- : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [p1] "=&r"(p1), [p2] "=&r"(p2),
- [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3),
- [Temp4] "=&r"(Temp4)
- : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [cm] "r"(cm),
- [dst] "r"(dst), [src] "r"(src));
-
- /* Next row... */
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void convolve_bi_horiz_8_dspr2(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- const int16_t *filter_x0, int32_t h) {
- int32_t y;
- uint8_t *cm = aom_ff_cropTbl;
- uint32_t vector4a = 64;
- int32_t Temp1, Temp2, Temp3;
- uint32_t tp1, tp2, tp3;
- uint32_t p1, p2, p3, p4;
- uint32_t st0, st1;
- const int16_t *filter = &filter_x0[3];
- uint32_t filter45;
-
- filter45 = ((const int32_t *)filter)[0];
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
- prefetch_store(dst + dst_stride);
-
- __asm__ __volatile__(
- "ulw %[tp1], 0(%[src]) \n\t"
- "ulw %[tp2], 4(%[src]) \n\t"
-
- /* even 1. pixel */
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[tp1] \n\t"
- "preceu.ph.qbl %[p2], %[tp1] \n\t"
- "preceu.ph.qbr %[p3], %[tp2] \n\t"
- "preceu.ph.qbl %[p4], %[tp2] \n\t"
- "ulw %[tp3], 8(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] \n\t"
- "extp %[Temp1], $ac3, 31 \n\t"
-
- /* even 2. pixel */
- "dpa.w.ph $ac2, %[p2], %[filter45] \n\t"
- "extp %[Temp3], $ac2, 31 \n\t"
-
- /* even 3. pixel */
- "lbux %[st0], %[Temp1](%[cm]) \n\t"
- "mtlo %[vector4a], $ac1 \n\t"
- "mthi $zero, $ac1 \n\t"
- "dpa.w.ph $ac1, %[p3], %[filter45] \n\t"
- "extp %[Temp1], $ac1, 31 \n\t"
-
- /* even 4. pixel */
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "sb %[st0], 0(%[dst]) \n\t"
- "lbux %[st1], %[Temp3](%[cm]) \n\t"
-
- "balign %[tp3], %[tp2], 3 \n\t"
- "balign %[tp2], %[tp1], 3 \n\t"
-
- "dpa.w.ph $ac2, %[p4], %[filter45] \n\t"
- "extp %[Temp3], $ac2, 31 \n\t"
-
- "lbux %[st0], %[Temp1](%[cm]) \n\t"
-
- /* odd 1. pixel */
- "mtlo %[vector4a], $ac1 \n\t"
- "mthi $zero, $ac1 \n\t"
- "sb %[st1], 2(%[dst]) \n\t"
- "preceu.ph.qbr %[p1], %[tp2] \n\t"
- "preceu.ph.qbl %[p2], %[tp2] \n\t"
- "preceu.ph.qbr %[p3], %[tp3] \n\t"
- "preceu.ph.qbl %[p4], %[tp3] \n\t"
- "sb %[st0], 4(%[dst]) \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- /* odd 2. pixel */
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "lbux %[st0], %[Temp3](%[cm]) \n\t"
- "dpa.w.ph $ac1, %[p2], %[filter45] \n\t"
- "extp %[Temp3], $ac1, 31 \n\t"
-
- /* odd 3. pixel */
- "lbux %[st1], %[Temp2](%[cm]) \n\t"
- "dpa.w.ph $ac3, %[p3], %[filter45] \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- /* odd 4. pixel */
- "sb %[st1], 1(%[dst]) \n\t"
- "sb %[st0], 6(%[dst]) \n\t"
- "dpa.w.ph $ac2, %[p4], %[filter45] \n\t"
- "extp %[Temp1], $ac2, 31 \n\t"
-
- /* clamp */
- "lbux %[p4], %[Temp3](%[cm]) \n\t"
- "lbux %[p2], %[Temp2](%[cm]) \n\t"
- "lbux %[p1], %[Temp1](%[cm]) \n\t"
-
- /* store bytes */
- "sb %[p4], 3(%[dst]) \n\t"
- "sb %[p2], 5(%[dst]) \n\t"
- "sb %[p1], 7(%[dst]) \n\t"
-
- : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3),
- [st0] "=&r"(st0), [st1] "=&r"(st1), [p1] "=&r"(p1), [p2] "=&r"(p2),
- [p3] "=&r"(p3), [p4] "=&r"(p4), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3)
- : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [cm] "r"(cm),
- [dst] "r"(dst), [src] "r"(src));
-
- /* Next row... */
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void convolve_bi_horiz_16_dspr2(const uint8_t *src_ptr,
- int32_t src_stride, uint8_t *dst_ptr,
- int32_t dst_stride,
- const int16_t *filter_x0, int32_t h,
- int32_t count) {
- int32_t y, c;
- const uint8_t *src;
- uint8_t *dst;
- uint8_t *cm = aom_ff_cropTbl;
- uint32_t vector_64 = 64;
- int32_t Temp1, Temp2, Temp3;
- uint32_t qload1, qload2, qload3;
- uint32_t p1, p2, p3, p4, p5;
- uint32_t st1, st2, st3;
- const int16_t *filter = &filter_x0[3];
- uint32_t filter45;
-
- filter45 = ((const int32_t *)filter)[0];
-
- for (y = h; y--;) {
- src = src_ptr;
- dst = dst_ptr;
-
- /* prefetch data to cache memory */
- prefetch_load(src_ptr + src_stride);
- prefetch_load(src_ptr + src_stride + 32);
- prefetch_store(dst_ptr + dst_stride);
-
- for (c = 0; c < count; c++) {
- __asm__ __volatile__(
- "ulw %[qload1], 0(%[src]) \n\t"
- "ulw %[qload2], 4(%[src]) \n\t"
-
- /* even 1. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 1 */
- "mthi $zero, $ac1 \n\t"
- "mtlo %[vector_64], $ac2 \n\t" /* even 2 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[qload1] \n\t"
- "preceu.ph.qbl %[p2], %[qload1] \n\t"
- "preceu.ph.qbr %[p3], %[qload2] \n\t"
- "preceu.ph.qbl %[p4], %[qload2] \n\t"
- "ulw %[qload3], 8(%[src]) \n\t"
- "dpa.w.ph $ac1, %[p1], %[filter45] \n\t" /* even 1 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */
-
- /* even 2. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* even 3 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p1], %[qload3] \n\t"
- "preceu.ph.qbl %[p5], %[qload3] \n\t"
- "ulw %[qload1], 12(%[src]) \n\t"
- "dpa.w.ph $ac2, %[p2], %[filter45] \n\t" /* even 1 */
- "extp %[Temp2], $ac2, 31 \n\t" /* even 1 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */
-
- /* even 3. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 4 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p2], %[qload1] \n\t"
- "sb %[st1], 0(%[dst]) \n\t" /* even 1 */
- "dpa.w.ph $ac3, %[p3], %[filter45] \n\t" /* even 3 */
- "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 1 */
-
- /* even 4. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* even 5 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbl %[p3], %[qload1] \n\t"
- "sb %[st2], 2(%[dst]) \n\t" /* even 1 */
- "dpa.w.ph $ac1, %[p4], %[filter45] \n\t" /* even 4 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */
-
- /* even 5. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* even 6 */
- "mthi $zero, $ac3 \n\t"
- "sb %[st3], 4(%[dst]) \n\t" /* even 3 */
- "dpa.w.ph $ac2, %[p1], %[filter45] \n\t" /* even 5 */
- "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */
-
- /* even 6. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 7 */
- "mthi $zero, $ac1 \n\t"
- "sb %[st1], 6(%[dst]) \n\t" /* even 4 */
- "dpa.w.ph $ac3, %[p5], %[filter45] \n\t" /* even 6 */
- "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */
-
- /* even 7. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* even 8 */
- "mthi $zero, $ac2 \n\t"
- "sb %[st2], 8(%[dst]) \n\t" /* even 5 */
- "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" /* even 7 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */
-
- /* even 8. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */
- "mthi $zero, $ac3 \n\t"
- "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" /* even 8 */
- "sb %[st3], 10(%[dst]) \n\t" /* even 6 */
- "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */
-
- /* ODD pixels */
- "ulw %[qload1], 1(%[src]) \n\t"
- "ulw %[qload2], 5(%[src]) \n\t"
-
- /* odd 1. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p1], %[qload1] \n\t"
- "preceu.ph.qbl %[p2], %[qload1] \n\t"
- "preceu.ph.qbr %[p3], %[qload2] \n\t"
- "preceu.ph.qbl %[p4], %[qload2] \n\t"
- "sb %[st1], 12(%[dst]) \n\t" /* even 7 */
- "ulw %[qload3], 9(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" /* odd 1 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */
-
- /* odd 2. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[qload3] \n\t"
- "preceu.ph.qbl %[p5], %[qload3] \n\t"
- "sb %[st2], 14(%[dst]) \n\t" /* even 8 */
- "ulw %[qload1], 13(%[src]) \n\t"
- "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" /* odd 2 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */
-
- /* odd 3. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p2], %[qload1] \n\t"
- "sb %[st3], 1(%[dst]) \n\t" /* odd 1 */
- "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" /* odd 3 */
- "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */
-
- /* odd 4. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbl %[p3], %[qload1] \n\t"
- "sb %[st1], 3(%[dst]) \n\t" /* odd 2 */
- "dpa.w.ph $ac3, %[p4], %[filter45] \n\t" /* odd 4 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */
-
- /* odd 5. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */
- "mthi $zero, $ac2 \n\t"
- "sb %[st2], 5(%[dst]) \n\t" /* odd 3 */
- "dpa.w.ph $ac1, %[p1], %[filter45] \n\t" /* odd 5 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */
-
- /* odd 6. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */
- "mthi $zero, $ac3 \n\t"
- "sb %[st3], 7(%[dst]) \n\t" /* odd 4 */
- "dpa.w.ph $ac2, %[p5], %[filter45] \n\t" /* odd 6 */
- "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */
-
- /* odd 7. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */
- "mthi $zero, $ac1 \n\t"
- "sb %[st1], 9(%[dst]) \n\t" /* odd 5 */
- "dpa.w.ph $ac3, %[p2], %[filter45] \n\t" /* odd 7 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */
-
- /* odd 8. pixel */
- "dpa.w.ph $ac1, %[p3], %[filter45] \n\t" /* odd 8 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */
-
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */
-
- "sb %[st2], 11(%[dst]) \n\t" /* odd 6 */
- "sb %[st3], 13(%[dst]) \n\t" /* odd 7 */
- "sb %[st1], 15(%[dst]) \n\t" /* odd 8 */
-
- : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2),
- [qload3] "=&r"(qload3), [st1] "=&r"(st1), [st2] "=&r"(st2),
- [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3),
- [p4] "=&r"(p4), [p5] "=&r"(p5), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3)
- : [filter45] "r"(filter45), [vector_64] "r"(vector_64), [cm] "r"(cm),
- [dst] "r"(dst), [src] "r"(src));
-
- src += 16;
- dst += 16;
- }
-
- /* Next row... */
- src_ptr += src_stride;
- dst_ptr += dst_stride;
- }
-}
-
-static void convolve_bi_horiz_64_dspr2(const uint8_t *src_ptr,
- int32_t src_stride, uint8_t *dst_ptr,
- int32_t dst_stride,
- const int16_t *filter_x0, int32_t h) {
- int32_t y, c;
- const uint8_t *src;
- uint8_t *dst;
- uint8_t *cm = aom_ff_cropTbl;
- uint32_t vector_64 = 64;
- int32_t Temp1, Temp2, Temp3;
- uint32_t qload1, qload2, qload3;
- uint32_t p1, p2, p3, p4, p5;
- uint32_t st1, st2, st3;
- const int16_t *filter = &filter_x0[3];
- uint32_t filter45;
-
- filter45 = ((const int32_t *)filter)[0];
-
- for (y = h; y--;) {
- src = src_ptr;
- dst = dst_ptr;
-
- /* prefetch data to cache memory */
- prefetch_load(src_ptr + src_stride);
- prefetch_load(src_ptr + src_stride + 32);
- prefetch_load(src_ptr + src_stride + 64);
- prefetch_store(dst_ptr + dst_stride);
- prefetch_store(dst_ptr + dst_stride + 32);
-
- for (c = 0; c < 4; c++) {
- __asm__ __volatile__(
- "ulw %[qload1], 0(%[src]) \n\t"
- "ulw %[qload2], 4(%[src]) \n\t"
-
- /* even 1. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 1 */
- "mthi $zero, $ac1 \n\t"
- "mtlo %[vector_64], $ac2 \n\t" /* even 2 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[qload1] \n\t"
- "preceu.ph.qbl %[p2], %[qload1] \n\t"
- "preceu.ph.qbr %[p3], %[qload2] \n\t"
- "preceu.ph.qbl %[p4], %[qload2] \n\t"
- "ulw %[qload3], 8(%[src]) \n\t"
- "dpa.w.ph $ac1, %[p1], %[filter45] \n\t" /* even 1 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */
-
- /* even 2. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* even 3 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p1], %[qload3] \n\t"
- "preceu.ph.qbl %[p5], %[qload3] \n\t"
- "ulw %[qload1], 12(%[src]) \n\t"
-          "dpa.w.ph          $ac2, %[p2], %[filter45]                 \n\t" /* even 2 */
-          "extp              %[Temp2], $ac2, 31                       \n\t" /* even 2 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */
-
- /* even 3. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 4 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p2], %[qload1] \n\t"
- "sb %[st1], 0(%[dst]) \n\t" /* even 1 */
- "dpa.w.ph $ac3, %[p3], %[filter45] \n\t" /* even 3 */
- "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */
-          "lbux              %[st2], %[Temp2](%[cm])                  \n\t" /* even 2 */
-
- /* even 4. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* even 5 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbl %[p3], %[qload1] \n\t"
-          "sb                %[st2], 2(%[dst])                        \n\t" /* even 2 */
- "dpa.w.ph $ac1, %[p4], %[filter45] \n\t" /* even 4 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */
-
- /* even 5. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* even 6 */
- "mthi $zero, $ac3 \n\t"
- "sb %[st3], 4(%[dst]) \n\t" /* even 3 */
- "dpa.w.ph $ac2, %[p1], %[filter45] \n\t" /* even 5 */
- "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */
-
- /* even 6. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 7 */
- "mthi $zero, $ac1 \n\t"
- "sb %[st1], 6(%[dst]) \n\t" /* even 4 */
- "dpa.w.ph $ac3, %[p5], %[filter45] \n\t" /* even 6 */
- "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */
-
- /* even 7. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* even 8 */
- "mthi $zero, $ac2 \n\t"
- "sb %[st2], 8(%[dst]) \n\t" /* even 5 */
- "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" /* even 7 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */
-
- /* even 8. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */
- "mthi $zero, $ac3 \n\t"
- "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" /* even 8 */
- "sb %[st3], 10(%[dst]) \n\t" /* even 6 */
- "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */
-
- /* ODD pixels */
- "ulw %[qload1], 1(%[src]) \n\t"
- "ulw %[qload2], 5(%[src]) \n\t"
-
- /* odd 1. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p1], %[qload1] \n\t"
- "preceu.ph.qbl %[p2], %[qload1] \n\t"
- "preceu.ph.qbr %[p3], %[qload2] \n\t"
- "preceu.ph.qbl %[p4], %[qload2] \n\t"
- "sb %[st1], 12(%[dst]) \n\t" /* even 7 */
- "ulw %[qload3], 9(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" /* odd 1 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */
-
- /* odd 2. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[qload3] \n\t"
- "preceu.ph.qbl %[p5], %[qload3] \n\t"
- "sb %[st2], 14(%[dst]) \n\t" /* even 8 */
- "ulw %[qload1], 13(%[src]) \n\t"
- "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" /* odd 2 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */
-
- /* odd 3. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p2], %[qload1] \n\t"
- "sb %[st3], 1(%[dst]) \n\t" /* odd 1 */
- "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" /* odd 3 */
- "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */
-
- /* odd 4. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbl %[p3], %[qload1] \n\t"
- "sb %[st1], 3(%[dst]) \n\t" /* odd 2 */
- "dpa.w.ph $ac3, %[p4], %[filter45] \n\t" /* odd 4 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */
-
- /* odd 5. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */
- "mthi $zero, $ac2 \n\t"
- "sb %[st2], 5(%[dst]) \n\t" /* odd 3 */
- "dpa.w.ph $ac1, %[p1], %[filter45] \n\t" /* odd 5 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */
-
- /* odd 6. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */
- "mthi $zero, $ac3 \n\t"
- "sb %[st3], 7(%[dst]) \n\t" /* odd 4 */
- "dpa.w.ph $ac2, %[p5], %[filter45] \n\t" /* odd 6 */
- "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */
-
- /* odd 7. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */
- "mthi $zero, $ac1 \n\t"
- "sb %[st1], 9(%[dst]) \n\t" /* odd 5 */
- "dpa.w.ph $ac3, %[p2], %[filter45] \n\t" /* odd 7 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */
-
- /* odd 8. pixel */
- "dpa.w.ph $ac1, %[p3], %[filter45] \n\t" /* odd 8 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */
-
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */
-
- "sb %[st2], 11(%[dst]) \n\t" /* odd 6 */
- "sb %[st3], 13(%[dst]) \n\t" /* odd 7 */
- "sb %[st1], 15(%[dst]) \n\t" /* odd 8 */
-
- : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2),
- [qload3] "=&r"(qload3), [st1] "=&r"(st1), [st2] "=&r"(st2),
- [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3),
- [p4] "=&r"(p4), [p5] "=&r"(p5), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3)
- : [filter45] "r"(filter45), [vector_64] "r"(vector_64), [cm] "r"(cm),
- [dst] "r"(dst), [src] "r"(src));
-
- src += 16;
- dst += 16;
- }
-
- /* Next row... */
- src_ptr += src_stride;
- dst_ptr += dst_stride;
- }
-}
-
-void aom_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h) {
- uint32_t pos = 38;
-
- assert(x_step_q4 == 16);
-
- prefetch_load((const uint8_t *)filter_x);
-
-  /* bit position for extract from acc */
- __asm__ __volatile__("wrdsp %[pos], 1 \n\t"
- :
- : [pos] "r"(pos));
-
- /* prefetch data to cache memory */
- prefetch_load(src);
- prefetch_load(src + 32);
- prefetch_store(dst);
-
- switch (w) {
- case 4:
- convolve_bi_horiz_4_dspr2(src, (int32_t)src_stride, dst,
- (int32_t)dst_stride, filter_x, (int32_t)h);
- break;
- case 8:
- convolve_bi_horiz_8_dspr2(src, (int32_t)src_stride, dst,
- (int32_t)dst_stride, filter_x, (int32_t)h);
- break;
- case 16:
- convolve_bi_horiz_16_dspr2(src, (int32_t)src_stride, dst,
- (int32_t)dst_stride, filter_x, (int32_t)h, 1);
- break;
- case 32:
- convolve_bi_horiz_16_dspr2(src, (int32_t)src_stride, dst,
- (int32_t)dst_stride, filter_x, (int32_t)h, 2);
- break;
- case 64:
- prefetch_load(src + 64);
- prefetch_store(dst + 32);
-
- convolve_bi_horiz_64_dspr2(src, (int32_t)src_stride, dst,
- (int32_t)dst_stride, filter_x, (int32_t)h);
- break;
- default:
- aom_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
- break;
- }
-}
-#endif
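
All of the DSPr2 kernels deleted in this change share one arithmetic pattern: each accumulator is seeded with 64 (mtlo/mthi), dpa.w.ph multiply-accumulates packed pixel pairs against packed 16-bit tap pairs, "extp ..., 31" extracts bits 38..7 of the accumulator (wrdsp sets the DSPControl extract position to 38, so this is effectively a right shift by FILTER_BITS = 7), and lbux clamps the result to [0, 255] through the aom_ff_cropTbl byte table. In the 2-tap ("bi") horizontal kernels above, the packed word filter45 holds taps filter_x[3] and filter_x[4]. A minimal plain-C sketch of that arithmetic follows; it is not part of the deleted file, and clip_pixel() is a hypothetical stand-in for the crop-table lookup.

#include <stdint.h>

/* Clamp to an 8-bit pixel; stands in for lbux + aom_ff_cropTbl. */
static uint8_t clip_pixel(int v) {
  return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
}

/* 2-tap horizontal convolution: output x blends src[x] and src[x + 1]. */
static void convolve_bi_horiz_ref(const uint8_t *src, int src_stride,
                                  uint8_t *dst, int dst_stride,
                                  const int16_t *filter_x, int w, int h) {
  for (int y = 0; y < h; ++y) {
    for (int x = 0; x < w; ++x) {
      int sum = 64;                     /* rounding seed: mtlo %[vector_64] */
      sum += src[x] * filter_x[3];      /* low half of filter45             */
      sum += src[x + 1] * filter_x[4];  /* high half of filter45            */
      dst[x] = clip_pixel(sum >> 7);    /* extp ..., 31 then lbux           */
    }
    src += src_stride;
    dst += dst_stride;
  }
}

The even/odd interleaving in the assembly only reschedules this loop so that three DSP accumulators stay busy at once; the bytes it stores are the same.
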
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve2_vert_dspr2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve2_vert_dspr2.c
deleted file mode 100644
index 40abfd89eb2..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve2_vert_dspr2.c
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdio.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/convolve_common_dspr2.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_ports/mem.h"
-
-#if HAVE_DSPR2
-static void convolve_bi_vert_4_dspr2(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- const int16_t *filter_y, int32_t w,
- int32_t h) {
- int32_t x, y;
- const uint8_t *src_ptr;
- uint8_t *dst_ptr;
- uint8_t *cm = aom_ff_cropTbl;
- uint32_t vector4a = 64;
- uint32_t load1, load2;
- uint32_t p1, p2;
- uint32_t scratch1;
- uint32_t store1, store2;
- int32_t Temp1, Temp2;
- const int16_t *filter = &filter_y[3];
- uint32_t filter45;
-
- filter45 = ((const int32_t *)filter)[0];
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_store(dst + dst_stride);
-
- for (x = 0; x < w; x += 4) {
- src_ptr = src + x;
- dst_ptr = dst + x;
-
- __asm__ __volatile__(
- "ulw %[load1], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load2], 0(%[src_ptr]) \n\t"
-
- "mtlo %[vector4a], $ac0 \n\t"
- "mtlo %[vector4a], $ac1 \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac0 \n\t"
- "mthi $zero, $ac1 \n\t"
- "mthi $zero, $ac2 \n\t"
- "mthi $zero, $ac3 \n\t"
-
- "preceu.ph.qbr %[scratch1], %[load1] \n\t"
- "preceu.ph.qbr %[p1], %[load2] \n\t"
-
- "precrq.ph.w %[p2], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
-
- "dpa.w.ph $ac0, %[p1], %[filter45] \n\t"
- "dpa.w.ph $ac1, %[p2], %[filter45] \n\t"
-
- "preceu.ph.qbl %[scratch1], %[load1] \n\t"
- "preceu.ph.qbl %[p1], %[load2] \n\t"
-
- "precrq.ph.w %[p2], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
-
- "dpa.w.ph $ac2, %[p1], %[filter45] \n\t"
- "dpa.w.ph $ac3, %[p2], %[filter45] \n\t"
-
- "extp %[Temp1], $ac0, 31 \n\t"
- "extp %[Temp2], $ac1, 31 \n\t"
-
- "lbux %[store1], %[Temp1](%[cm]) \n\t"
- "extp %[Temp1], $ac2, 31 \n\t"
-
- "lbux %[store2], %[Temp2](%[cm]) \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- "sb %[store1], 0(%[dst_ptr]) \n\t"
- "sb %[store2], 1(%[dst_ptr]) \n\t"
-
- "lbux %[store1], %[Temp1](%[cm]) \n\t"
- "lbux %[store2], %[Temp2](%[cm]) \n\t"
-
- "sb %[store1], 2(%[dst_ptr]) \n\t"
- "sb %[store2], 3(%[dst_ptr]) \n\t"
-
- : [load1] "=&r"(load1), [load2] "=&r"(load2), [p1] "=&r"(p1),
- [p2] "=&r"(p2), [scratch1] "=&r"(scratch1), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [store1] "=&r"(store1),
- [store2] "=&r"(store2), [src_ptr] "+r"(src_ptr)
- : [filter45] "r"(filter45), [vector4a] "r"(vector4a),
- [src_stride] "r"(src_stride), [cm] "r"(cm), [dst_ptr] "r"(dst_ptr));
- }
-
- /* Next row... */
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void convolve_bi_vert_64_dspr2(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- const int16_t *filter_y, int32_t h) {
- int32_t x, y;
- const uint8_t *src_ptr;
- uint8_t *dst_ptr;
- uint8_t *cm = aom_ff_cropTbl;
- uint32_t vector4a = 64;
- uint32_t load1, load2;
- uint32_t p1, p2;
- uint32_t scratch1;
- uint32_t store1, store2;
- int32_t Temp1, Temp2;
- const int16_t *filter = &filter_y[3];
- uint32_t filter45;
-
- filter45 = ((const int32_t *)filter)[0];
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_store(dst + dst_stride);
-
- for (x = 0; x < 64; x += 4) {
- src_ptr = src + x;
- dst_ptr = dst + x;
-
- __asm__ __volatile__(
- "ulw %[load1], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load2], 0(%[src_ptr]) \n\t"
-
- "mtlo %[vector4a], $ac0 \n\t"
- "mtlo %[vector4a], $ac1 \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac0 \n\t"
- "mthi $zero, $ac1 \n\t"
- "mthi $zero, $ac2 \n\t"
- "mthi $zero, $ac3 \n\t"
-
- "preceu.ph.qbr %[scratch1], %[load1] \n\t"
- "preceu.ph.qbr %[p1], %[load2] \n\t"
-
- "precrq.ph.w %[p2], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
-
- "dpa.w.ph $ac0, %[p1], %[filter45] \n\t"
- "dpa.w.ph $ac1, %[p2], %[filter45] \n\t"
-
- "preceu.ph.qbl %[scratch1], %[load1] \n\t"
- "preceu.ph.qbl %[p1], %[load2] \n\t"
-
- "precrq.ph.w %[p2], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
-
- "dpa.w.ph $ac2, %[p1], %[filter45] \n\t"
- "dpa.w.ph $ac3, %[p2], %[filter45] \n\t"
-
- "extp %[Temp1], $ac0, 31 \n\t"
- "extp %[Temp2], $ac1, 31 \n\t"
-
- "lbux %[store1], %[Temp1](%[cm]) \n\t"
- "extp %[Temp1], $ac2, 31 \n\t"
-
- "lbux %[store2], %[Temp2](%[cm]) \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- "sb %[store1], 0(%[dst_ptr]) \n\t"
- "sb %[store2], 1(%[dst_ptr]) \n\t"
-
- "lbux %[store1], %[Temp1](%[cm]) \n\t"
- "lbux %[store2], %[Temp2](%[cm]) \n\t"
-
- "sb %[store1], 2(%[dst_ptr]) \n\t"
- "sb %[store2], 3(%[dst_ptr]) \n\t"
-
- : [load1] "=&r"(load1), [load2] "=&r"(load2), [p1] "=&r"(p1),
- [p2] "=&r"(p2), [scratch1] "=&r"(scratch1), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [store1] "=&r"(store1),
- [store2] "=&r"(store2), [src_ptr] "+r"(src_ptr)
- : [filter45] "r"(filter45), [vector4a] "r"(vector4a),
- [src_stride] "r"(src_stride), [cm] "r"(cm), [dst_ptr] "r"(dst_ptr));
- }
-
- /* Next row... */
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-void aom_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h) {
- uint32_t pos = 38;
-
- assert(y_step_q4 == 16);
-
-  /* bit position for extract from acc */
- __asm__ __volatile__("wrdsp %[pos], 1 \n\t"
- :
- : [pos] "r"(pos));
-
- prefetch_store(dst);
-
- switch (w) {
- case 4:
- case 8:
- case 16:
- case 32:
- convolve_bi_vert_4_dspr2(src, src_stride, dst, dst_stride, filter_y, w,
- h);
- break;
- case 64:
- prefetch_store(dst + 32);
- convolve_bi_vert_64_dspr2(src, src_stride, dst, dst_stride, filter_y, h);
- break;
- default:
- aom_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
- break;
- }
-}
-#endif
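
The vertical 2-tap kernels above apply the same packed tap pair down a column instead of along a row: each output pixel blends a source pixel with the pixel one row below it, using the identical seed-with-64, shift-by-7, crop-table sequence. A sketch under the same assumptions, reusing the hypothetical clip_pixel() from the horizontal sketch earlier:

/* 2-tap vertical convolution: output blends a pixel with the row below. */
static void convolve_bi_vert_ref(const uint8_t *src, int src_stride,
                                 uint8_t *dst, int dst_stride,
                                 const int16_t *filter_y, int w, int h) {
  for (int y = 0; y < h; ++y) {
    for (int x = 0; x < w; ++x) {
      int sum = 64;                              /* rounding seed */
      sum += src[x] * filter_y[3];               /* current row   */
      sum += src[x + src_stride] * filter_y[4];  /* row below     */
      dst[x] = clip_pixel(sum >> 7);
    }
    src += src_stride;
    dst += dst_stride;
  }
}
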
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve8_horiz_dspr2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve8_horiz_dspr2.c
deleted file mode 100644
index f9c6879abd6..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve8_horiz_dspr2.c
+++ /dev/null
@@ -1,879 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdio.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/convolve_common_dspr2.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_ports/mem.h"
-
-#if HAVE_DSPR2
-static void convolve_horiz_4_dspr2(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- const int16_t *filter_x0, int32_t h) {
- int32_t y;
- uint8_t *cm = aom_ff_cropTbl;
- int32_t vector1b, vector2b, vector3b, vector4b;
- int32_t Temp1, Temp2, Temp3, Temp4;
- uint32_t vector4a = 64;
- uint32_t tp1, tp2;
- uint32_t p1, p2, p3, p4;
- uint32_t n1, n2, n3, n4;
- uint32_t tn1, tn2;
-
- vector1b = ((const int32_t *)filter_x0)[0];
- vector2b = ((const int32_t *)filter_x0)[1];
- vector3b = ((const int32_t *)filter_x0)[2];
- vector4b = ((const int32_t *)filter_x0)[3];
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
- prefetch_store(dst + dst_stride);
-
- __asm__ __volatile__(
- "ulw %[tp1], 0(%[src]) \n\t"
- "ulw %[tp2], 4(%[src]) \n\t"
-
- /* even 1. pixel */
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p1], %[tp1] \n\t"
- "preceu.ph.qbl %[p2], %[tp1] \n\t"
- "preceu.ph.qbr %[p3], %[tp2] \n\t"
- "preceu.ph.qbl %[p4], %[tp2] \n\t"
- "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t"
- "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t"
- "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t"
- "ulw %[tn2], 8(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t"
- "extp %[Temp1], $ac3, 31 \n\t"
-
- /* even 2. pixel */
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[tn2] \n\t"
- "balign %[tn1], %[tn2], 3 \n\t"
- "balign %[tn2], %[tp2], 3 \n\t"
- "balign %[tp2], %[tp1], 3 \n\t"
- "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t"
- "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t"
- "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t"
- "dpa.w.ph $ac2, %[p1], %[vector4b] \n\t"
- "extp %[Temp3], $ac2, 31 \n\t"
-
- /* odd 1. pixel */
- "lbux %[tp1], %[Temp1](%[cm]) \n\t"
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[n1], %[tp2] \n\t"
- "preceu.ph.qbl %[n2], %[tp2] \n\t"
- "preceu.ph.qbr %[n3], %[tn2] \n\t"
- "preceu.ph.qbl %[n4], %[tn2] \n\t"
- "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t"
- "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t"
- "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t"
- "dpa.w.ph $ac3, %[n4], %[vector4b] \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- /* odd 2. pixel */
- "lbux %[tp2], %[Temp3](%[cm]) \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[n1], %[tn1] \n\t"
- "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t"
- "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t"
- "dpa.w.ph $ac2, %[n4], %[vector3b] \n\t"
- "dpa.w.ph $ac2, %[n1], %[vector4b] \n\t"
- "extp %[Temp4], $ac2, 31 \n\t"
-
- /* clamp */
- "lbux %[tn1], %[Temp2](%[cm]) \n\t"
- "lbux %[n2], %[Temp4](%[cm]) \n\t"
-
- /* store bytes */
- "sb %[tp1], 0(%[dst]) \n\t"
- "sb %[tn1], 1(%[dst]) \n\t"
- "sb %[tp2], 2(%[dst]) \n\t"
- "sb %[n2], 3(%[dst]) \n\t"
-
- : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tn1] "=&r"(tn1),
- [tn2] "=&r"(tn2), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3),
- [p4] "=&r"(p4), [n1] "=&r"(n1), [n2] "=&r"(n2), [n3] "=&r"(n3),
- [n4] "=&r"(n4), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2),
- [Temp3] "=&r"(Temp3), [Temp4] "=&r"(Temp4)
- : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b),
- [vector3b] "r"(vector3b), [vector4b] "r"(vector4b),
- [vector4a] "r"(vector4a), [cm] "r"(cm), [dst] "r"(dst),
- [src] "r"(src));
-
- /* Next row... */
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void convolve_horiz_8_dspr2(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- const int16_t *filter_x0, int32_t h) {
- int32_t y;
- uint8_t *cm = aom_ff_cropTbl;
- uint32_t vector4a = 64;
- int32_t vector1b, vector2b, vector3b, vector4b;
- int32_t Temp1, Temp2, Temp3;
- uint32_t tp1, tp2;
- uint32_t p1, p2, p3, p4, n1;
- uint32_t tn1, tn2, tn3;
- uint32_t st0, st1;
-
- vector1b = ((const int32_t *)filter_x0)[0];
- vector2b = ((const int32_t *)filter_x0)[1];
- vector3b = ((const int32_t *)filter_x0)[2];
- vector4b = ((const int32_t *)filter_x0)[3];
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_load(src + src_stride);
- prefetch_load(src + src_stride + 32);
- prefetch_store(dst + dst_stride);
-
- __asm__ __volatile__(
- "ulw %[tp1], 0(%[src]) \n\t"
- "ulw %[tp2], 4(%[src]) \n\t"
-
- /* even 1. pixel */
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[tp1] \n\t"
- "preceu.ph.qbl %[p2], %[tp1] \n\t"
- "preceu.ph.qbr %[p3], %[tp2] \n\t"
- "preceu.ph.qbl %[p4], %[tp2] \n\t"
- "ulw %[tn2], 8(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t"
- "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t"
- "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t"
- "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t"
- "extp %[Temp1], $ac3, 31 \n\t"
-
- /* even 2. pixel */
- "preceu.ph.qbr %[p1], %[tn2] \n\t"
- "preceu.ph.qbl %[n1], %[tn2] \n\t"
- "ulw %[tn1], 12(%[src]) \n\t"
- "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t"
- "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t"
- "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t"
- "dpa.w.ph $ac2, %[p1], %[vector4b] \n\t"
- "extp %[Temp3], $ac2, 31 \n\t"
-
- /* even 3. pixel */
- "lbux %[st0], %[Temp1](%[cm]) \n\t"
- "mtlo %[vector4a], $ac1 \n\t"
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p2], %[tn1] \n\t"
- "dpa.w.ph $ac1, %[p3], %[vector1b] \n\t"
- "dpa.w.ph $ac1, %[p4], %[vector2b] \n\t"
- "dpa.w.ph $ac1, %[p1], %[vector3b] \n\t"
- "dpa.w.ph $ac1, %[n1], %[vector4b] \n\t"
- "extp %[Temp1], $ac1, 31 \n\t"
-
- /* even 4. pixel */
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "sb %[st0], 0(%[dst]) \n\t"
- "lbux %[st1], %[Temp3](%[cm]) \n\t"
-
- "balign %[tn3], %[tn1], 3 \n\t"
- "balign %[tn1], %[tn2], 3 \n\t"
- "balign %[tn2], %[tp2], 3 \n\t"
- "balign %[tp2], %[tp1], 3 \n\t"
-
- "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t"
- "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t"
- "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t"
- "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t"
- "extp %[Temp3], $ac2, 31 \n\t"
-
- "lbux %[st0], %[Temp1](%[cm]) \n\t"
-
- /* odd 1. pixel */
- "mtlo %[vector4a], $ac1 \n\t"
- "mthi $zero, $ac1 \n\t"
- "sb %[st1], 2(%[dst]) \n\t"
- "preceu.ph.qbr %[p1], %[tp2] \n\t"
- "preceu.ph.qbl %[p2], %[tp2] \n\t"
- "preceu.ph.qbr %[p3], %[tn2] \n\t"
- "preceu.ph.qbl %[p4], %[tn2] \n\t"
- "sb %[st0], 4(%[dst]) \n\t"
- "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t"
- "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t"
- "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t"
- "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- /* odd 2. pixel */
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac3 \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[tn1] \n\t"
- "preceu.ph.qbl %[n1], %[tn1] \n\t"
- "lbux %[st0], %[Temp3](%[cm]) \n\t"
- "dpa.w.ph $ac1, %[p2], %[vector1b] \n\t"
- "dpa.w.ph $ac1, %[p3], %[vector2b] \n\t"
- "dpa.w.ph $ac1, %[p4], %[vector3b] \n\t"
- "dpa.w.ph $ac1, %[p1], %[vector4b] \n\t"
- "extp %[Temp3], $ac1, 31 \n\t"
-
- /* odd 3. pixel */
- "lbux %[st1], %[Temp2](%[cm]) \n\t"
- "preceu.ph.qbr %[p2], %[tn3] \n\t"
- "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t"
- "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t"
- "dpa.w.ph $ac3, %[p1], %[vector3b] \n\t"
- "dpa.w.ph $ac3, %[n1], %[vector4b] \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- /* odd 4. pixel */
- "sb %[st1], 1(%[dst]) \n\t"
- "sb %[st0], 6(%[dst]) \n\t"
- "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t"
- "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t"
- "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t"
- "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t"
- "extp %[Temp1], $ac2, 31 \n\t"
-
- /* clamp */
- "lbux %[p4], %[Temp3](%[cm]) \n\t"
- "lbux %[p2], %[Temp2](%[cm]) \n\t"
- "lbux %[n1], %[Temp1](%[cm]) \n\t"
-
- /* store bytes */
- "sb %[p4], 3(%[dst]) \n\t"
- "sb %[p2], 5(%[dst]) \n\t"
- "sb %[n1], 7(%[dst]) \n\t"
-
- : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tn1] "=&r"(tn1),
- [tn2] "=&r"(tn2), [tn3] "=&r"(tn3), [st0] "=&r"(st0),
- [st1] "=&r"(st1), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3),
- [p4] "=&r"(p4), [n1] "=&r"(n1), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3)
- : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b),
- [vector3b] "r"(vector3b), [vector4b] "r"(vector4b),
- [vector4a] "r"(vector4a), [cm] "r"(cm), [dst] "r"(dst),
- [src] "r"(src));
-
- /* Next row... */
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void convolve_horiz_16_dspr2(const uint8_t *src_ptr, int32_t src_stride,
- uint8_t *dst_ptr, int32_t dst_stride,
- const int16_t *filter_x0, int32_t h,
- int32_t count) {
- int32_t y, c;
- const uint8_t *src;
- uint8_t *dst;
- uint8_t *cm = aom_ff_cropTbl;
- uint32_t vector_64 = 64;
- int32_t filter12, filter34, filter56, filter78;
- int32_t Temp1, Temp2, Temp3;
- uint32_t qload1, qload2, qload3;
- uint32_t p1, p2, p3, p4, p5;
- uint32_t st1, st2, st3;
-
- filter12 = ((const int32_t *)filter_x0)[0];
- filter34 = ((const int32_t *)filter_x0)[1];
- filter56 = ((const int32_t *)filter_x0)[2];
- filter78 = ((const int32_t *)filter_x0)[3];
-
- for (y = h; y--;) {
- src = src_ptr;
- dst = dst_ptr;
-
- /* prefetch data to cache memory */
- prefetch_load(src_ptr + src_stride);
- prefetch_load(src_ptr + src_stride + 32);
- prefetch_store(dst_ptr + dst_stride);
-
- for (c = 0; c < count; c++) {
- __asm__ __volatile__(
- "ulw %[qload1], 0(%[src]) \n\t"
- "ulw %[qload2], 4(%[src]) \n\t"
-
- /* even 1. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 1 */
- "mthi $zero, $ac1 \n\t"
- "mtlo %[vector_64], $ac2 \n\t" /* even 2 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[qload1] \n\t"
- "preceu.ph.qbl %[p2], %[qload1] \n\t"
- "preceu.ph.qbr %[p3], %[qload2] \n\t"
- "preceu.ph.qbl %[p4], %[qload2] \n\t"
- "ulw %[qload3], 8(%[src]) \n\t"
- "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* even 1 */
- "dpa.w.ph $ac1, %[p2], %[filter34] \n\t" /* even 1 */
- "dpa.w.ph $ac1, %[p3], %[filter56] \n\t" /* even 1 */
- "dpa.w.ph $ac1, %[p4], %[filter78] \n\t" /* even 1 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */
-
- /* even 2. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* even 3 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p1], %[qload3] \n\t"
- "preceu.ph.qbl %[p5], %[qload3] \n\t"
- "ulw %[qload1], 12(%[src]) \n\t"
-          "dpa.w.ph        $ac2, %[p2], %[filter12]        \n\t" /* even 2 */
-          "dpa.w.ph        $ac2, %[p3], %[filter34]        \n\t" /* even 2 */
-          "dpa.w.ph        $ac2, %[p4], %[filter56]        \n\t" /* even 2 */
-          "dpa.w.ph        $ac2, %[p1], %[filter78]        \n\t" /* even 2 */
-          "extp            %[Temp2], $ac2, 31              \n\t" /* even 2 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */
-
- /* even 3. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 4 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p2], %[qload1] \n\t"
- "sb %[st1], 0(%[dst]) \n\t" /* even 1 */
- "dpa.w.ph $ac3, %[p3], %[filter12] \n\t" /* even 3 */
- "dpa.w.ph $ac3, %[p4], %[filter34] \n\t" /* even 3 */
- "dpa.w.ph $ac3, %[p1], %[filter56] \n\t" /* even 3 */
- "dpa.w.ph $ac3, %[p5], %[filter78] \n\t" /* even 3 */
- "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */
-          "lbux            %[st2], %[Temp2](%[cm])         \n\t" /* even 2 */
-
- /* even 4. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* even 5 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbl %[p3], %[qload1] \n\t"
-          "sb              %[st2], 2(%[dst])               \n\t" /* even 2 */
- "ulw %[qload2], 16(%[src]) \n\t"
- "dpa.w.ph $ac1, %[p4], %[filter12] \n\t" /* even 4 */
- "dpa.w.ph $ac1, %[p1], %[filter34] \n\t" /* even 4 */
- "dpa.w.ph $ac1, %[p5], %[filter56] \n\t" /* even 4 */
- "dpa.w.ph $ac1, %[p2], %[filter78] \n\t" /* even 4 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */
-
- /* even 5. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* even 6 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p4], %[qload2] \n\t"
- "sb %[st3], 4(%[dst]) \n\t" /* even 3 */
- "dpa.w.ph $ac2, %[p1], %[filter12] \n\t" /* even 5 */
- "dpa.w.ph $ac2, %[p5], %[filter34] \n\t" /* even 5 */
- "dpa.w.ph $ac2, %[p2], %[filter56] \n\t" /* even 5 */
- "dpa.w.ph $ac2, %[p3], %[filter78] \n\t" /* even 5 */
- "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */
-
- /* even 6. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 7 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbl %[p1], %[qload2] \n\t"
- "sb %[st1], 6(%[dst]) \n\t" /* even 4 */
- "ulw %[qload3], 20(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p5], %[filter12] \n\t" /* even 6 */
- "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* even 6 */
- "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* even 6 */
- "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* even 6 */
- "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */
-
- /* even 7. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* even 8 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p5], %[qload3] \n\t"
- "sb %[st2], 8(%[dst]) \n\t" /* even 5 */
- "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* even 7 */
- "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* even 7 */
- "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* even 7 */
- "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* even 7 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */
-
- /* even 8. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */
- "mthi $zero, $ac3 \n\t"
- "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* even 8 */
- "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* even 8 */
- "sb %[st3], 10(%[dst]) \n\t" /* even 6 */
- "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* even 8 */
- "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* even 8 */
- "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */
-
- /* ODD pixels */
- "ulw %[qload1], 1(%[src]) \n\t"
- "ulw %[qload2], 5(%[src]) \n\t"
-
- /* odd 1. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p1], %[qload1] \n\t"
- "preceu.ph.qbl %[p2], %[qload1] \n\t"
- "preceu.ph.qbr %[p3], %[qload2] \n\t"
- "preceu.ph.qbl %[p4], %[qload2] \n\t"
- "sb %[st1], 12(%[dst]) \n\t" /* even 7 */
- "ulw %[qload3], 9(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter12] \n\t" /* odd 1 */
- "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* odd 1 */
- "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* odd 1 */
- "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* odd 1 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */
-
- /* odd 2. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[qload3] \n\t"
- "preceu.ph.qbl %[p5], %[qload3] \n\t"
- "sb %[st2], 14(%[dst]) \n\t" /* even 8 */
- "ulw %[qload1], 13(%[src]) \n\t"
- "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* odd 2 */
- "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* odd 2 */
- "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* odd 2 */
- "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* odd 2 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */
-
- /* odd 3. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p2], %[qload1] \n\t"
- "sb %[st3], 1(%[dst]) \n\t" /* odd 1 */
- "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* odd 3 */
- "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* odd 3 */
- "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* odd 3 */
- "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* odd 3 */
- "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */
-
- /* odd 4. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbl %[p3], %[qload1] \n\t"
- "sb %[st1], 3(%[dst]) \n\t" /* odd 2 */
- "ulw %[qload2], 17(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p4], %[filter12] \n\t" /* odd 4 */
- "dpa.w.ph $ac3, %[p1], %[filter34] \n\t" /* odd 4 */
- "dpa.w.ph $ac3, %[p5], %[filter56] \n\t" /* odd 4 */
- "dpa.w.ph $ac3, %[p2], %[filter78] \n\t" /* odd 4 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */
-
- /* odd 5. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p4], %[qload2] \n\t"
- "sb %[st2], 5(%[dst]) \n\t" /* odd 3 */
- "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* odd 5 */
- "dpa.w.ph $ac1, %[p5], %[filter34] \n\t" /* odd 5 */
- "dpa.w.ph $ac1, %[p2], %[filter56] \n\t" /* odd 5 */
- "dpa.w.ph $ac1, %[p3], %[filter78] \n\t" /* odd 5 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */
-
- /* odd 6. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbl %[p1], %[qload2] \n\t"
- "sb %[st3], 7(%[dst]) \n\t" /* odd 4 */
- "ulw %[qload3], 21(%[src]) \n\t"
- "dpa.w.ph $ac2, %[p5], %[filter12] \n\t" /* odd 6 */
- "dpa.w.ph $ac2, %[p2], %[filter34] \n\t" /* odd 6 */
- "dpa.w.ph $ac2, %[p3], %[filter56] \n\t" /* odd 6 */
- "dpa.w.ph $ac2, %[p4], %[filter78] \n\t" /* odd 6 */
- "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */
-
- /* odd 7. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p5], %[qload3] \n\t"
- "sb %[st1], 9(%[dst]) \n\t" /* odd 5 */
- "dpa.w.ph $ac3, %[p2], %[filter12] \n\t" /* odd 7 */
- "dpa.w.ph $ac3, %[p3], %[filter34] \n\t" /* odd 7 */
- "dpa.w.ph $ac3, %[p4], %[filter56] \n\t" /* odd 7 */
- "dpa.w.ph $ac3, %[p1], %[filter78] \n\t" /* odd 7 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */
-
- /* odd 8. pixel */
- "dpa.w.ph $ac1, %[p3], %[filter12] \n\t" /* odd 8 */
- "dpa.w.ph $ac1, %[p4], %[filter34] \n\t" /* odd 8 */
- "dpa.w.ph $ac1, %[p1], %[filter56] \n\t" /* odd 8 */
- "dpa.w.ph $ac1, %[p5], %[filter78] \n\t" /* odd 8 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */
-
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */
-
- "sb %[st2], 11(%[dst]) \n\t" /* odd 6 */
- "sb %[st3], 13(%[dst]) \n\t" /* odd 7 */
- "sb %[st1], 15(%[dst]) \n\t" /* odd 8 */
-
- : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2),
- [qload3] "=&r"(qload3), [st1] "=&r"(st1), [st2] "=&r"(st2),
- [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3),
- [p4] "=&r"(p4), [p5] "=&r"(p5), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3)
- : [filter12] "r"(filter12), [filter34] "r"(filter34),
- [filter56] "r"(filter56), [filter78] "r"(filter78),
- [vector_64] "r"(vector_64), [cm] "r"(cm), [dst] "r"(dst),
- [src] "r"(src));
-
- src += 16;
- dst += 16;
- }
-
- /* Next row... */
- src_ptr += src_stride;
- dst_ptr += dst_stride;
- }
-}
-
-static void convolve_horiz_64_dspr2(const uint8_t *src_ptr, int32_t src_stride,
- uint8_t *dst_ptr, int32_t dst_stride,
- const int16_t *filter_x0, int32_t h) {
- int32_t y, c;
- const uint8_t *src;
- uint8_t *dst;
- uint8_t *cm = aom_ff_cropTbl;
- uint32_t vector_64 = 64;
- int32_t filter12, filter34, filter56, filter78;
- int32_t Temp1, Temp2, Temp3;
- uint32_t qload1, qload2, qload3;
- uint32_t p1, p2, p3, p4, p5;
- uint32_t st1, st2, st3;
-
- filter12 = ((const int32_t *)filter_x0)[0];
- filter34 = ((const int32_t *)filter_x0)[1];
- filter56 = ((const int32_t *)filter_x0)[2];
- filter78 = ((const int32_t *)filter_x0)[3];
-
- for (y = h; y--;) {
- src = src_ptr;
- dst = dst_ptr;
-
- /* prefetch data to cache memory */
- prefetch_load(src_ptr + src_stride);
- prefetch_load(src_ptr + src_stride + 32);
- prefetch_load(src_ptr + src_stride + 64);
- prefetch_store(dst_ptr + dst_stride);
- prefetch_store(dst_ptr + dst_stride + 32);
-
- for (c = 0; c < 4; c++) {
- __asm__ __volatile__(
- "ulw %[qload1], 0(%[src]) \n\t"
- "ulw %[qload2], 4(%[src]) \n\t"
-
- /* even 1. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 1 */
- "mthi $zero, $ac1 \n\t"
- "mtlo %[vector_64], $ac2 \n\t" /* even 2 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[qload1] \n\t"
- "preceu.ph.qbl %[p2], %[qload1] \n\t"
- "preceu.ph.qbr %[p3], %[qload2] \n\t"
- "preceu.ph.qbl %[p4], %[qload2] \n\t"
- "ulw %[qload3], 8(%[src]) \n\t"
- "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* even 1 */
- "dpa.w.ph $ac1, %[p2], %[filter34] \n\t" /* even 1 */
- "dpa.w.ph $ac1, %[p3], %[filter56] \n\t" /* even 1 */
- "dpa.w.ph $ac1, %[p4], %[filter78] \n\t" /* even 1 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */
-
- /* even 2. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* even 3 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p1], %[qload3] \n\t"
- "preceu.ph.qbl %[p5], %[qload3] \n\t"
- "ulw %[qload1], 12(%[src]) \n\t"
-          "dpa.w.ph        $ac2, %[p2], %[filter12]        \n\t" /* even 2 */
-          "dpa.w.ph        $ac2, %[p3], %[filter34]        \n\t" /* even 2 */
-          "dpa.w.ph        $ac2, %[p4], %[filter56]        \n\t" /* even 2 */
-          "dpa.w.ph        $ac2, %[p1], %[filter78]        \n\t" /* even 2 */
-          "extp            %[Temp2], $ac2, 31              \n\t" /* even 2 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */
-
- /* even 3. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 4 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p2], %[qload1] \n\t"
- "sb %[st1], 0(%[dst]) \n\t" /* even 1 */
- "dpa.w.ph $ac3, %[p3], %[filter12] \n\t" /* even 3 */
- "dpa.w.ph $ac3, %[p4], %[filter34] \n\t" /* even 3 */
- "dpa.w.ph $ac3, %[p1], %[filter56] \n\t" /* even 3 */
- "dpa.w.ph $ac3, %[p5], %[filter78] \n\t" /* even 3 */
- "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */
-          "lbux            %[st2], %[Temp2](%[cm])         \n\t" /* even 2 */
-
- /* even 4. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* even 5 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbl %[p3], %[qload1] \n\t"
-          "sb              %[st2], 2(%[dst])               \n\t" /* even 2 */
- "ulw %[qload2], 16(%[src]) \n\t"
- "dpa.w.ph $ac1, %[p4], %[filter12] \n\t" /* even 4 */
- "dpa.w.ph $ac1, %[p1], %[filter34] \n\t" /* even 4 */
- "dpa.w.ph $ac1, %[p5], %[filter56] \n\t" /* even 4 */
- "dpa.w.ph $ac1, %[p2], %[filter78] \n\t" /* even 4 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */
-
- /* even 5. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* even 6 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p4], %[qload2] \n\t"
- "sb %[st3], 4(%[dst]) \n\t" /* even 3 */
- "dpa.w.ph $ac2, %[p1], %[filter12] \n\t" /* even 5 */
- "dpa.w.ph $ac2, %[p5], %[filter34] \n\t" /* even 5 */
- "dpa.w.ph $ac2, %[p2], %[filter56] \n\t" /* even 5 */
- "dpa.w.ph $ac2, %[p3], %[filter78] \n\t" /* even 5 */
- "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */
-
- /* even 6. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* even 7 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbl %[p1], %[qload2] \n\t"
- "sb %[st1], 6(%[dst]) \n\t" /* even 4 */
- "ulw %[qload3], 20(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p5], %[filter12] \n\t" /* even 6 */
- "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* even 6 */
- "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* even 6 */
- "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* even 6 */
- "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */
-
- /* even 7. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* even 8 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p5], %[qload3] \n\t"
- "sb %[st2], 8(%[dst]) \n\t" /* even 5 */
- "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* even 7 */
- "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* even 7 */
- "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* even 7 */
- "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* even 7 */
- "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */
-
- /* even 8. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */
- "mthi $zero, $ac3 \n\t"
- "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* even 8 */
- "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* even 8 */
- "sb %[st3], 10(%[dst]) \n\t" /* even 6 */
- "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* even 8 */
- "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* even 8 */
- "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */
-
- /* ODD pixels */
- "ulw %[qload1], 1(%[src]) \n\t"
- "ulw %[qload2], 5(%[src]) \n\t"
-
- /* odd 1. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p1], %[qload1] \n\t"
- "preceu.ph.qbl %[p2], %[qload1] \n\t"
- "preceu.ph.qbr %[p3], %[qload2] \n\t"
- "preceu.ph.qbl %[p4], %[qload2] \n\t"
- "sb %[st1], 12(%[dst]) \n\t" /* even 7 */
- "ulw %[qload3], 9(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p1], %[filter12] \n\t" /* odd 1 */
- "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* odd 1 */
- "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* odd 1 */
- "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* odd 1 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */
-
- /* odd 2. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p1], %[qload3] \n\t"
- "preceu.ph.qbl %[p5], %[qload3] \n\t"
- "sb %[st2], 14(%[dst]) \n\t" /* even 8 */
- "ulw %[qload1], 13(%[src]) \n\t"
- "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* odd 2 */
- "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* odd 2 */
- "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* odd 2 */
- "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* odd 2 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */
-
- /* odd 3. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbr %[p2], %[qload1] \n\t"
- "sb %[st3], 1(%[dst]) \n\t" /* odd 1 */
- "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* odd 3 */
- "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* odd 3 */
- "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* odd 3 */
- "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* odd 3 */
- "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */
-
- /* odd 4. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbl %[p3], %[qload1] \n\t"
- "sb %[st1], 3(%[dst]) \n\t" /* odd 2 */
- "ulw %[qload2], 17(%[src]) \n\t"
- "dpa.w.ph $ac3, %[p4], %[filter12] \n\t" /* odd 4 */
- "dpa.w.ph $ac3, %[p1], %[filter34] \n\t" /* odd 4 */
- "dpa.w.ph $ac3, %[p5], %[filter56] \n\t" /* odd 4 */
- "dpa.w.ph $ac3, %[p2], %[filter78] \n\t" /* odd 4 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */
-
- /* odd 5. pixel */
- "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */
- "mthi $zero, $ac2 \n\t"
- "preceu.ph.qbr %[p4], %[qload2] \n\t"
- "sb %[st2], 5(%[dst]) \n\t" /* odd 3 */
- "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* odd 5 */
- "dpa.w.ph $ac1, %[p5], %[filter34] \n\t" /* odd 5 */
- "dpa.w.ph $ac1, %[p2], %[filter56] \n\t" /* odd 5 */
- "dpa.w.ph $ac1, %[p3], %[filter78] \n\t" /* odd 5 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */
-
- /* odd 6. pixel */
- "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */
- "mthi $zero, $ac3 \n\t"
- "preceu.ph.qbl %[p1], %[qload2] \n\t"
- "sb %[st3], 7(%[dst]) \n\t" /* odd 4 */
- "ulw %[qload3], 21(%[src]) \n\t"
- "dpa.w.ph $ac2, %[p5], %[filter12] \n\t" /* odd 6 */
- "dpa.w.ph $ac2, %[p2], %[filter34] \n\t" /* odd 6 */
- "dpa.w.ph $ac2, %[p3], %[filter56] \n\t" /* odd 6 */
- "dpa.w.ph $ac2, %[p4], %[filter78] \n\t" /* odd 6 */
- "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */
-
- /* odd 7. pixel */
- "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */
- "mthi $zero, $ac1 \n\t"
- "preceu.ph.qbr %[p5], %[qload3] \n\t"
- "sb %[st1], 9(%[dst]) \n\t" /* odd 5 */
- "dpa.w.ph $ac3, %[p2], %[filter12] \n\t" /* odd 7 */
- "dpa.w.ph $ac3, %[p3], %[filter34] \n\t" /* odd 7 */
- "dpa.w.ph $ac3, %[p4], %[filter56] \n\t" /* odd 7 */
- "dpa.w.ph $ac3, %[p1], %[filter78] \n\t" /* odd 7 */
- "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */
-
- /* odd 8. pixel */
- "dpa.w.ph $ac1, %[p3], %[filter12] \n\t" /* odd 8 */
- "dpa.w.ph $ac1, %[p4], %[filter34] \n\t" /* odd 8 */
- "dpa.w.ph $ac1, %[p1], %[filter56] \n\t" /* odd 8 */
- "dpa.w.ph $ac1, %[p5], %[filter78] \n\t" /* odd 8 */
- "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */
-
- "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */
- "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */
- "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */
-
- "sb %[st2], 11(%[dst]) \n\t" /* odd 6 */
- "sb %[st3], 13(%[dst]) \n\t" /* odd 7 */
- "sb %[st1], 15(%[dst]) \n\t" /* odd 8 */
-
- : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2),
- [qload3] "=&r"(qload3), [st1] "=&r"(st1), [st2] "=&r"(st2),
- [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3),
- [p4] "=&r"(p4), [p5] "=&r"(p5), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3)
- : [filter12] "r"(filter12), [filter34] "r"(filter34),
- [filter56] "r"(filter56), [filter78] "r"(filter78),
- [vector_64] "r"(vector_64), [cm] "r"(cm), [dst] "r"(dst),
- [src] "r"(src));
-
- src += 16;
- dst += 16;
- }
-
- /* Next row... */
- src_ptr += src_stride;
- dst_ptr += dst_stride;
- }
-}
-
-void aom_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h) {
- assert(x_step_q4 == 16);
- assert(((const int32_t *)filter_x)[1] != 0x800000);
-
- if (((const int32_t *)filter_x)[0] == 0) {
- aom_convolve2_horiz_dspr2(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
- } else {
- uint32_t pos = 38;
-
- prefetch_load((const uint8_t *)filter_x);
- src -= 3;
-
-    /* bit position for extract from acc */
- __asm__ __volatile__("wrdsp %[pos], 1 \n\t"
- :
- : [pos] "r"(pos));
-
- /* prefetch data to cache memory */
- prefetch_load(src);
- prefetch_load(src + 32);
- prefetch_store(dst);
-
- switch (w) {
- case 4:
- convolve_horiz_4_dspr2(src, (int32_t)src_stride, dst,
- (int32_t)dst_stride, filter_x, (int32_t)h);
- break;
- case 8:
- convolve_horiz_8_dspr2(src, (int32_t)src_stride, dst,
- (int32_t)dst_stride, filter_x, (int32_t)h);
- break;
- case 16:
- convolve_horiz_16_dspr2(src, (int32_t)src_stride, dst,
- (int32_t)dst_stride, filter_x, (int32_t)h, 1);
- break;
- case 32:
- convolve_horiz_16_dspr2(src, (int32_t)src_stride, dst,
- (int32_t)dst_stride, filter_x, (int32_t)h, 2);
- break;
- case 64:
- prefetch_load(src + 64);
- prefetch_store(dst + 32);
-
- convolve_horiz_64_dspr2(src, (int32_t)src_stride, dst,
- (int32_t)dst_stride, filter_x, (int32_t)h);
- break;
- default:
- aom_convolve8_horiz_c(src + 3, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
- break;
- }
- }
-}
-#endif
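
The 8-tap horizontal path generalizes the 2-tap sketch: the dispatcher pre-shifts the source by 3 so the filter window is centered on the output pixel, and the eight taps are read as four packed 32-bit words (filter12 through filter78). The "((const int32_t *)filter_x)[0] == 0" test above routes filters whose leading taps are zero (in practice the bilinear filters, which use only taps 3 and 4) to the cheaper 2-tap kernels. A sketch under the same assumptions, again reusing the hypothetical clip_pixel():

/* 8-tap horizontal convolution; the unrolled even/odd assembly above
 * computes the same sums with the taps packed as filter12..filter78. */
static void convolve8_horiz_ref(const uint8_t *src, int src_stride,
                                uint8_t *dst, int dst_stride,
                                const int16_t *filter_x, int w, int h) {
  src -= 3; /* matches the dispatcher's pre-shift */
  for (int y = 0; y < h; ++y) {
    for (int x = 0; x < w; ++x) {
      int sum = 64;
      for (int k = 0; k < 8; ++k) sum += src[x + k] * filter_x[k];
      dst[x] = clip_pixel(sum >> 7);
    }
    src += src_stride;
    dst += dst_stride;
  }
}
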
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve8_vert_dspr2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve8_vert_dspr2.c
deleted file mode 100644
index 201e664279f..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve8_vert_dspr2.c
+++ /dev/null
@@ -1,361 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-#include <stdio.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/convolve_common_dspr2.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_ports/mem.h"
-
-#if HAVE_DSPR2
-static void convolve_vert_4_dspr2(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- const int16_t *filter_y, int32_t w,
- int32_t h) {
- int32_t x, y;
- const uint8_t *src_ptr;
- uint8_t *dst_ptr;
- uint8_t *cm = aom_ff_cropTbl;
- uint32_t vector4a = 64;
- uint32_t load1, load2, load3, load4;
- uint32_t p1, p2;
- uint32_t n1, n2;
- uint32_t scratch1, scratch2;
- uint32_t store1, store2;
- int32_t vector1b, vector2b, vector3b, vector4b;
- int32_t Temp1, Temp2;
-
- vector1b = ((const int32_t *)filter_y)[0];
- vector2b = ((const int32_t *)filter_y)[1];
- vector3b = ((const int32_t *)filter_y)[2];
- vector4b = ((const int32_t *)filter_y)[3];
-
- src -= 3 * src_stride;
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_store(dst + dst_stride);
-
- for (x = 0; x < w; x += 4) {
- src_ptr = src + x;
- dst_ptr = dst + x;
-
- __asm__ __volatile__(
- "ulw %[load1], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load2], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load3], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load4], 0(%[src_ptr]) \n\t"
-
- "mtlo %[vector4a], $ac0 \n\t"
- "mtlo %[vector4a], $ac1 \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac0 \n\t"
- "mthi $zero, $ac1 \n\t"
- "mthi $zero, $ac2 \n\t"
- "mthi $zero, $ac3 \n\t"
-
- "preceu.ph.qbr %[scratch1], %[load1] \n\t"
- "preceu.ph.qbr %[p1], %[load2] \n\t"
- "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
- "preceu.ph.qbr %[scratch2], %[load3] \n\t"
- "preceu.ph.qbr %[p2], %[load4] \n\t"
- "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */
- "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */
-
- "dpa.w.ph $ac0, %[p1], %[vector1b] \n\t"
- "dpa.w.ph $ac0, %[p2], %[vector2b] \n\t"
- "dpa.w.ph $ac1, %[n1], %[vector1b] \n\t"
- "dpa.w.ph $ac1, %[n2], %[vector2b] \n\t"
-
- "preceu.ph.qbl %[scratch1], %[load1] \n\t"
- "preceu.ph.qbl %[p1], %[load2] \n\t"
- "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
- "preceu.ph.qbl %[scratch2], %[load3] \n\t"
- "preceu.ph.qbl %[p2], %[load4] \n\t"
- "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */
- "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */
-
- "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t"
- "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t"
- "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t"
- "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t"
-
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load1], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load2], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load3], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load4], 0(%[src_ptr]) \n\t"
-
- "preceu.ph.qbr %[scratch1], %[load1] \n\t"
- "preceu.ph.qbr %[p1], %[load2] \n\t"
- "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
- "preceu.ph.qbr %[scratch2], %[load3] \n\t"
- "preceu.ph.qbr %[p2], %[load4] \n\t"
- "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */
- "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */
-
- "dpa.w.ph $ac0, %[p1], %[vector3b] \n\t"
- "dpa.w.ph $ac0, %[p2], %[vector4b] \n\t"
- "extp %[Temp1], $ac0, 31 \n\t"
- "dpa.w.ph $ac1, %[n1], %[vector3b] \n\t"
- "dpa.w.ph $ac1, %[n2], %[vector4b] \n\t"
- "extp %[Temp2], $ac1, 31 \n\t"
-
- "preceu.ph.qbl %[scratch1], %[load1] \n\t"
- "preceu.ph.qbl %[p1], %[load2] \n\t"
- "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
- "preceu.ph.qbl %[scratch2], %[load3] \n\t"
- "preceu.ph.qbl %[p2], %[load4] \n\t"
- "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */
- "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */
-
- "lbux %[store1], %[Temp1](%[cm]) \n\t"
- "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t"
- "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t"
- "extp %[Temp1], $ac2, 31 \n\t"
-
- "lbux %[store2], %[Temp2](%[cm]) \n\t"
- "dpa.w.ph $ac3, %[n1], %[vector3b] \n\t"
- "dpa.w.ph $ac3, %[n2], %[vector4b] \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- "sb %[store1], 0(%[dst_ptr]) \n\t"
- "sb %[store2], 1(%[dst_ptr]) \n\t"
-
- "lbux %[store1], %[Temp1](%[cm]) \n\t"
- "lbux %[store2], %[Temp2](%[cm]) \n\t"
-
- "sb %[store1], 2(%[dst_ptr]) \n\t"
- "sb %[store2], 3(%[dst_ptr]) \n\t"
-
- : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3),
- [load4] "=&r"(load4), [p1] "=&r"(p1), [p2] "=&r"(p2),
- [n1] "=&r"(n1), [n2] "=&r"(n2), [scratch1] "=&r"(scratch1),
- [scratch2] "=&r"(scratch2), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [store1] "=&r"(store1),
- [store2] "=&r"(store2), [src_ptr] "+r"(src_ptr)
- : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b),
- [vector3b] "r"(vector3b), [vector4b] "r"(vector4b),
- [vector4a] "r"(vector4a), [src_stride] "r"(src_stride),
- [cm] "r"(cm), [dst_ptr] "r"(dst_ptr));
- }
-
- /* Next row... */
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-static void convolve_vert_64_dspr2(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- const int16_t *filter_y, int32_t h) {
- int32_t x, y;
- const uint8_t *src_ptr;
- uint8_t *dst_ptr;
- uint8_t *cm = aom_ff_cropTbl;
- uint32_t vector4a = 64;
- uint32_t load1, load2, load3, load4;
- uint32_t p1, p2;
- uint32_t n1, n2;
- uint32_t scratch1, scratch2;
- uint32_t store1, store2;
- int32_t vector1b, vector2b, vector3b, vector4b;
- int32_t Temp1, Temp2;
-
- vector1b = ((const int32_t *)filter_y)[0];
- vector2b = ((const int32_t *)filter_y)[1];
- vector3b = ((const int32_t *)filter_y)[2];
- vector4b = ((const int32_t *)filter_y)[3];
-
- src -= 3 * src_stride;
-
- for (y = h; y--;) {
- /* prefetch data to cache memory */
- prefetch_store(dst + dst_stride);
- prefetch_store(dst + dst_stride + 32);
-
- for (x = 0; x < 64; x += 4) {
- src_ptr = src + x;
- dst_ptr = dst + x;
-
- __asm__ __volatile__(
- "ulw %[load1], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load2], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load3], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load4], 0(%[src_ptr]) \n\t"
-
- "mtlo %[vector4a], $ac0 \n\t"
- "mtlo %[vector4a], $ac1 \n\t"
- "mtlo %[vector4a], $ac2 \n\t"
- "mtlo %[vector4a], $ac3 \n\t"
- "mthi $zero, $ac0 \n\t"
- "mthi $zero, $ac1 \n\t"
- "mthi $zero, $ac2 \n\t"
- "mthi $zero, $ac3 \n\t"
-
- "preceu.ph.qbr %[scratch1], %[load1] \n\t"
- "preceu.ph.qbr %[p1], %[load2] \n\t"
- "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
- "preceu.ph.qbr %[scratch2], %[load3] \n\t"
- "preceu.ph.qbr %[p2], %[load4] \n\t"
- "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */
- "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */
-
- "dpa.w.ph $ac0, %[p1], %[vector1b] \n\t"
- "dpa.w.ph $ac0, %[p2], %[vector2b] \n\t"
- "dpa.w.ph $ac1, %[n1], %[vector1b] \n\t"
- "dpa.w.ph $ac1, %[n2], %[vector2b] \n\t"
-
- "preceu.ph.qbl %[scratch1], %[load1] \n\t"
- "preceu.ph.qbl %[p1], %[load2] \n\t"
- "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
- "preceu.ph.qbl %[scratch2], %[load3] \n\t"
- "preceu.ph.qbl %[p2], %[load4] \n\t"
- "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */
- "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */
-
- "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t"
- "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t"
- "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t"
- "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t"
-
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load1], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load2], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load3], 0(%[src_ptr]) \n\t"
- "add %[src_ptr], %[src_ptr], %[src_stride] \n\t"
- "ulw %[load4], 0(%[src_ptr]) \n\t"
-
- "preceu.ph.qbr %[scratch1], %[load1] \n\t"
- "preceu.ph.qbr %[p1], %[load2] \n\t"
- "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
- "preceu.ph.qbr %[scratch2], %[load3] \n\t"
- "preceu.ph.qbr %[p2], %[load4] \n\t"
- "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */
- "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */
-
- "dpa.w.ph $ac0, %[p1], %[vector3b] \n\t"
- "dpa.w.ph $ac0, %[p2], %[vector4b] \n\t"
- "extp %[Temp1], $ac0, 31 \n\t"
- "dpa.w.ph $ac1, %[n1], %[vector3b] \n\t"
- "dpa.w.ph $ac1, %[n2], %[vector4b] \n\t"
- "extp %[Temp2], $ac1, 31 \n\t"
-
- "preceu.ph.qbl %[scratch1], %[load1] \n\t"
- "preceu.ph.qbl %[p1], %[load2] \n\t"
- "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */
- "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */
- "preceu.ph.qbl %[scratch2], %[load3] \n\t"
- "preceu.ph.qbl %[p2], %[load4] \n\t"
- "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */
- "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */
-
- "lbux %[store1], %[Temp1](%[cm]) \n\t"
- "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t"
- "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t"
- "extp %[Temp1], $ac2, 31 \n\t"
-
- "lbux %[store2], %[Temp2](%[cm]) \n\t"
- "dpa.w.ph $ac3, %[n1], %[vector3b] \n\t"
- "dpa.w.ph $ac3, %[n2], %[vector4b] \n\t"
- "extp %[Temp2], $ac3, 31 \n\t"
-
- "sb %[store1], 0(%[dst_ptr]) \n\t"
- "sb %[store2], 1(%[dst_ptr]) \n\t"
-
- "lbux %[store1], %[Temp1](%[cm]) \n\t"
- "lbux %[store2], %[Temp2](%[cm]) \n\t"
-
- "sb %[store1], 2(%[dst_ptr]) \n\t"
- "sb %[store2], 3(%[dst_ptr]) \n\t"
-
- : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3),
- [load4] "=&r"(load4), [p1] "=&r"(p1), [p2] "=&r"(p2),
- [n1] "=&r"(n1), [n2] "=&r"(n2), [scratch1] "=&r"(scratch1),
- [scratch2] "=&r"(scratch2), [Temp1] "=&r"(Temp1),
- [Temp2] "=&r"(Temp2), [store1] "=&r"(store1),
- [store2] "=&r"(store2), [src_ptr] "+r"(src_ptr)
- : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b),
- [vector3b] "r"(vector3b), [vector4b] "r"(vector4b),
- [vector4a] "r"(vector4a), [src_stride] "r"(src_stride),
- [cm] "r"(cm), [dst_ptr] "r"(dst_ptr));
- }
-
- /* Next row... */
- src += src_stride;
- dst += dst_stride;
- }
-}
-
-void aom_convolve8_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h) {
- assert(y_step_q4 == 16);
- assert(((const int32_t *)filter_y)[1] != 0x800000);
-
- if (((const int32_t *)filter_y)[0] == 0) {
- aom_convolve2_vert_dspr2(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
- } else {
- uint32_t pos = 38;
-
-    /* bit position for extract from acc */
- __asm__ __volatile__("wrdsp %[pos], 1 \n\t"
- :
- : [pos] "r"(pos));
-
- prefetch_store(dst);
-
- switch (w) {
- case 4:
- case 8:
- case 16:
- case 32:
- convolve_vert_4_dspr2(src, src_stride, dst, dst_stride, filter_y, w, h);
- break;
- case 64:
- prefetch_store(dst + 32);
- convolve_vert_64_dspr2(src, src_stride, dst, dst_stride, filter_y, h);
- break;
- default:
- aom_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
- break;
- }
- }
-}
-
-#endif
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve_common_dspr2.h b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve_common_dspr2.h
deleted file mode 100644
index e5d48a884db..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/convolve_common_dspr2.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_MIPS_CONVOLVE_COMMON_DSPR2_H_
-#define AOM_AOM_DSP_MIPS_CONVOLVE_COMMON_DSPR2_H_
-
-#include <assert.h>
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/mips/common_dspr2.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if HAVE_DSPR2
-void aom_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h);
-
-void aom_convolve2_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
- ptrdiff_t dst_stride, const int16_t *filter, int w,
- int h);
-
-void aom_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
- uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h);
-
-#endif // #if HAVE_DSPR2
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_MIPS_CONVOLVE_COMMON_DSPR2_H_
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/intrapred16_dspr2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/intrapred16_dspr2.c
deleted file mode 100644
index 7c221ae8998..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/intrapred16_dspr2.c
+++ /dev/null
@@ -1,327 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/mips/common_dspr2.h"
-
-#if HAVE_DSPR2
-void aom_h_predictor_16x16_dspr2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int32_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
- int32_t tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
-
- (void)above;
-
- __asm__ __volatile__(
- "lb %[tmp1], (%[left]) \n\t"
- "lb %[tmp2], 1(%[left]) \n\t"
- "lb %[tmp3], 2(%[left]) \n\t"
- "lb %[tmp4], 3(%[left]) \n\t"
- "lb %[tmp5], 4(%[left]) \n\t"
- "lb %[tmp6], 5(%[left]) \n\t"
- "lb %[tmp7], 6(%[left]) \n\t"
- "lb %[tmp8], 7(%[left]) \n\t"
- "lb %[tmp9], 8(%[left]) \n\t"
- "lb %[tmp10], 9(%[left]) \n\t"
- "lb %[tmp11], 10(%[left]) \n\t"
- "lb %[tmp12], 11(%[left]) \n\t"
- "lb %[tmp13], 12(%[left]) \n\t"
- "lb %[tmp14], 13(%[left]) \n\t"
- "lb %[tmp15], 14(%[left]) \n\t"
- "lb %[tmp16], 15(%[left]) \n\t"
-
- "replv.qb %[tmp1], %[tmp1] \n\t"
- "replv.qb %[tmp2], %[tmp2] \n\t"
- "replv.qb %[tmp3], %[tmp3] \n\t"
- "replv.qb %[tmp4], %[tmp4] \n\t"
- "replv.qb %[tmp5], %[tmp5] \n\t"
- "replv.qb %[tmp6], %[tmp6] \n\t"
- "replv.qb %[tmp7], %[tmp7] \n\t"
- "replv.qb %[tmp8], %[tmp8] \n\t"
- "replv.qb %[tmp9], %[tmp9] \n\t"
- "replv.qb %[tmp10], %[tmp10] \n\t"
- "replv.qb %[tmp11], %[tmp11] \n\t"
- "replv.qb %[tmp12], %[tmp12] \n\t"
- "replv.qb %[tmp13], %[tmp13] \n\t"
- "replv.qb %[tmp14], %[tmp14] \n\t"
- "replv.qb %[tmp15], %[tmp15] \n\t"
- "replv.qb %[tmp16], %[tmp16] \n\t"
-
- "sw %[tmp1], (%[dst]) \n\t"
- "sw %[tmp1], 4(%[dst]) \n\t"
- "sw %[tmp1], 8(%[dst]) \n\t"
- "sw %[tmp1], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp2], (%[dst]) \n\t"
- "sw %[tmp2], 4(%[dst]) \n\t"
- "sw %[tmp2], 8(%[dst]) \n\t"
- "sw %[tmp2], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp3], (%[dst]) \n\t"
- "sw %[tmp3], 4(%[dst]) \n\t"
- "sw %[tmp3], 8(%[dst]) \n\t"
- "sw %[tmp3], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp4], (%[dst]) \n\t"
- "sw %[tmp4], 4(%[dst]) \n\t"
- "sw %[tmp4], 8(%[dst]) \n\t"
- "sw %[tmp4], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp5], (%[dst]) \n\t"
- "sw %[tmp5], 4(%[dst]) \n\t"
- "sw %[tmp5], 8(%[dst]) \n\t"
- "sw %[tmp5], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp6], (%[dst]) \n\t"
- "sw %[tmp6], 4(%[dst]) \n\t"
- "sw %[tmp6], 8(%[dst]) \n\t"
- "sw %[tmp6], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp7], (%[dst]) \n\t"
- "sw %[tmp7], 4(%[dst]) \n\t"
- "sw %[tmp7], 8(%[dst]) \n\t"
- "sw %[tmp7], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp8], (%[dst]) \n\t"
- "sw %[tmp8], 4(%[dst]) \n\t"
- "sw %[tmp8], 8(%[dst]) \n\t"
- "sw %[tmp8], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp9], (%[dst]) \n\t"
- "sw %[tmp9], 4(%[dst]) \n\t"
- "sw %[tmp9], 8(%[dst]) \n\t"
- "sw %[tmp9], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp10], (%[dst]) \n\t"
- "sw %[tmp10], 4(%[dst]) \n\t"
- "sw %[tmp10], 8(%[dst]) \n\t"
- "sw %[tmp10], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp11], (%[dst]) \n\t"
- "sw %[tmp11], 4(%[dst]) \n\t"
- "sw %[tmp11], 8(%[dst]) \n\t"
- "sw %[tmp11], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp12], (%[dst]) \n\t"
- "sw %[tmp12], 4(%[dst]) \n\t"
- "sw %[tmp12], 8(%[dst]) \n\t"
- "sw %[tmp12], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp13], (%[dst]) \n\t"
- "sw %[tmp13], 4(%[dst]) \n\t"
- "sw %[tmp13], 8(%[dst]) \n\t"
- "sw %[tmp13], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp14], (%[dst]) \n\t"
- "sw %[tmp14], 4(%[dst]) \n\t"
- "sw %[tmp14], 8(%[dst]) \n\t"
- "sw %[tmp14], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp15], (%[dst]) \n\t"
- "sw %[tmp15], 4(%[dst]) \n\t"
- "sw %[tmp15], 8(%[dst]) \n\t"
- "sw %[tmp15], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp16], (%[dst]) \n\t"
- "sw %[tmp16], 4(%[dst]) \n\t"
- "sw %[tmp16], 8(%[dst]) \n\t"
- "sw %[tmp16], 12(%[dst]) \n\t"
-
- : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3),
- [tmp4] "=&r"(tmp4), [tmp5] "=&r"(tmp5), [tmp7] "=&r"(tmp7),
- [tmp6] "=&r"(tmp6), [tmp8] "=&r"(tmp8), [tmp9] "=&r"(tmp9),
- [tmp10] "=&r"(tmp10), [tmp11] "=&r"(tmp11), [tmp12] "=&r"(tmp12),
- [tmp13] "=&r"(tmp13), [tmp14] "=&r"(tmp14), [tmp15] "=&r"(tmp15),
- [tmp16] "=&r"(tmp16)
- : [left] "r"(left), [dst] "r"(dst), [stride] "r"(stride));
-}
-
-void aom_dc_predictor_16x16_dspr2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int32_t expected_dc;
- int32_t average;
- int32_t tmp, above1, above_l1, above_r1, left1, left_r1, left_l1;
- int32_t above2, left2;
-
- __asm__ __volatile__(
- "lw %[above1], (%[above]) \n\t"
- "lw %[above2], 4(%[above]) \n\t"
- "lw %[left1], (%[left]) \n\t"
- "lw %[left2], 4(%[left]) \n\t"
-
- "preceu.ph.qbl %[above_l1], %[above1] \n\t"
- "preceu.ph.qbr %[above_r1], %[above1] \n\t"
- "preceu.ph.qbl %[left_l1], %[left1] \n\t"
- "preceu.ph.qbr %[left_r1], %[left1] \n\t"
-
- "addu.ph %[average], %[above_r1], %[above_l1] \n\t"
- "addu.ph %[average], %[average], %[left_l1] \n\t"
- "addu.ph %[average], %[average], %[left_r1] \n\t"
-
- "preceu.ph.qbl %[above_l1], %[above2] \n\t"
- "preceu.ph.qbr %[above_r1], %[above2] \n\t"
- "preceu.ph.qbl %[left_l1], %[left2] \n\t"
- "preceu.ph.qbr %[left_r1], %[left2] \n\t"
-
- "addu.ph %[average], %[average], %[above_l1] \n\t"
- "addu.ph %[average], %[average], %[above_r1] \n\t"
- "addu.ph %[average], %[average], %[left_l1] \n\t"
- "addu.ph %[average], %[average], %[left_r1] \n\t"
-
- "lw %[above1], 8(%[above]) \n\t"
- "lw %[above2], 12(%[above]) \n\t"
- "lw %[left1], 8(%[left]) \n\t"
- "lw %[left2], 12(%[left]) \n\t"
-
- "preceu.ph.qbl %[above_l1], %[above1] \n\t"
- "preceu.ph.qbr %[above_r1], %[above1] \n\t"
- "preceu.ph.qbl %[left_l1], %[left1] \n\t"
- "preceu.ph.qbr %[left_r1], %[left1] \n\t"
-
- "addu.ph %[average], %[average], %[above_l1] \n\t"
- "addu.ph %[average], %[average], %[above_r1] \n\t"
- "addu.ph %[average], %[average], %[left_l1] \n\t"
- "addu.ph %[average], %[average], %[left_r1] \n\t"
-
- "preceu.ph.qbl %[above_l1], %[above2] \n\t"
- "preceu.ph.qbr %[above_r1], %[above2] \n\t"
- "preceu.ph.qbl %[left_l1], %[left2] \n\t"
- "preceu.ph.qbr %[left_r1], %[left2] \n\t"
-
- "addu.ph %[average], %[average], %[above_l1] \n\t"
- "addu.ph %[average], %[average], %[above_r1] \n\t"
- "addu.ph %[average], %[average], %[left_l1] \n\t"
- "addu.ph %[average], %[average], %[left_r1] \n\t"
-
- "addiu %[average], %[average], 16 \n\t"
- "srl %[tmp], %[average], 16 \n\t"
- "addu.ph %[average], %[tmp], %[average] \n\t"
- "srl %[expected_dc], %[average], 5 \n\t"
- "replv.qb %[expected_dc], %[expected_dc] \n\t"
-
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
- "sw %[expected_dc], 8(%[dst]) \n\t"
- "sw %[expected_dc], 12(%[dst]) \n\t"
-
- : [left1] "=&r"(left1), [above1] "=&r"(above1), [left_l1] "=&r"(left_l1),
- [above_l1] "=&r"(above_l1), [left_r1] "=&r"(left_r1),
- [above_r1] "=&r"(above_r1), [above2] "=&r"(above2),
- [left2] "=&r"(left2), [average] "=&r"(average), [tmp] "=&r"(tmp),
- [expected_dc] "=&r"(expected_dc)
- : [above] "r"(above), [left] "r"(left), [dst] "r"(dst),
- [stride] "r"(stride));
-}
-#endif // #if HAVE_DSPR2
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/intrapred4_dspr2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/intrapred4_dspr2.c
deleted file mode 100644
index 0a21979c746..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/intrapred4_dspr2.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/mips/common_dspr2.h"
-
-#if HAVE_DSPR2
-void aom_h_predictor_4x4_dspr2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int32_t tmp1, tmp2, tmp3, tmp4;
- (void)above;
-
- __asm__ __volatile__(
- "lb %[tmp1], (%[left]) \n\t"
- "lb %[tmp2], 1(%[left]) \n\t"
- "lb %[tmp3], 2(%[left]) \n\t"
- "lb %[tmp4], 3(%[left]) \n\t"
- "replv.qb %[tmp1], %[tmp1] \n\t"
- "replv.qb %[tmp2], %[tmp2] \n\t"
- "replv.qb %[tmp3], %[tmp3] \n\t"
- "replv.qb %[tmp4], %[tmp4] \n\t"
- "sw %[tmp1], (%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp2], (%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp3], (%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp4], (%[dst]) \n\t"
-
- : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3),
- [tmp4] "=&r"(tmp4)
- : [left] "r"(left), [dst] "r"(dst), [stride] "r"(stride));
-}
-
-void aom_dc_predictor_4x4_dspr2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int32_t expected_dc;
- int32_t average;
- int32_t tmp, above_c, above_l, above_r, left_c, left_r, left_l;
-
- __asm__ __volatile__(
- "lw %[above_c], (%[above]) \n\t"
- "lw %[left_c], (%[left]) \n\t"
-
- "preceu.ph.qbl %[above_l], %[above_c] \n\t"
- "preceu.ph.qbr %[above_r], %[above_c] \n\t"
- "preceu.ph.qbl %[left_l], %[left_c] \n\t"
- "preceu.ph.qbr %[left_r], %[left_c] \n\t"
-
- "addu.ph %[average], %[above_r], %[above_l] \n\t"
- "addu.ph %[average], %[average], %[left_l] \n\t"
- "addu.ph %[average], %[average], %[left_r] \n\t"
- "addiu %[average], %[average], 4 \n\t"
- "srl %[tmp], %[average], 16 \n\t"
- "addu.ph %[average], %[tmp], %[average] \n\t"
- "srl %[expected_dc], %[average], 3 \n\t"
- "replv.qb %[expected_dc], %[expected_dc] \n\t"
-
- "sw %[expected_dc], (%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
-
- : [above_c] "=&r"(above_c), [above_l] "=&r"(above_l),
- [above_r] "=&r"(above_r), [left_c] "=&r"(left_c),
- [left_l] "=&r"(left_l), [left_r] "=&r"(left_r),
- [average] "=&r"(average), [tmp] "=&r"(tmp),
- [expected_dc] "=&r"(expected_dc)
- : [above] "r"(above), [left] "r"(left), [dst] "r"(dst),
- [stride] "r"(stride));
-}
-#endif // #if HAVE_DSPR2
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/intrapred8_dspr2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/intrapred8_dspr2.c
deleted file mode 100644
index d42a77c8025..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/intrapred8_dspr2.c
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/mips/common_dspr2.h"
-
-#if HAVE_DSPR2
-void aom_h_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int32_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
- (void)above;
-
- __asm__ __volatile__(
- "lb %[tmp1], (%[left]) \n\t"
- "lb %[tmp2], 1(%[left]) \n\t"
- "lb %[tmp3], 2(%[left]) \n\t"
- "lb %[tmp4], 3(%[left]) \n\t"
- "lb %[tmp5], 4(%[left]) \n\t"
- "lb %[tmp6], 5(%[left]) \n\t"
- "lb %[tmp7], 6(%[left]) \n\t"
- "lb %[tmp8], 7(%[left]) \n\t"
-
- "replv.qb %[tmp1], %[tmp1] \n\t"
- "replv.qb %[tmp2], %[tmp2] \n\t"
- "replv.qb %[tmp3], %[tmp3] \n\t"
- "replv.qb %[tmp4], %[tmp4] \n\t"
- "replv.qb %[tmp5], %[tmp5] \n\t"
- "replv.qb %[tmp6], %[tmp6] \n\t"
- "replv.qb %[tmp7], %[tmp7] \n\t"
- "replv.qb %[tmp8], %[tmp8] \n\t"
-
- "sw %[tmp1], (%[dst]) \n\t"
- "sw %[tmp1], 4(%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp2], (%[dst]) \n\t"
- "sw %[tmp2], 4(%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp3], (%[dst]) \n\t"
- "sw %[tmp3], 4(%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp4], (%[dst]) \n\t"
- "sw %[tmp4], 4(%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp5], (%[dst]) \n\t"
- "sw %[tmp5], 4(%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp6], (%[dst]) \n\t"
- "sw %[tmp6], 4(%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp7], (%[dst]) \n\t"
- "sw %[tmp7], 4(%[dst]) \n\t"
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[tmp8], (%[dst]) \n\t"
- "sw %[tmp8], 4(%[dst]) \n\t"
-
- : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3),
- [tmp4] "=&r"(tmp4), [tmp5] "=&r"(tmp5), [tmp7] "=&r"(tmp7),
- [tmp6] "=&r"(tmp6), [tmp8] "=&r"(tmp8)
- : [left] "r"(left), [dst] "r"(dst), [stride] "r"(stride));
-}
-
-void aom_dc_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- int32_t expected_dc;
- int32_t average;
- int32_t tmp, above1, above_l1, above_r1, left1, left_r1, left_l1;
- int32_t above2, above_l2, above_r2, left2, left_r2, left_l2;
-
- __asm__ __volatile__(
- "lw %[above1], (%[above]) \n\t"
- "lw %[above2], 4(%[above]) \n\t"
- "lw %[left1], (%[left]) \n\t"
- "lw %[left2], 4(%[left]) \n\t"
-
- "preceu.ph.qbl %[above_l1], %[above1] \n\t"
- "preceu.ph.qbr %[above_r1], %[above1] \n\t"
- "preceu.ph.qbl %[left_l1], %[left1] \n\t"
- "preceu.ph.qbr %[left_r1], %[left1] \n\t"
-
- "preceu.ph.qbl %[above_l2], %[above2] \n\t"
- "preceu.ph.qbr %[above_r2], %[above2] \n\t"
- "preceu.ph.qbl %[left_l2], %[left2] \n\t"
- "preceu.ph.qbr %[left_r2], %[left2] \n\t"
-
- "addu.ph %[average], %[above_r1], %[above_l1] \n\t"
- "addu.ph %[average], %[average], %[left_l1] \n\t"
- "addu.ph %[average], %[average], %[left_r1] \n\t"
-
- "addu.ph %[average], %[average], %[above_l2] \n\t"
- "addu.ph %[average], %[average], %[above_r2] \n\t"
- "addu.ph %[average], %[average], %[left_l2] \n\t"
- "addu.ph %[average], %[average], %[left_r2] \n\t"
-
- "addiu %[average], %[average], 8 \n\t"
-
- "srl %[tmp], %[average], 16 \n\t"
- "addu.ph %[average], %[tmp], %[average] \n\t"
- "srl %[expected_dc], %[average], 4 \n\t"
- "replv.qb %[expected_dc], %[expected_dc] \n\t"
-
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
-
- "add %[dst], %[dst], %[stride] \n\t"
- "sw %[expected_dc], (%[dst]) \n\t"
- "sw %[expected_dc], 4(%[dst]) \n\t"
-
- : [above1] "=&r"(above1), [above_l1] "=&r"(above_l1),
- [above_r1] "=&r"(above_r1), [left1] "=&r"(left1),
- [left_l1] "=&r"(left_l1), [left_r1] "=&r"(left_r1),
- [above2] "=&r"(above2), [above_l2] "=&r"(above_l2),
- [above_r2] "=&r"(above_r2), [left2] "=&r"(left2),
- [left_l2] "=&r"(left_l2), [left_r2] "=&r"(left_r2),
- [average] "=&r"(average), [tmp] "=&r"(tmp),
- [expected_dc] "=&r"(expected_dc)
- : [above] "r"(above), [left] "r"(left), [dst] "r"(dst),
- [stride] "r"(stride));
-}
-#endif // #if HAVE_DSPR2
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/intrapred_msa.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/intrapred_msa.c
deleted file mode 100644
index 9f25cc1ca0d..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/intrapred_msa.c
+++ /dev/null
@@ -1,550 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/macros_msa.h"
-
-#define IPRED_SUBS_UH2_UH(in0, in1, out0, out1) \
- { \
- out0 = __msa_subs_u_h(out0, in0); \
- out1 = __msa_subs_u_h(out1, in1); \
- }
-
-static void intra_predict_vert_4x4_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t src_data;
-
- src_data = LW(src);
-
- SW4(src_data, src_data, src_data, src_data, dst, dst_stride);
-}
-
-static void intra_predict_vert_8x8_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t row;
- uint32_t src_data1, src_data2;
-
- src_data1 = LW(src);
- src_data2 = LW(src + 4);
-
- for (row = 8; row--;) {
- SW(src_data1, dst);
- SW(src_data2, (dst + 4));
- dst += dst_stride;
- }
-}
-
-static void intra_predict_vert_16x16_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t row;
- v16u8 src0;
-
- src0 = LD_UB(src);
-
- for (row = 16; row--;) {
- ST_UB(src0, dst);
- dst += dst_stride;
- }
-}
-
-static void intra_predict_vert_32x32_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t row;
- v16u8 src1, src2;
-
- src1 = LD_UB(src);
- src2 = LD_UB(src + 16);
-
- for (row = 32; row--;) {
- ST_UB2(src1, src2, dst, 16);
- dst += dst_stride;
- }
-}
-
-static void intra_predict_horiz_4x4_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t out0, out1, out2, out3;
-
- out0 = src[0] * 0x01010101;
- out1 = src[1] * 0x01010101;
- out2 = src[2] * 0x01010101;
- out3 = src[3] * 0x01010101;
-
- SW4(out0, out1, out2, out3, dst, dst_stride);
-}
-
-static void intra_predict_horiz_8x8_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
-
- out0 = src[0] * 0x0101010101010101ull;
- out1 = src[1] * 0x0101010101010101ull;
- out2 = src[2] * 0x0101010101010101ull;
- out3 = src[3] * 0x0101010101010101ull;
- out4 = src[4] * 0x0101010101010101ull;
- out5 = src[5] * 0x0101010101010101ull;
- out6 = src[6] * 0x0101010101010101ull;
- out7 = src[7] * 0x0101010101010101ull;
-
- SD4(out0, out1, out2, out3, dst, dst_stride);
- dst += (4 * dst_stride);
- SD4(out4, out5, out6, out7, dst, dst_stride);
-}
-
-static void intra_predict_horiz_16x16_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t row;
- uint8_t inp0, inp1, inp2, inp3;
- v16u8 src0, src1, src2, src3;
-
- for (row = 4; row--;) {
- inp0 = src[0];
- inp1 = src[1];
- inp2 = src[2];
- inp3 = src[3];
- src += 4;
-
- src0 = (v16u8)__msa_fill_b(inp0);
- src1 = (v16u8)__msa_fill_b(inp1);
- src2 = (v16u8)__msa_fill_b(inp2);
- src3 = (v16u8)__msa_fill_b(inp3);
-
- ST_UB4(src0, src1, src2, src3, dst, dst_stride);
- dst += (4 * dst_stride);
- }
-}
-
-static void intra_predict_horiz_32x32_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t row;
- uint8_t inp0, inp1, inp2, inp3;
- v16u8 src0, src1, src2, src3;
-
- for (row = 8; row--;) {
- inp0 = src[0];
- inp1 = src[1];
- inp2 = src[2];
- inp3 = src[3];
- src += 4;
-
- src0 = (v16u8)__msa_fill_b(inp0);
- src1 = (v16u8)__msa_fill_b(inp1);
- src2 = (v16u8)__msa_fill_b(inp2);
- src3 = (v16u8)__msa_fill_b(inp3);
-
- ST_UB2(src0, src0, dst, 16);
- dst += dst_stride;
- ST_UB2(src1, src1, dst, 16);
- dst += dst_stride;
- ST_UB2(src2, src2, dst, 16);
- dst += dst_stride;
- ST_UB2(src3, src3, dst, 16);
- dst += dst_stride;
- }
-}
-
-static void intra_predict_dc_4x4_msa(const uint8_t *src_top,
- const uint8_t *src_left, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t val0, val1;
- v16i8 store, src = { 0 };
- v8u16 sum_h;
- v4u32 sum_w;
- v2u64 sum_d;
-
- val0 = LW(src_top);
- val1 = LW(src_left);
- INSERT_W2_SB(val0, val1, src);
- sum_h = __msa_hadd_u_h((v16u8)src, (v16u8)src);
- sum_w = __msa_hadd_u_w(sum_h, sum_h);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 3);
- store = __msa_splati_b((v16i8)sum_w, 0);
- val0 = __msa_copy_u_w((v4i32)store, 0);
-
- SW4(val0, val0, val0, val0, dst, dst_stride);
-}
-
-static void intra_predict_dc_tl_4x4_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t val0;
- v16i8 store, data = { 0 };
- v8u16 sum_h;
- v4u32 sum_w;
-
- val0 = LW(src);
- data = (v16i8)__msa_insert_w((v4i32)data, 0, val0);
- sum_h = __msa_hadd_u_h((v16u8)data, (v16u8)data);
- sum_w = __msa_hadd_u_w(sum_h, sum_h);
- sum_w = (v4u32)__msa_srari_w((v4i32)sum_w, 2);
- store = __msa_splati_b((v16i8)sum_w, 0);
- val0 = __msa_copy_u_w((v4i32)store, 0);
-
- SW4(val0, val0, val0, val0, dst, dst_stride);
-}
-
-static void intra_predict_128dc_4x4_msa(uint8_t *dst, int32_t dst_stride) {
- uint32_t out;
- const v16i8 store = __msa_ldi_b(128);
-
- out = __msa_copy_u_w((v4i32)store, 0);
-
- SW4(out, out, out, out, dst, dst_stride);
-}
-
-static void intra_predict_dc_8x8_msa(const uint8_t *src_top,
- const uint8_t *src_left, uint8_t *dst,
- int32_t dst_stride) {
- uint64_t val0, val1;
- v16i8 store;
- v16u8 src = { 0 };
- v8u16 sum_h;
- v4u32 sum_w;
- v2u64 sum_d;
-
- val0 = LD(src_top);
- val1 = LD(src_left);
- INSERT_D2_UB(val0, val1, src);
- sum_h = __msa_hadd_u_h(src, src);
- sum_w = __msa_hadd_u_w(sum_h, sum_h);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 4);
- store = __msa_splati_b((v16i8)sum_w, 0);
- val0 = __msa_copy_u_d((v2i64)store, 0);
-
- SD4(val0, val0, val0, val0, dst, dst_stride);
- dst += (4 * dst_stride);
- SD4(val0, val0, val0, val0, dst, dst_stride);
-}
-
-static void intra_predict_dc_tl_8x8_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint64_t val0;
- v16i8 store;
- v16u8 data = { 0 };
- v8u16 sum_h;
- v4u32 sum_w;
- v2u64 sum_d;
-
- val0 = LD(src);
- data = (v16u8)__msa_insert_d((v2i64)data, 0, val0);
- sum_h = __msa_hadd_u_h(data, data);
- sum_w = __msa_hadd_u_w(sum_h, sum_h);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 3);
- store = __msa_splati_b((v16i8)sum_w, 0);
- val0 = __msa_copy_u_d((v2i64)store, 0);
-
- SD4(val0, val0, val0, val0, dst, dst_stride);
- dst += (4 * dst_stride);
- SD4(val0, val0, val0, val0, dst, dst_stride);
-}
-
-static void intra_predict_128dc_8x8_msa(uint8_t *dst, int32_t dst_stride) {
- uint64_t out;
- const v16i8 store = __msa_ldi_b(128);
-
- out = __msa_copy_u_d((v2i64)store, 0);
-
- SD4(out, out, out, out, dst, dst_stride);
- dst += (4 * dst_stride);
- SD4(out, out, out, out, dst, dst_stride);
-}
-
-static void intra_predict_dc_16x16_msa(const uint8_t *src_top,
- const uint8_t *src_left, uint8_t *dst,
- int32_t dst_stride) {
- v16u8 top, left, out;
- v8u16 sum_h, sum_top, sum_left;
- v4u32 sum_w;
- v2u64 sum_d;
-
- top = LD_UB(src_top);
- left = LD_UB(src_left);
- HADD_UB2_UH(top, left, sum_top, sum_left);
- sum_h = sum_top + sum_left;
- sum_w = __msa_hadd_u_w(sum_h, sum_h);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 5);
- out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);
-
- ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
- dst += (8 * dst_stride);
- ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
-}
-
-static void intra_predict_dc_tl_16x16_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- v16u8 data, out;
- v8u16 sum_h;
- v4u32 sum_w;
- v2u64 sum_d;
-
- data = LD_UB(src);
- sum_h = __msa_hadd_u_h(data, data);
- sum_w = __msa_hadd_u_w(sum_h, sum_h);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 4);
- out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);
-
- ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
- dst += (8 * dst_stride);
- ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
-}
-
-static void intra_predict_128dc_16x16_msa(uint8_t *dst, int32_t dst_stride) {
- const v16u8 out = (v16u8)__msa_ldi_b(128);
-
- ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
- dst += (8 * dst_stride);
- ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
-}
-
-static void intra_predict_dc_32x32_msa(const uint8_t *src_top,
- const uint8_t *src_left, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t row;
- v16u8 top0, top1, left0, left1, out;
- v8u16 sum_h, sum_top0, sum_top1, sum_left0, sum_left1;
- v4u32 sum_w;
- v2u64 sum_d;
-
- LD_UB2(src_top, 16, top0, top1);
- LD_UB2(src_left, 16, left0, left1);
- HADD_UB2_UH(top0, top1, sum_top0, sum_top1);
- HADD_UB2_UH(left0, left1, sum_left0, sum_left1);
- sum_h = sum_top0 + sum_top1;
- sum_h += sum_left0 + sum_left1;
- sum_w = __msa_hadd_u_w(sum_h, sum_h);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 6);
- out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);
-
- for (row = 16; row--;) {
- ST_UB2(out, out, dst, 16);
- dst += dst_stride;
- ST_UB2(out, out, dst, 16);
- dst += dst_stride;
- }
-}
-
-static void intra_predict_dc_tl_32x32_msa(const uint8_t *src, uint8_t *dst,
- int32_t dst_stride) {
- uint32_t row;
- v16u8 data0, data1, out;
- v8u16 sum_h, sum_data0, sum_data1;
- v4u32 sum_w;
- v2u64 sum_d;
-
- LD_UB2(src, 16, data0, data1);
- HADD_UB2_UH(data0, data1, sum_data0, sum_data1);
- sum_h = sum_data0 + sum_data1;
- sum_w = __msa_hadd_u_w(sum_h, sum_h);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
- sum_d = __msa_hadd_u_d(sum_w, sum_w);
- sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 5);
- out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);
-
- for (row = 16; row--;) {
- ST_UB2(out, out, dst, 16);
- dst += dst_stride;
- ST_UB2(out, out, dst, 16);
- dst += dst_stride;
- }
-}
-
-static void intra_predict_128dc_32x32_msa(uint8_t *dst, int32_t dst_stride) {
- uint32_t row;
- const v16u8 out = (v16u8)__msa_ldi_b(128);
-
- for (row = 16; row--;) {
- ST_UB2(out, out, dst, 16);
- dst += dst_stride;
- ST_UB2(out, out, dst, 16);
- dst += dst_stride;
- }
-}
-
-void aom_v_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
-
- intra_predict_vert_4x4_msa(above, dst, y_stride);
-}
-
-void aom_v_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
-
- intra_predict_vert_8x8_msa(above, dst, y_stride);
-}
-
-void aom_v_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
-
- intra_predict_vert_16x16_msa(above, dst, y_stride);
-}
-
-void aom_v_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
-
- intra_predict_vert_32x32_msa(above, dst, y_stride);
-}
-
-void aom_h_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
-
- intra_predict_horiz_4x4_msa(left, dst, y_stride);
-}
-
-void aom_h_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
-
- intra_predict_horiz_8x8_msa(left, dst, y_stride);
-}
-
-void aom_h_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
-
- intra_predict_horiz_16x16_msa(left, dst, y_stride);
-}
-
-void aom_h_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
-
- intra_predict_horiz_32x32_msa(left, dst, y_stride);
-}
-
-void aom_dc_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- intra_predict_dc_4x4_msa(above, left, dst, y_stride);
-}
-
-void aom_dc_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- intra_predict_dc_8x8_msa(above, left, dst, y_stride);
-}
-
-void aom_dc_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- intra_predict_dc_16x16_msa(above, left, dst, y_stride);
-}
-
-void aom_dc_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- intra_predict_dc_32x32_msa(above, left, dst, y_stride);
-}
-
-void aom_dc_top_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
-
- intra_predict_dc_tl_4x4_msa(above, dst, y_stride);
-}
-
-void aom_dc_top_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
-
- intra_predict_dc_tl_8x8_msa(above, dst, y_stride);
-}
-
-void aom_dc_top_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
-
- intra_predict_dc_tl_16x16_msa(above, dst, y_stride);
-}
-
-void aom_dc_top_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)left;
-
- intra_predict_dc_tl_32x32_msa(above, dst, y_stride);
-}
-
-void aom_dc_left_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
-
- intra_predict_dc_tl_4x4_msa(left, dst, y_stride);
-}
-
-void aom_dc_left_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
-
- intra_predict_dc_tl_8x8_msa(left, dst, y_stride);
-}
-
-void aom_dc_left_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
-
- intra_predict_dc_tl_16x16_msa(left, dst, y_stride);
-}
-
-void aom_dc_left_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above,
- const uint8_t *left) {
- (void)above;
-
- intra_predict_dc_tl_32x32_msa(left, dst, y_stride);
-}
-
-void aom_dc_128_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- (void)left;
-
- intra_predict_128dc_4x4_msa(dst, y_stride);
-}
-
-void aom_dc_128_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- (void)left;
-
- intra_predict_128dc_8x8_msa(dst, y_stride);
-}
-
-void aom_dc_128_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- (void)left;
-
- intra_predict_128dc_16x16_msa(dst, y_stride);
-}
-
-void aom_dc_128_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
- const uint8_t *above, const uint8_t *left) {
- (void)above;
- (void)left;
-
- intra_predict_128dc_32x32_msa(dst, y_stride);
-}
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_16_msa.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_16_msa.c
deleted file mode 100644
index 38a10e9b226..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_16_msa.c
+++ /dev/null
@@ -1,1488 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_ports/mem.h"
-#include "aom_dsp/mips/loopfilter_msa.h"
-
-int32_t aom_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch, uint8_t *filter48,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
- v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
- v16u8 flat, mask, hev, thresh, b_limit, limit;
- v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
- v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l;
- v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
- v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l;
- v16u8 zero = { 0 };
-
- /* load vector elements */
- LD_UB8(src - (4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
-
- thresh = (v16u8)__msa_fill_b(*thresh_ptr);
- b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
- limit = (v16u8)__msa_fill_b(*limit_ptr);
-
- /* mask and hev */
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
- mask, flat);
- AOM_FLAT4(p3, p2, p0, q0, q2, q3, flat);
- AOM_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
-
- if (__msa_test_bz_v(flat)) {
- ST_UB4(p1_out, p0_out, q0_out, q1_out, (src - 2 * pitch), pitch);
-
- return 1;
- } else {
- ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, zero,
- q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r);
- AOM_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
- p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
-
- ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l);
- ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l);
- AOM_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
- p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
-
- /* convert 16 bit output data into 8 bit */
- PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l,
- p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r,
- p0_filt8_r, q0_filt8_r);
- PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r,
- q2_filt8_r);
-
- /* store pixel values */
- p2_out = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat);
- p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat);
- p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat);
- q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat);
- q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat);
- q2_out = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat);
-
- ST_UB4(p2_out, p1_out, p0_out, q0_out, filter48, 16);
- filter48 += (4 * 16);
- ST_UB2(q1_out, q2_out, filter48, 16);
- filter48 += (2 * 16);
- ST_UB(flat, filter48);
-
- return 0;
- }
-}
-
-void aom_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) {
- v16u8 flat, flat2, filter8;
- v16i8 zero = { 0 };
- v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
- v8u16 p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in, p2_r_in, p1_r_in, p0_r_in;
- v8u16 q7_r_in, q6_r_in, q5_r_in, q4_r_in, q3_r_in, q2_r_in, q1_r_in, q0_r_in;
- v8u16 p7_l_in, p6_l_in, p5_l_in, p4_l_in, p3_l_in, p2_l_in, p1_l_in, p0_l_in;
- v8u16 q7_l_in, q6_l_in, q5_l_in, q4_l_in, q3_l_in, q2_l_in, q1_l_in, q0_l_in;
- v8u16 tmp0_r, tmp1_r, tmp0_l, tmp1_l;
- v8i16 l_out, r_out;
-
- flat = LD_UB(filter48 + 96);
-
- LD_UB8((src - 8 * pitch), pitch, p7, p6, p5, p4, p3, p2, p1, p0);
- LD_UB8(src, pitch, q0, q1, q2, q3, q4, q5, q6, q7);
- AOM_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
-
- if (__msa_test_bz_v(flat2)) {
- LD_UB4(filter48, 16, p2, p1, p0, q0);
- LD_UB2(filter48 + 4 * 16, 16, q1, q2);
-
- src -= 3 * pitch;
- ST_UB4(p2, p1, p0, q0, src, pitch);
- src += (4 * pitch);
- ST_UB2(q1, q2, src, pitch);
- } else {
- src -= 7 * pitch;
-
- ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, p3, zero, p2, zero,
- p1, zero, p0, p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in,
- p2_r_in, p1_r_in, p0_r_in);
-
- q0_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q0);
-
- tmp0_r = p7_r_in << 3;
- tmp0_r -= p7_r_in;
- tmp0_r += p6_r_in;
- tmp0_r += q0_r_in;
- tmp1_r = p6_r_in + p5_r_in;
- tmp1_r += p4_r_in;
- tmp1_r += p3_r_in;
- tmp1_r += p2_r_in;
- tmp1_r += p1_r_in;
- tmp1_r += p0_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- ILVL_B4_UH(zero, p7, zero, p6, zero, p5, zero, p4, p7_l_in, p6_l_in,
- p5_l_in, p4_l_in);
- ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l_in, p2_l_in,
- p1_l_in, p0_l_in);
- q0_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q0);
-
- tmp0_l = p7_l_in << 3;
- tmp0_l -= p7_l_in;
- tmp0_l += p6_l_in;
- tmp0_l += q0_l_in;
- tmp1_l = p6_l_in + p5_l_in;
- tmp1_l += p4_l_in;
- tmp1_l += p3_l_in;
- tmp1_l += p2_l_in;
- tmp1_l += p1_l_in;
- tmp1_l += p0_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- p6 = __msa_bmnz_v(p6, (v16u8)r_out, flat2);
- ST_UB(p6, src);
- src += pitch;
-
- /* p5 */
- q1_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q1);
- tmp0_r = p5_r_in - p6_r_in;
- tmp0_r += q1_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- q1_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q1);
- tmp0_l = p5_l_in - p6_l_in;
- tmp0_l += q1_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- p5 = __msa_bmnz_v(p5, (v16u8)r_out, flat2);
- ST_UB(p5, src);
- src += pitch;
-
- /* p4 */
- q2_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q2);
- tmp0_r = p4_r_in - p5_r_in;
- tmp0_r += q2_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = (v8i16)__msa_srari_h((v8i16)tmp1_r, 4);
-
- q2_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q2);
- tmp0_l = p4_l_in - p5_l_in;
- tmp0_l += q2_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- p4 = __msa_bmnz_v(p4, (v16u8)r_out, flat2);
- ST_UB(p4, src);
- src += pitch;
-
- /* p3 */
- q3_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q3);
- tmp0_r = p3_r_in - p4_r_in;
- tmp0_r += q3_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- q3_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q3);
- tmp0_l = p3_l_in - p4_l_in;
- tmp0_l += q3_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- p3 = __msa_bmnz_v(p3, (v16u8)r_out, flat2);
- ST_UB(p3, src);
- src += pitch;
-
- /* p2 */
- q4_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q4);
- filter8 = LD_UB(filter48);
- tmp0_r = p2_r_in - p3_r_in;
- tmp0_r += q4_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- q4_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q4);
- tmp0_l = p2_l_in - p3_l_in;
- tmp0_l += q4_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += pitch;
-
- /* p1 */
- q5_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q5);
- filter8 = LD_UB(filter48 + 16);
- tmp0_r = p1_r_in - p2_r_in;
- tmp0_r += q5_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- q5_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q5);
- tmp0_l = p1_l_in - p2_l_in;
- tmp0_l += q5_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += pitch;
-
- /* p0 */
- q6_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q6);
- filter8 = LD_UB(filter48 + 32);
- tmp0_r = p0_r_in - p1_r_in;
- tmp0_r += q6_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- q6_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q6);
- tmp0_l = p0_l_in - p1_l_in;
- tmp0_l += q6_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += pitch;
-
- /* q0 */
- q7_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q7);
- filter8 = LD_UB(filter48 + 48);
- tmp0_r = q7_r_in - p0_r_in;
- tmp0_r += q0_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- q7_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q7);
- tmp0_l = q7_l_in - p0_l_in;
- tmp0_l += q0_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += pitch;
-
- /* q1 */
- filter8 = LD_UB(filter48 + 64);
- tmp0_r = q7_r_in - q0_r_in;
- tmp0_r += q1_r_in;
- tmp0_r -= p6_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- tmp0_l = q7_l_in - q0_l_in;
- tmp0_l += q1_l_in;
- tmp0_l -= p6_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += pitch;
-
- /* q2 */
- filter8 = LD_UB(filter48 + 80);
- tmp0_r = q7_r_in - q1_r_in;
- tmp0_r += q2_r_in;
- tmp0_r -= p5_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- tmp0_l = q7_l_in - q1_l_in;
- tmp0_l += q2_l_in;
- tmp0_l -= p5_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += pitch;
-
- /* q3 */
- tmp0_r = q7_r_in - q2_r_in;
- tmp0_r += q3_r_in;
- tmp0_r -= p4_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- tmp0_l = q7_l_in - q2_l_in;
- tmp0_l += q3_l_in;
- tmp0_l -= p4_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- q3 = __msa_bmnz_v(q3, (v16u8)r_out, flat2);
- ST_UB(q3, src);
- src += pitch;
-
- /* q4 */
- tmp0_r = q7_r_in - q3_r_in;
- tmp0_r += q4_r_in;
- tmp0_r -= p3_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- tmp0_l = q7_l_in - q3_l_in;
- tmp0_l += q4_l_in;
- tmp0_l -= p3_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- q4 = __msa_bmnz_v(q4, (v16u8)r_out, flat2);
- ST_UB(q4, src);
- src += pitch;
-
- /* q5 */
- tmp0_r = q7_r_in - q4_r_in;
- tmp0_r += q5_r_in;
- tmp0_r -= p2_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- tmp0_l = q7_l_in - q4_l_in;
- tmp0_l += q5_l_in;
- tmp0_l -= p2_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- q5 = __msa_bmnz_v(q5, (v16u8)r_out, flat2);
- ST_UB(q5, src);
- src += pitch;
-
- /* q6 */
- tmp0_r = q7_r_in - q5_r_in;
- tmp0_r += q6_r_in;
- tmp0_r -= p1_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- tmp0_l = q7_l_in - q5_l_in;
- tmp0_l += q6_l_in;
- tmp0_l -= p1_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- q6 = __msa_bmnz_v(q6, (v16u8)r_out, flat2);
- ST_UB(q6, src);
- }
-}
-
-static void mb_lpf_horizontal_edge_dual(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr,
- int32_t count) {
- DECLARE_ALIGNED(32, uint8_t, filter48[16 * 8]);
- uint8_t early_exit = 0;
-
- (void)count;
-
- early_exit = aom_hz_lpf_t4_and_t8_16w(src, pitch, &filter48[0], b_limit_ptr,
- limit_ptr, thresh_ptr);
-
- if (0 == early_exit) {
- aom_hz_lpf_t16_16w(src, pitch, filter48);
- }
-}
-
-static void mb_lpf_horizontal_edge(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr, int32_t count) {
- if (1 == count) {
- uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
- uint64_t dword0, dword1;
- v16u8 flat2, mask, hev, flat, thresh, b_limit, limit;
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p7, p6, p5, p4, q4, q5, q6, q7;
- v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
- v16u8 p0_filter16, p1_filter16;
- v8i16 p2_filter8, p1_filter8, p0_filter8;
- v8i16 q0_filter8, q1_filter8, q2_filter8;
- v8u16 p7_r, p6_r, p5_r, p4_r, q7_r, q6_r, q5_r, q4_r;
- v8u16 p3_r, p2_r, p1_r, p0_r, q3_r, q2_r, q1_r, q0_r;
- v16i8 zero = { 0 };
- v8u16 tmp0, tmp1, tmp2;
-
- /* load vector elements */
- LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
-
- thresh = (v16u8)__msa_fill_b(*thresh_ptr);
- b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
- limit = (v16u8)__msa_fill_b(*limit_ptr);
-
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
- mask, flat);
- AOM_FLAT4(p3, p2, p0, q0, q2, q3, flat);
- AOM_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
- q1_out);
-
- flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);
-
- if (__msa_test_bz_v(flat)) {
- p1_d = __msa_copy_u_d((v2i64)p1_out, 0);
- p0_d = __msa_copy_u_d((v2i64)p0_out, 0);
- q0_d = __msa_copy_u_d((v2i64)q0_out, 0);
- q1_d = __msa_copy_u_d((v2i64)q1_out, 0);
- SD4(p1_d, p0_d, q0_d, q1_d, src - 2 * pitch, pitch);
- } else {
- /* convert 8 bit input data into 16 bit */
- ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
- zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r,
- q3_r);
- AOM_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filter8,
- p1_filter8, p0_filter8, q0_filter8, q1_filter8, q2_filter8);
-
- /* convert 16 bit output data into 8 bit */
- PCKEV_B4_SH(zero, p2_filter8, zero, p1_filter8, zero, p0_filter8, zero,
- q0_filter8, p2_filter8, p1_filter8, p0_filter8, q0_filter8);
- PCKEV_B2_SH(zero, q1_filter8, zero, q2_filter8, q1_filter8, q2_filter8);
-
- /* store pixel values */
- p2_out = __msa_bmnz_v(p2, (v16u8)p2_filter8, flat);
- p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filter8, flat);
- p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filter8, flat);
- q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filter8, flat);
- q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filter8, flat);
- q2_out = __msa_bmnz_v(q2, (v16u8)q2_filter8, flat);
-
- /* load 16 vector elements */
- LD_UB4((src - 8 * pitch), pitch, p7, p6, p5, p4);
- LD_UB4(src + (4 * pitch), pitch, q4, q5, q6, q7);
-
- AOM_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
-
- if (__msa_test_bz_v(flat2)) {
- p2_d = __msa_copy_u_d((v2i64)p2_out, 0);
- p1_d = __msa_copy_u_d((v2i64)p1_out, 0);
- p0_d = __msa_copy_u_d((v2i64)p0_out, 0);
- q0_d = __msa_copy_u_d((v2i64)q0_out, 0);
- q1_d = __msa_copy_u_d((v2i64)q1_out, 0);
- q2_d = __msa_copy_u_d((v2i64)q2_out, 0);
-
- SD4(p2_d, p1_d, p0_d, q0_d, src - 3 * pitch, pitch);
- SD(q1_d, src + pitch);
- SD(q2_d, src + 2 * pitch);
- } else {
-      /* right (LSB) 8-pixel half operation */
- ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, q4, zero, q5,
- zero, q6, zero, q7, p7_r, p6_r, p5_r, p4_r, q4_r, q5_r, q6_r,
- q7_r);
-
- tmp0 = p7_r << 3;
- tmp0 -= p7_r;
- tmp0 += p6_r;
- tmp0 += q0_r;
-
- src -= 7 * pitch;
-
- /* calculation of p6 and p5 */
- tmp1 = p6_r + p5_r + p4_r + p3_r;
- tmp1 += (p2_r + p1_r + p0_r);
- tmp1 += tmp0;
- p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- tmp0 = p5_r - p6_r + q1_r - p7_r;
- tmp1 += tmp0;
- p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16,
- p1_filter16);
- p0_filter16 = __msa_bmnz_v(p6, p0_filter16, flat2);
- p1_filter16 = __msa_bmnz_v(p5, p1_filter16, flat2);
- dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0);
- dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0);
- SD(dword0, src);
- src += pitch;
- SD(dword1, src);
- src += pitch;
-
- /* calculation of p4 and p3 */
- tmp0 = p4_r - p5_r + q2_r - p7_r;
- tmp2 = p3_r - p4_r + q3_r - p7_r;
- tmp1 += tmp0;
- p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- tmp1 += tmp2;
- p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16,
- p1_filter16);
- p0_filter16 = __msa_bmnz_v(p4, p0_filter16, flat2);
- p1_filter16 = __msa_bmnz_v(p3, p1_filter16, flat2);
- dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0);
- dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0);
- SD(dword0, src);
- src += pitch;
- SD(dword1, src);
- src += pitch;
-
- /* calculation of p2 and p1 */
- tmp0 = p2_r - p3_r + q4_r - p7_r;
- tmp2 = p1_r - p2_r + q5_r - p7_r;
- tmp1 += tmp0;
- p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- tmp1 += tmp2;
- p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16,
- p1_filter16);
- p0_filter16 = __msa_bmnz_v(p2_out, p0_filter16, flat2);
- p1_filter16 = __msa_bmnz_v(p1_out, p1_filter16, flat2);
- dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0);
- dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0);
- SD(dword0, src);
- src += pitch;
- SD(dword1, src);
- src += pitch;
-
- /* calculation of p0 and q0 */
- tmp0 = (p0_r - p1_r) + (q6_r - p7_r);
- tmp2 = (q7_r - p0_r) + (q0_r - p7_r);
- tmp1 += tmp0;
- p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- tmp1 += tmp2;
- p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16,
- p1_filter16);
- p0_filter16 = __msa_bmnz_v(p0_out, p0_filter16, flat2);
- p1_filter16 = __msa_bmnz_v(q0_out, p1_filter16, flat2);
- dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0);
- dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0);
- SD(dword0, src);
- src += pitch;
- SD(dword1, src);
- src += pitch;
-
- /* calculation of q1 and q2 */
- tmp0 = q7_r - q0_r + q1_r - p6_r;
- tmp2 = q7_r - q1_r + q2_r - p5_r;
- tmp1 += tmp0;
- p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- tmp1 += tmp2;
- p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16,
- p1_filter16);
- p0_filter16 = __msa_bmnz_v(q1_out, p0_filter16, flat2);
- p1_filter16 = __msa_bmnz_v(q2_out, p1_filter16, flat2);
- dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0);
- dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0);
- SD(dword0, src);
- src += pitch;
- SD(dword1, src);
- src += pitch;
-
- /* calculation of q3 and q4 */
- tmp0 = (q7_r - q2_r) + (q3_r - p4_r);
- tmp2 = (q7_r - q3_r) + (q4_r - p3_r);
- tmp1 += tmp0;
- p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- tmp1 += tmp2;
- p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16,
- p1_filter16);
- p0_filter16 = __msa_bmnz_v(q3, p0_filter16, flat2);
- p1_filter16 = __msa_bmnz_v(q4, p1_filter16, flat2);
- dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0);
- dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0);
- SD(dword0, src);
- src += pitch;
- SD(dword1, src);
- src += pitch;
-
- /* calculation of q5 and q6 */
- tmp0 = (q7_r - q4_r) + (q5_r - p2_r);
- tmp2 = (q7_r - q5_r) + (q6_r - p1_r);
- tmp1 += tmp0;
- p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- tmp1 += tmp2;
- p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
- PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16,
- p1_filter16);
- p0_filter16 = __msa_bmnz_v(q5, p0_filter16, flat2);
- p1_filter16 = __msa_bmnz_v(q6, p1_filter16, flat2);
- dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0);
- dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0);
- SD(dword0, src);
- src += pitch;
- SD(dword1, src);
- }
- }
- } else {
- mb_lpf_horizontal_edge_dual(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr,
- count);
- }
-}
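
The long tmp0/tmp1 chains above and in aom_hz_lpf_t16_16w implement the 15-tap wide filter as a sliding window: one running sum is kept, and each successive output costs two adds and two subtracts instead of a fresh 15-term sum (the p5 output follows from the p6 sum via sum += p5 - p6 + q1 - p7). A minimal scalar sketch of the same recurrence; the function name and array interface are ours, and (sum + 8) >> 4 matches __msa_srari_h(sum, 4).

#include <stdint.h>

/* Scalar model of the sliding-window wide filter: p[i] is the pixel i
 * steps left of (or above) the edge, q[i] the pixel i steps right; out
 * receives the 14 filtered values p6..p0 then q0..q6. */
static void wide_filter_sketch(const uint8_t p[8], const uint8_t q[8],
                               uint8_t out[14]) {
  int sum = 7 * p[7] + 2 * p[6] + p[5] + p[4] + p[3] + p[2] + p[1] +
            p[0] + q[0];
  out[0] = (uint8_t)((sum + 8) >> 4);          /* filtered p6 */
  for (int i = 1; i < 8; i++) {                /* p5..p0 and q0 */
    const int center = (i < 7) ? p[6 - i] : q[0];
    const int oldest = (i < 7) ? p[7 - i] : p[0];
    sum += center - oldest + q[i] - p[7];
    out[i] = (uint8_t)((sum + 8) >> 4);
  }
  for (int i = 1; i < 7; i++) {                /* q1..q6 */
    sum += q[i] - q[i - 1] + q[7] - p[7 - i];
    out[7 + i] = (uint8_t)((sum + 8) >> 4);
  }
}
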
-
-void aom_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 1);
-}
-
-void aom_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 2);
-}
-
-static void transpose_16x8_to_8x16(uint8_t *input, int32_t in_pitch,
- uint8_t *output, int32_t out_pitch) {
- v16u8 p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org, p0_org;
- v16i8 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
- v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
-
- LD_UB8(input, in_pitch, p7_org, p6_org, p5_org, p4_org, p3_org, p2_org,
- p1_org, p0_org);
- /* 8x8 transpose */
- TRANSPOSE8x8_UB_UB(p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org,
- p0_org, p7, p6, p5, p4, p3, p2, p1, p0);
- /* 8x8 transpose */
- ILVL_B4_SB(p5_org, p7_org, p4_org, p6_org, p1_org, p3_org, p0_org, p2_org,
- tmp0, tmp1, tmp2, tmp3);
- ILVR_B2_SB(tmp1, tmp0, tmp3, tmp2, tmp4, tmp6);
- ILVL_B2_SB(tmp1, tmp0, tmp3, tmp2, tmp5, tmp7);
- ILVR_W2_UB(tmp6, tmp4, tmp7, tmp5, q0, q4);
- ILVL_W2_UB(tmp6, tmp4, tmp7, tmp5, q2, q6);
- SLDI_B4_0_UB(q0, q2, q4, q6, q1, q3, q5, q7, 8);
-
- ST_UB8(p7, p6, p5, p4, p3, p2, p1, p0, output, out_pitch);
- output += (8 * out_pitch);
- ST_UB8(q0, q1, q2, q3, q4, q5, q6, q7, output, out_pitch);
-}
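
The ILVR/ILVL interleaves and the SLDI shift above compute an in-register byte transpose: element (r, c) of the input strip ends up at element (c, r) of the output. A plain-C reference for the same reshape, with a generic interface of our own:

#include <stdint.h>

/* Scalar reference for the byte transposes in this file; the MSA
 * versions compute the same permutation with vector interleaves. */
static void transpose_bytes_sketch(const uint8_t *in, int in_pitch,
                                   uint8_t *out, int out_pitch,
                                   int rows, int cols) {
  for (int r = 0; r < rows; r++)
    for (int c = 0; c < cols; c++)
      out[c * out_pitch + r] = in[r * in_pitch + c];
}
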
-
-static void transpose_8x16_to_16x8(uint8_t *input, int32_t in_pitch,
- uint8_t *output, int32_t out_pitch) {
- v16u8 p7_o, p6_o, p5_o, p4_o, p3_o, p2_o, p1_o, p0_o;
- v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
-
- LD_UB8(input, in_pitch, p7, p6, p5, p4, p3, p2, p1, p0);
- LD_UB8(input + (8 * in_pitch), in_pitch, q0, q1, q2, q3, q4, q5, q6, q7);
- TRANSPOSE16x8_UB_UB(p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5,
- q6, q7, p7_o, p6_o, p5_o, p4_o, p3_o, p2_o, p1_o, p0_o);
- ST_UB8(p7_o, p6_o, p5_o, p4_o, p3_o, p2_o, p1_o, p0_o, output, out_pitch);
-}
-
-static void transpose_16x16(uint8_t *input, int32_t in_pitch, uint8_t *output,
- int32_t out_pitch) {
- v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
- v16u8 row8, row9, row10, row11, row12, row13, row14, row15;
- v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
- v8i16 tmp0, tmp1, tmp4, tmp5, tmp6, tmp7;
- v4i32 tmp2, tmp3;
-
- LD_UB8(input, in_pitch, row0, row1, row2, row3, row4, row5, row6, row7);
- input += (8 * in_pitch);
- LD_UB8(input, in_pitch, row8, row9, row10, row11, row12, row13, row14, row15);
-
- TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8,
- row9, row10, row11, row12, row13, row14, row15, p7, p6,
- p5, p4, p3, p2, p1, p0);
-
- /* transpose 16x8 matrix into 8x16 */
-  /* total 8 intermediate registers and 32 instructions */
- q7 = (v16u8)__msa_ilvod_d((v2i64)row8, (v2i64)row0);
- q6 = (v16u8)__msa_ilvod_d((v2i64)row9, (v2i64)row1);
- q5 = (v16u8)__msa_ilvod_d((v2i64)row10, (v2i64)row2);
- q4 = (v16u8)__msa_ilvod_d((v2i64)row11, (v2i64)row3);
- q3 = (v16u8)__msa_ilvod_d((v2i64)row12, (v2i64)row4);
- q2 = (v16u8)__msa_ilvod_d((v2i64)row13, (v2i64)row5);
- q1 = (v16u8)__msa_ilvod_d((v2i64)row14, (v2i64)row6);
- q0 = (v16u8)__msa_ilvod_d((v2i64)row15, (v2i64)row7);
-
- ILVEV_B2_SH(q7, q6, q5, q4, tmp0, tmp1);
- tmp4 = (v8i16)__msa_ilvod_b((v16i8)q6, (v16i8)q7);
- tmp5 = (v8i16)__msa_ilvod_b((v16i8)q4, (v16i8)q5);
-
- ILVEV_B2_UB(q3, q2, q1, q0, q5, q7);
- tmp6 = (v8i16)__msa_ilvod_b((v16i8)q2, (v16i8)q3);
- tmp7 = (v8i16)__msa_ilvod_b((v16i8)q0, (v16i8)q1);
-
- ILVEV_H2_SW(tmp0, tmp1, q5, q7, tmp2, tmp3);
- q0 = (v16u8)__msa_ilvev_w(tmp3, tmp2);
- q4 = (v16u8)__msa_ilvod_w(tmp3, tmp2);
-
- tmp2 = (v4i32)__msa_ilvod_h(tmp1, tmp0);
- tmp3 = (v4i32)__msa_ilvod_h((v8i16)q7, (v8i16)q5);
- q2 = (v16u8)__msa_ilvev_w(tmp3, tmp2);
- q6 = (v16u8)__msa_ilvod_w(tmp3, tmp2);
-
- ILVEV_H2_SW(tmp4, tmp5, tmp6, tmp7, tmp2, tmp3);
- q1 = (v16u8)__msa_ilvev_w(tmp3, tmp2);
- q5 = (v16u8)__msa_ilvod_w(tmp3, tmp2);
-
- tmp2 = (v4i32)__msa_ilvod_h(tmp5, tmp4);
- tmp3 = (v4i32)__msa_ilvod_h(tmp7, tmp6);
- q3 = (v16u8)__msa_ilvev_w(tmp3, tmp2);
- q7 = (v16u8)__msa_ilvod_w(tmp3, tmp2);
-
- ST_UB8(p7, p6, p5, p4, p3, p2, p1, p0, output, out_pitch);
- output += (8 * out_pitch);
- ST_UB8(q0, q1, q2, q3, q4, q5, q6, q7, output, out_pitch);
-}
-
-int32_t aom_vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48,
- uint8_t *src_org, int32_t pitch_org,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
- v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
- v16u8 flat, mask, hev, thresh, b_limit, limit;
- v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
- v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
- v16i8 zero = { 0 };
- v8i16 vec0, vec1, vec2, vec3;
-
- /* load vector elements */
- LD_UB8(src - (4 * 16), 16, p3, p2, p1, p0, q0, q1, q2, q3);
-
- thresh = (v16u8)__msa_fill_b(*thresh_ptr);
- b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
- limit = (v16u8)__msa_fill_b(*limit_ptr);
-
- /* mask and hev */
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
- mask, flat);
- /* flat4 */
- AOM_FLAT4(p3, p2, p0, q0, q2, q3, flat);
- /* filter4 */
- AOM_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
-
- flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);
-
- if (__msa_test_bz_v(flat)) {
- ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec2, vec3);
- ST4x8_UB(vec2, vec3, (src_org - 2), pitch_org);
- return 1;
- } else {
- ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, zero,
- q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r);
- AOM_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
- p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
-
- /* convert 16 bit output data into 8 bit */
- p2_r = (v8u16)__msa_pckev_b((v16i8)p2_filt8_r, (v16i8)p2_filt8_r);
- p1_r = (v8u16)__msa_pckev_b((v16i8)p1_filt8_r, (v16i8)p1_filt8_r);
- p0_r = (v8u16)__msa_pckev_b((v16i8)p0_filt8_r, (v16i8)p0_filt8_r);
- q0_r = (v8u16)__msa_pckev_b((v16i8)q0_filt8_r, (v16i8)q0_filt8_r);
- q1_r = (v8u16)__msa_pckev_b((v16i8)q1_filt8_r, (v16i8)q1_filt8_r);
- q2_r = (v8u16)__msa_pckev_b((v16i8)q2_filt8_r, (v16i8)q2_filt8_r);
-
- /* store pixel values */
- p2_out = __msa_bmnz_v(p2, (v16u8)p2_r, flat);
- p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_r, flat);
- p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_r, flat);
- q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_r, flat);
- q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_r, flat);
- q2_out = __msa_bmnz_v(q2, (v16u8)q2_r, flat);
-
- ST_UB4(p2_out, p1_out, p0_out, q0_out, filter48, 16);
- filter48 += (4 * 16);
- ST_UB2(q1_out, q2_out, filter48, 16);
- filter48 += (2 * 16);
- ST_UB(flat, filter48);
-
- return 0;
- }
-}
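
The flat and flat2 masks that gate the t8 and t16 paths come from AOM_FLAT4/AOM_FLAT5 together with the flat term of LPF_MASK_HEV. A scalar sketch of the combined tests, assuming the standard 8-bit flatness threshold of 1; the vector code splits the |p1 - p0| and |q1 - q0| terms into LPF_MASK_HEV, and ANDs flat2 with flat.

#include <stdint.h>
#include <stdlib.h>

/* Nonzero when all inner neighbours are within 1 of the edge pixels;
 * gates the 7-tap (filter8) path. */
static int flat4_sketch(uint8_t p3, uint8_t p2, uint8_t p1, uint8_t p0,
                        uint8_t q0, uint8_t q1, uint8_t q2, uint8_t q3) {
  return abs(p1 - p0) <= 1 && abs(q1 - q0) <= 1 && abs(p2 - p0) <= 1 &&
         abs(q2 - q0) <= 1 && abs(p3 - p0) <= 1 && abs(q3 - q0) <= 1;
}

/* Same test over the outer pixels; gates the wide (t16) path. */
static int flat5_sketch(uint8_t p7, uint8_t p6, uint8_t p5, uint8_t p4,
                        uint8_t p0, uint8_t q0, uint8_t q4, uint8_t q5,
                        uint8_t q6, uint8_t q7) {
  return abs(p4 - p0) <= 1 && abs(q4 - q0) <= 1 && abs(p5 - p0) <= 1 &&
         abs(q5 - q0) <= 1 && abs(p6 - p0) <= 1 && abs(q6 - q0) <= 1 &&
         abs(p7 - p0) <= 1 && abs(q7 - q0) <= 1;
}
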
-
-int32_t aom_vt_lpf_t16_8w(uint8_t *src, uint8_t *src_org, int32_t pitch,
- uint8_t *filter48) {
- v16i8 zero = { 0 };
- v16u8 filter8, flat, flat2;
- v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
- v8u16 p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in, p2_r_in, p1_r_in, p0_r_in;
- v8u16 q7_r_in, q6_r_in, q5_r_in, q4_r_in, q3_r_in, q2_r_in, q1_r_in, q0_r_in;
- v8u16 tmp0_r, tmp1_r;
- v8i16 r_out;
-
- flat = LD_UB(filter48 + 6 * 16);
-
- LD_UB8((src - 8 * 16), 16, p7, p6, p5, p4, p3, p2, p1, p0);
- LD_UB8(src, 16, q0, q1, q2, q3, q4, q5, q6, q7);
-
- AOM_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
-
- if (__msa_test_bz_v(flat2)) {
- v8i16 vec0, vec1, vec2, vec3, vec4;
-
- LD_UB4(filter48, 16, p2, p1, p0, q0);
- LD_UB2(filter48 + 4 * 16, 16, q1, q2);
-
- ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec3, vec4);
- vec2 = (v8i16)__msa_ilvr_b((v16i8)q2, (v16i8)q1);
-
- src_org -= 3;
- ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src_org, pitch);
- ST2x4_UB(vec2, 0, (src_org + 4), pitch);
- src_org += (4 * pitch);
- ST4x4_UB(vec4, vec4, 0, 1, 2, 3, src_org, pitch);
- ST2x4_UB(vec2, 4, (src_org + 4), pitch);
-
- return 1;
- } else {
- src -= 7 * 16;
-
- ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, p3, zero, p2, zero,
- p1, zero, p0, p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in,
- p2_r_in, p1_r_in, p0_r_in);
- q0_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q0);
-
- tmp0_r = p7_r_in << 3;
- tmp0_r -= p7_r_in;
- tmp0_r += p6_r_in;
- tmp0_r += q0_r_in;
- tmp1_r = p6_r_in + p5_r_in;
- tmp1_r += p4_r_in;
- tmp1_r += p3_r_in;
- tmp1_r += p2_r_in;
- tmp1_r += p1_r_in;
- tmp1_r += p0_r_in;
- tmp1_r += tmp0_r;
-
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- p6 = __msa_bmnz_v(p6, (v16u8)r_out, flat2);
- ST8x1_UB(p6, src);
- src += 16;
-
- /* p5 */
- q1_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q1);
- tmp0_r = p5_r_in - p6_r_in;
- tmp0_r += q1_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- p5 = __msa_bmnz_v(p5, (v16u8)r_out, flat2);
- ST8x1_UB(p5, src);
- src += 16;
-
- /* p4 */
- q2_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q2);
- tmp0_r = p4_r_in - p5_r_in;
- tmp0_r += q2_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- p4 = __msa_bmnz_v(p4, (v16u8)r_out, flat2);
- ST8x1_UB(p4, src);
- src += 16;
-
- /* p3 */
- q3_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q3);
- tmp0_r = p3_r_in - p4_r_in;
- tmp0_r += q3_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- p3 = __msa_bmnz_v(p3, (v16u8)r_out, flat2);
- ST8x1_UB(p3, src);
- src += 16;
-
- /* p2 */
- q4_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q4);
- filter8 = LD_UB(filter48);
- tmp0_r = p2_r_in - p3_r_in;
- tmp0_r += q4_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST8x1_UB(filter8, src);
- src += 16;
-
- /* p1 */
- q5_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q5);
- filter8 = LD_UB(filter48 + 16);
- tmp0_r = p1_r_in - p2_r_in;
- tmp0_r += q5_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST8x1_UB(filter8, src);
- src += 16;
-
- /* p0 */
- q6_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q6);
- filter8 = LD_UB(filter48 + 32);
- tmp0_r = p0_r_in - p1_r_in;
- tmp0_r += q6_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST8x1_UB(filter8, src);
- src += 16;
-
- /* q0 */
- q7_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q7);
- filter8 = LD_UB(filter48 + 48);
- tmp0_r = q7_r_in - p0_r_in;
- tmp0_r += q0_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST8x1_UB(filter8, src);
- src += 16;
-
- /* q1 */
- filter8 = LD_UB(filter48 + 64);
- tmp0_r = q7_r_in - q0_r_in;
- tmp0_r += q1_r_in;
- tmp0_r -= p6_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST8x1_UB(filter8, src);
- src += 16;
-
- /* q2 */
- filter8 = LD_UB(filter48 + 80);
- tmp0_r = q7_r_in - q1_r_in;
- tmp0_r += q2_r_in;
- tmp0_r -= p5_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST8x1_UB(filter8, src);
- src += 16;
-
- /* q3 */
- tmp0_r = q7_r_in - q2_r_in;
- tmp0_r += q3_r_in;
- tmp0_r -= p4_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- q3 = __msa_bmnz_v(q3, (v16u8)r_out, flat2);
- ST8x1_UB(q3, src);
- src += 16;
-
- /* q4 */
- tmp0_r = q7_r_in - q3_r_in;
- tmp0_r += q4_r_in;
- tmp0_r -= p3_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- q4 = __msa_bmnz_v(q4, (v16u8)r_out, flat2);
- ST8x1_UB(q4, src);
- src += 16;
-
- /* q5 */
- tmp0_r = q7_r_in - q4_r_in;
- tmp0_r += q5_r_in;
- tmp0_r -= p2_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- q5 = __msa_bmnz_v(q5, (v16u8)r_out, flat2);
- ST8x1_UB(q5, src);
- src += 16;
-
- /* q6 */
- tmp0_r = q7_r_in - q5_r_in;
- tmp0_r += q6_r_in;
- tmp0_r -= p1_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
- q6 = __msa_bmnz_v(q6, (v16u8)r_out, flat2);
- ST8x1_UB(q6, src);
-
- return 0;
- }
-}
-
-void aom_lpf_vertical_16_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- uint8_t early_exit = 0;
- DECLARE_ALIGNED(32, uint8_t, transposed_input[16 * 24]);
- uint8_t *filter48 = &transposed_input[16 * 16];
-
- transpose_16x8_to_8x16(src - 8, pitch, transposed_input, 16);
-
- early_exit =
- aom_vt_lpf_t4_and_t8_8w((transposed_input + 16 * 8), &filter48[0], src,
- pitch, b_limit_ptr, limit_ptr, thresh_ptr);
-
- if (0 == early_exit) {
- early_exit = aom_vt_lpf_t16_8w((transposed_input + 16 * 8), src, pitch,
- &filter48[0]);
-
- if (0 == early_exit) {
- transpose_8x16_to_16x8(transposed_input, 16, src - 8, pitch);
- }
- }
-}
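
aom_lpf_vertical_16_msa filters a vertical edge by rotating the 16-pixel-wide neighbourhood into a pitch-16 scratch (transposed_input), running the horizontal-edge kernels there, and rotating back only when pixels were actually rewritten. A scalar sketch of that transpose-filter-transpose strategy, under hypothetical names:

#include <stdint.h>

/* hz_filter stands in for a horizontal-edge kernel that expects its
 * pointer at the first row past the edge (the q0 row). */
static void vertical_via_transpose_sketch(uint8_t *src, int pitch,
                                          void (*hz_filter)(uint8_t *edge,
                                                            int pitch)) {
  uint8_t scratch[16 * 16];
  /* gather: picture column offset c - 8 becomes scratch row c */
  for (int r = 0; r < 16; r++)
    for (int c = 0; c < 16; c++)
      scratch[c * 16 + r] = src[r * pitch + c - 8];
  hz_filter(&scratch[8 * 16], 16); /* edge lies between rows 7 and 8 */
  /* scatter the filtered pixels back */
  for (int r = 0; r < 16; r++)
    for (int c = 0; c < 16; c++)
      src[r * pitch + c - 8] = scratch[c * 16 + r];
}

The early-exit returns let the narrow filter write straight to src_org through the ST4x8/ST2x4 scatters, so the transpose back is only paid on the wide path.
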
-
-int32_t aom_vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48,
- uint8_t *src_org, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
- v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
- v16u8 flat, mask, hev, thresh, b_limit, limit;
- v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
- v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l;
- v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
- v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l;
- v16i8 zero = { 0 };
- v8i16 vec0, vec1, vec2, vec3, vec4, vec5;
-
- /* load vector elements */
- LD_UB8(src - (4 * 16), 16, p3, p2, p1, p0, q0, q1, q2, q3);
-
- thresh = (v16u8)__msa_fill_b(*thresh_ptr);
- b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
- limit = (v16u8)__msa_fill_b(*limit_ptr);
-
- /* mask and hev */
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
- mask, flat);
- /* flat4 */
- AOM_FLAT4(p3, p2, p0, q0, q2, q3, flat);
- /* filter4 */
- AOM_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
-
- if (__msa_test_bz_v(flat)) {
- ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec2, vec3);
- ILVL_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec4, vec5);
-
- src_org -= 2;
- ST4x8_UB(vec2, vec3, src_org, pitch);
- src_org += 8 * pitch;
- ST4x8_UB(vec4, vec5, src_org, pitch);
-
- return 1;
- } else {
- ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, zero,
- q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r);
- AOM_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
- p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
- ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l);
- ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l);
- AOM_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
- p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
-
- /* convert 16 bit output data into 8 bit */
- PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l,
- p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r,
- p0_filt8_r, q0_filt8_r);
- PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r,
- q2_filt8_r);
-
- /* store pixel values */
- p2_out = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat);
- p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat);
- p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat);
- q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat);
- q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat);
- q2_out = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat);
-
- ST_UB4(p2_out, p1_out, p0_out, q0_out, filter48, 16);
- filter48 += (4 * 16);
- ST_UB2(q1_out, q2_out, filter48, 16);
- filter48 += (2 * 16);
- ST_UB(flat, filter48);
-
- return 0;
- }
-}
-
-int32_t aom_vt_lpf_t16_16w(uint8_t *src, uint8_t *src_org, int32_t pitch,
- uint8_t *filter48) {
- v16u8 flat, flat2, filter8;
- v16i8 zero = { 0 };
- v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
- v8u16 p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in, p2_r_in, p1_r_in, p0_r_in;
- v8u16 q7_r_in, q6_r_in, q5_r_in, q4_r_in, q3_r_in, q2_r_in, q1_r_in, q0_r_in;
- v8u16 p7_l_in, p6_l_in, p5_l_in, p4_l_in, p3_l_in, p2_l_in, p1_l_in, p0_l_in;
- v8u16 q7_l_in, q6_l_in, q5_l_in, q4_l_in, q3_l_in, q2_l_in, q1_l_in, q0_l_in;
- v8u16 tmp0_r, tmp1_r, tmp0_l, tmp1_l;
- v8i16 l_out, r_out;
-
- flat = LD_UB(filter48 + 6 * 16);
-
- LD_UB8((src - 8 * 16), 16, p7, p6, p5, p4, p3, p2, p1, p0);
- LD_UB8(src, 16, q0, q1, q2, q3, q4, q5, q6, q7);
-
- AOM_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
-
- if (__msa_test_bz_v(flat2)) {
- v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
-
- LD_UB4(filter48, 16, p2, p1, p0, q0);
- LD_UB2(filter48 + 4 * 16, 16, q1, q2);
-
- ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec3, vec4);
- ILVL_B2_SH(p1, p2, q0, p0, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec6, vec7);
- ILVRL_B2_SH(q2, q1, vec2, vec5);
-
- src_org -= 3;
- ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src_org, pitch);
- ST2x4_UB(vec2, 0, (src_org + 4), pitch);
- src_org += (4 * pitch);
- ST4x4_UB(vec4, vec4, 0, 1, 2, 3, src_org, pitch);
- ST2x4_UB(vec2, 4, (src_org + 4), pitch);
- src_org += (4 * pitch);
- ST4x4_UB(vec6, vec6, 0, 1, 2, 3, src_org, pitch);
- ST2x4_UB(vec5, 0, (src_org + 4), pitch);
- src_org += (4 * pitch);
- ST4x4_UB(vec7, vec7, 0, 1, 2, 3, src_org, pitch);
- ST2x4_UB(vec5, 4, (src_org + 4), pitch);
-
- return 1;
- } else {
- src -= 7 * 16;
-
- ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, p3, zero, p2, zero,
- p1, zero, p0, p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in,
- p2_r_in, p1_r_in, p0_r_in);
- q0_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q0);
-
- tmp0_r = p7_r_in << 3;
- tmp0_r -= p7_r_in;
- tmp0_r += p6_r_in;
- tmp0_r += q0_r_in;
- tmp1_r = p6_r_in + p5_r_in;
- tmp1_r += p4_r_in;
- tmp1_r += p3_r_in;
- tmp1_r += p2_r_in;
- tmp1_r += p1_r_in;
- tmp1_r += p0_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
-
- ILVL_B4_UH(zero, p7, zero, p6, zero, p5, zero, p4, p7_l_in, p6_l_in,
- p5_l_in, p4_l_in);
- ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l_in, p2_l_in,
- p1_l_in, p0_l_in);
- q0_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q0);
-
- tmp0_l = p7_l_in << 3;
- tmp0_l -= p7_l_in;
- tmp0_l += p6_l_in;
- tmp0_l += q0_l_in;
- tmp1_l = p6_l_in + p5_l_in;
- tmp1_l += p4_l_in;
- tmp1_l += p3_l_in;
- tmp1_l += p2_l_in;
- tmp1_l += p1_l_in;
- tmp1_l += p0_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
-
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- p6 = __msa_bmnz_v(p6, (v16u8)r_out, flat2);
- ST_UB(p6, src);
- src += 16;
-
- /* p5 */
- q1_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q1);
- tmp0_r = p5_r_in - p6_r_in;
- tmp0_r += q1_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- q1_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q1);
- tmp0_l = p5_l_in - p6_l_in;
- tmp0_l += q1_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- p5 = __msa_bmnz_v(p5, (v16u8)r_out, flat2);
- ST_UB(p5, src);
- src += 16;
-
- /* p4 */
- q2_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q2);
- tmp0_r = p4_r_in - p5_r_in;
- tmp0_r += q2_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- q2_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q2);
- tmp0_l = p4_l_in - p5_l_in;
- tmp0_l += q2_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- p4 = __msa_bmnz_v(p4, (v16u8)r_out, flat2);
- ST_UB(p4, src);
- src += 16;
-
- /* p3 */
- q3_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q3);
- tmp0_r = p3_r_in - p4_r_in;
- tmp0_r += q3_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- q3_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q3);
- tmp0_l = p3_l_in - p4_l_in;
- tmp0_l += q3_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- p3 = __msa_bmnz_v(p3, (v16u8)r_out, flat2);
- ST_UB(p3, src);
- src += 16;
-
- /* p2 */
- q4_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q4);
- filter8 = LD_UB(filter48);
- tmp0_r = p2_r_in - p3_r_in;
- tmp0_r += q4_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- q4_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q4);
- tmp0_l = p2_l_in - p3_l_in;
- tmp0_l += q4_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += 16;
-
- /* p1 */
- q5_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q5);
- filter8 = LD_UB(filter48 + 16);
- tmp0_r = p1_r_in - p2_r_in;
- tmp0_r += q5_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- q5_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q5);
- tmp0_l = p1_l_in - p2_l_in;
- tmp0_l += q5_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
-      l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += 16;
-
- /* p0 */
- q6_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q6);
- filter8 = LD_UB(filter48 + 32);
- tmp0_r = p0_r_in - p1_r_in;
- tmp0_r += q6_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- q6_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q6);
- tmp0_l = p0_l_in - p1_l_in;
- tmp0_l += q6_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += 16;
-
- /* q0 */
- q7_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q7);
- filter8 = LD_UB(filter48 + 48);
- tmp0_r = q7_r_in - p0_r_in;
- tmp0_r += q0_r_in;
- tmp0_r -= p7_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- q7_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q7);
- tmp0_l = q7_l_in - p0_l_in;
- tmp0_l += q0_l_in;
- tmp0_l -= p7_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += 16;
-
- /* q1 */
- filter8 = LD_UB(filter48 + 64);
- tmp0_r = q7_r_in - q0_r_in;
- tmp0_r += q1_r_in;
- tmp0_r -= p6_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- tmp0_l = q7_l_in - q0_l_in;
- tmp0_l += q1_l_in;
- tmp0_l -= p6_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += 16;
-
- /* q2 */
- filter8 = LD_UB(filter48 + 80);
- tmp0_r = q7_r_in - q1_r_in;
- tmp0_r += q2_r_in;
- tmp0_r -= p5_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- tmp0_l = q7_l_in - q1_l_in;
- tmp0_l += q2_l_in;
- tmp0_l -= p5_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
- ST_UB(filter8, src);
- src += 16;
-
- /* q3 */
- tmp0_r = q7_r_in - q2_r_in;
- tmp0_r += q3_r_in;
- tmp0_r -= p4_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- tmp0_l = q7_l_in - q2_l_in;
- tmp0_l += q3_l_in;
- tmp0_l -= p4_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- q3 = __msa_bmnz_v(q3, (v16u8)r_out, flat2);
- ST_UB(q3, src);
- src += 16;
-
- /* q4 */
- tmp0_r = q7_r_in - q3_r_in;
- tmp0_r += q4_r_in;
- tmp0_r -= p3_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- tmp0_l = q7_l_in - q3_l_in;
- tmp0_l += q4_l_in;
- tmp0_l -= p3_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- q4 = __msa_bmnz_v(q4, (v16u8)r_out, flat2);
- ST_UB(q4, src);
- src += 16;
-
- /* q5 */
- tmp0_r = q7_r_in - q4_r_in;
- tmp0_r += q5_r_in;
- tmp0_r -= p2_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- tmp0_l = q7_l_in - q4_l_in;
- tmp0_l += q5_l_in;
- tmp0_l -= p2_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- q5 = __msa_bmnz_v(q5, (v16u8)r_out, flat2);
- ST_UB(q5, src);
- src += 16;
-
- /* q6 */
- tmp0_r = q7_r_in - q5_r_in;
- tmp0_r += q6_r_in;
- tmp0_r -= p1_r_in;
- tmp1_r += tmp0_r;
- r_out = __msa_srari_h((v8i16)tmp1_r, 4);
- tmp0_l = q7_l_in - q5_l_in;
- tmp0_l += q6_l_in;
- tmp0_l -= p1_l_in;
- tmp1_l += tmp0_l;
- l_out = __msa_srari_h((v8i16)tmp1_l, 4);
- r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
- q6 = __msa_bmnz_v(q6, (v16u8)r_out, flat2);
- ST_UB(q6, src);
-
- return 0;
- }
-}
-
-void aom_lpf_vertical_16_dual_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- uint8_t early_exit = 0;
- DECLARE_ALIGNED(32, uint8_t, transposed_input[16 * 24]);
- uint8_t *filter48 = &transposed_input[16 * 16];
-
- transpose_16x16((src - 8), pitch, &transposed_input[0], 16);
-
- early_exit =
- aom_vt_lpf_t4_and_t8_16w((transposed_input + 16 * 8), &filter48[0], src,
- pitch, b_limit_ptr, limit_ptr, thresh_ptr);
-
- if (0 == early_exit) {
- early_exit = aom_vt_lpf_t16_16w((transposed_input + 16 * 8), src, pitch,
- &filter48[0]);
-
- if (0 == early_exit) {
- transpose_16x16(transposed_input, 16, (src - 8), pitch);
- }
- }
-}
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_4_msa.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_4_msa.c
deleted file mode 100644
index dc0a9776455..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_4_msa.c
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/mips/loopfilter_msa.h"
-
-void aom_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- uint64_t p1_d, p0_d, q0_d, q1_d;
- v16u8 mask, hev, flat, thresh, b_limit, limit;
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p1_out, p0_out, q0_out, q1_out;
-
- /* load vector elements */
- LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
-
- thresh = (v16u8)__msa_fill_b(*thresh_ptr);
- b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
- limit = (v16u8)__msa_fill_b(*limit_ptr);
-
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
- mask, flat);
- AOM_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
-
- p1_d = __msa_copy_u_d((v2i64)p1_out, 0);
- p0_d = __msa_copy_u_d((v2i64)p0_out, 0);
- q0_d = __msa_copy_u_d((v2i64)q0_out, 0);
- q1_d = __msa_copy_u_d((v2i64)q1_out, 0);
- SD4(p1_d, p0_d, q0_d, q1_d, (src - 2 * pitch), pitch);
-}
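
LPF_MASK_HEV derives the per-pixel filter mask and the high-edge-variance mask from the three thresholds that every entry point splats with __msa_fill_b. A scalar sketch of what we take the standard AV1 tests to be (an assumption; the vector macro additionally emits the first half of the flat test):

#include <stdint.h>
#include <stdlib.h>

/* `limit` bounds each neighbouring-pixel step, `blimit` bounds the
 * combined edge difference; nonzero means the pixel may be filtered. */
static int filter_mask_sketch(uint8_t limit, uint8_t blimit, uint8_t p3,
                              uint8_t p2, uint8_t p1, uint8_t p0,
                              uint8_t q0, uint8_t q1, uint8_t q2,
                              uint8_t q3) {
  return abs(p3 - p2) <= limit && abs(p2 - p1) <= limit &&
         abs(p1 - p0) <= limit && abs(q1 - q0) <= limit &&
         abs(q2 - q1) <= limit && abs(q3 - q2) <= limit &&
         abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit;
}

/* High edge variance: steep inner gradient selects the sharper filter. */
static int hev_mask_sketch(uint8_t thresh, uint8_t p1, uint8_t p0,
                           uint8_t q0, uint8_t q1) {
  return abs(p1 - p0) > thresh || abs(q1 - q0) > thresh;
}
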
-
-void aom_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit0_ptr,
- const uint8_t *limit0_ptr,
- const uint8_t *thresh0_ptr,
- const uint8_t *b_limit1_ptr,
- const uint8_t *limit1_ptr,
- const uint8_t *thresh1_ptr) {
- v16u8 mask, hev, flat, thresh0, b_limit0, limit0, thresh1, b_limit1, limit1;
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
-
- /* load vector elements */
- LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
-
- thresh0 = (v16u8)__msa_fill_b(*thresh0_ptr);
- thresh1 = (v16u8)__msa_fill_b(*thresh1_ptr);
- thresh0 = (v16u8)__msa_ilvr_d((v2i64)thresh1, (v2i64)thresh0);
-
- b_limit0 = (v16u8)__msa_fill_b(*b_limit0_ptr);
- b_limit1 = (v16u8)__msa_fill_b(*b_limit1_ptr);
- b_limit0 = (v16u8)__msa_ilvr_d((v2i64)b_limit1, (v2i64)b_limit0);
-
- limit0 = (v16u8)__msa_fill_b(*limit0_ptr);
- limit1 = (v16u8)__msa_fill_b(*limit1_ptr);
- limit0 = (v16u8)__msa_ilvr_d((v2i64)limit1, (v2i64)limit0);
-
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0, hev,
- mask, flat);
- AOM_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1);
-
- ST_UB4(p1, p0, q0, q1, (src - 2 * pitch), pitch);
-}
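
AOM_LPF_FILTER4_* is the narrow filter applied when a segment is not flat. A scalar sketch assuming the classic AV1/VP9 filter4 arithmetic on sign-biased (XOR 0x80) pixels; the helper names are ours.

#include <stdint.h>

static int8_t signed_char_clamp(int t) {
  return (int8_t)(t < -128 ? -128 : (t > 127 ? 127 : t));
}

/* mask/hev are the 0/1 per-pixel decisions sketched earlier. */
static void filter4_sketch(int mask, int hev, uint8_t *op1, uint8_t *op0,
                           uint8_t *oq0, uint8_t *oq1) {
  const int8_t ps1 = (int8_t)(*op1 ^ 0x80), ps0 = (int8_t)(*op0 ^ 0x80);
  const int8_t qs0 = (int8_t)(*oq0 ^ 0x80), qs1 = (int8_t)(*oq1 ^ 0x80);
  int8_t filter = signed_char_clamp(ps1 - qs1) & (hev ? -1 : 0);
  filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & (mask ? -1 : 0);
  const int8_t filter1 = signed_char_clamp(filter + 4) >> 3;
  const int8_t filter2 = signed_char_clamp(filter + 3) >> 3;
  *oq0 = (uint8_t)(signed_char_clamp(qs0 - filter1) ^ 0x80);
  *op0 = (uint8_t)(signed_char_clamp(ps0 + filter2) ^ 0x80);
  /* p1/q1 get half the correction, and only where hev is clear */
  filter = (int8_t)((filter1 + 1) >> 1) & (hev ? 0 : -1);
  *oq1 = (uint8_t)(signed_char_clamp(qs1 - filter) ^ 0x80);
  *op1 = (uint8_t)(signed_char_clamp(ps1 + filter) ^ 0x80);
}
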
-
-void aom_lpf_vertical_4_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- v16u8 mask, hev, flat, limit, thresh, b_limit;
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
- v8i16 vec0, vec1, vec2, vec3;
-
- LD_UB8((src - 4), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
-
- thresh = (v16u8)__msa_fill_b(*thresh_ptr);
- b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
- limit = (v16u8)__msa_fill_b(*limit_ptr);
-
- TRANSPOSE8x8_UB_UB(p3, p2, p1, p0, q0, q1, q2, q3, p3, p2, p1, p0, q0, q1, q2,
- q3);
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
- mask, flat);
- AOM_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1);
- ILVR_B2_SH(p0, p1, q1, q0, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec2, vec3);
-
- src -= 2;
- ST4x4_UB(vec2, vec2, 0, 1, 2, 3, src, pitch);
- src += 4 * pitch;
- ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
-}
-
-void aom_lpf_vertical_4_dual_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit0_ptr,
- const uint8_t *limit0_ptr,
- const uint8_t *thresh0_ptr,
- const uint8_t *b_limit1_ptr,
- const uint8_t *limit1_ptr,
- const uint8_t *thresh1_ptr) {
- v16u8 mask, hev, flat;
- v16u8 thresh0, b_limit0, limit0, thresh1, b_limit1, limit1;
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
- v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
- v16u8 row8, row9, row10, row11, row12, row13, row14, row15;
- v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
-
- LD_UB8(src - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
- LD_UB8(src - 4 + (8 * pitch), pitch, row8, row9, row10, row11, row12, row13,
- row14, row15);
-
- TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8,
- row9, row10, row11, row12, row13, row14, row15, p3, p2,
- p1, p0, q0, q1, q2, q3);
-
- thresh0 = (v16u8)__msa_fill_b(*thresh0_ptr);
- thresh1 = (v16u8)__msa_fill_b(*thresh1_ptr);
- thresh0 = (v16u8)__msa_ilvr_d((v2i64)thresh1, (v2i64)thresh0);
-
- b_limit0 = (v16u8)__msa_fill_b(*b_limit0_ptr);
- b_limit1 = (v16u8)__msa_fill_b(*b_limit1_ptr);
- b_limit0 = (v16u8)__msa_ilvr_d((v2i64)b_limit1, (v2i64)b_limit0);
-
- limit0 = (v16u8)__msa_fill_b(*limit0_ptr);
- limit1 = (v16u8)__msa_fill_b(*limit1_ptr);
- limit0 = (v16u8)__msa_ilvr_d((v2i64)limit1, (v2i64)limit0);
-
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0, hev,
- mask, flat);
- AOM_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1);
- ILVR_B2_SH(p0, p1, q1, q0, tmp0, tmp1);
- ILVRL_H2_SH(tmp1, tmp0, tmp2, tmp3);
- ILVL_B2_SH(p0, p1, q1, q0, tmp0, tmp1);
- ILVRL_H2_SH(tmp1, tmp0, tmp4, tmp5);
-
- src -= 2;
-
- ST4x8_UB(tmp2, tmp3, src, pitch);
- src += (8 * pitch);
- ST4x8_UB(tmp4, tmp5, src, pitch);
-}
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_8_msa.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_8_msa.c
deleted file mode 100644
index dc203e79cf7..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_8_msa.c
+++ /dev/null
@@ -1,333 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "aom_dsp/mips/loopfilter_msa.h"
-
-void aom_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
- v16u8 mask, hev, flat, thresh, b_limit, limit;
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
- v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
- v8i16 p2_filter8, p1_filter8, p0_filter8, q0_filter8, q1_filter8, q2_filter8;
- v8u16 p3_r, p2_r, p1_r, p0_r, q3_r, q2_r, q1_r, q0_r;
- v16i8 zero = { 0 };
-
- /* load vector elements */
- LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
-
- thresh = (v16u8)__msa_fill_b(*thresh_ptr);
- b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
- limit = (v16u8)__msa_fill_b(*limit_ptr);
-
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
- mask, flat);
- AOM_FLAT4(p3, p2, p0, q0, q2, q3, flat);
- AOM_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
-
- flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);
-
- if (__msa_test_bz_v(flat)) {
- p1_d = __msa_copy_u_d((v2i64)p1_out, 0);
- p0_d = __msa_copy_u_d((v2i64)p0_out, 0);
- q0_d = __msa_copy_u_d((v2i64)q0_out, 0);
- q1_d = __msa_copy_u_d((v2i64)q1_out, 0);
- SD4(p1_d, p0_d, q0_d, q1_d, (src - 2 * pitch), pitch);
- } else {
- ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, zero,
- q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r);
- AOM_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filter8,
- p1_filter8, p0_filter8, q0_filter8, q1_filter8, q2_filter8);
-
- /* convert 16 bit output data into 8 bit */
- PCKEV_B4_SH(zero, p2_filter8, zero, p1_filter8, zero, p0_filter8, zero,
- q0_filter8, p2_filter8, p1_filter8, p0_filter8, q0_filter8);
- PCKEV_B2_SH(zero, q1_filter8, zero, q2_filter8, q1_filter8, q2_filter8);
-
- /* store pixel values */
- p2_out = __msa_bmnz_v(p2, (v16u8)p2_filter8, flat);
- p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filter8, flat);
- p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filter8, flat);
- q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filter8, flat);
- q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filter8, flat);
- q2_out = __msa_bmnz_v(q2, (v16u8)q2_filter8, flat);
-
- p2_d = __msa_copy_u_d((v2i64)p2_out, 0);
- p1_d = __msa_copy_u_d((v2i64)p1_out, 0);
- p0_d = __msa_copy_u_d((v2i64)p0_out, 0);
- q0_d = __msa_copy_u_d((v2i64)q0_out, 0);
- q1_d = __msa_copy_u_d((v2i64)q1_out, 0);
- q2_d = __msa_copy_u_d((v2i64)q2_out, 0);
-
- src -= 3 * pitch;
-
- SD4(p2_d, p1_d, p0_d, q0_d, src, pitch);
- src += (4 * pitch);
- SD(q1_d, src);
- src += pitch;
- SD(q2_d, src);
- }
-}
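
AOM_FILTER8 produces the 7-tap smoothing blended in under the flat mask above; each output is a rounded average whose taps sum to 8. A scalar sketch assuming the standard taps:

#include <stdint.h>

#define RND3(x) (uint8_t)(((x) + 4) >> 3) /* rounded >> 3 */

/* in[] = { p3, p2, p1, p0, q0, q1, q2, q3 };
 * out[] receives the filtered p2, p1, p0, q0, q1, q2. */
static void filter8_sketch(const uint8_t in[8], uint8_t out[6]) {
  const int p3 = in[0], p2 = in[1], p1 = in[2], p0 = in[3];
  const int q0 = in[4], q1 = in[5], q2 = in[6], q3 = in[7];
  out[0] = RND3(3 * p3 + 2 * p2 + p1 + p0 + q0);        /* p2 */
  out[1] = RND3(2 * p3 + p2 + 2 * p1 + p0 + q0 + q1);   /* p1 */
  out[2] = RND3(p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2);  /* p0 */
  out[3] = RND3(p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3);  /* q0 */
  out[4] = RND3(p1 + p0 + q0 + 2 * q1 + q2 + 2 * q3);   /* q1 */
  out[5] = RND3(p0 + q0 + q1 + 2 * q2 + 3 * q3);        /* q2 */
}
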
-
-void aom_lpf_horizontal_8_dual_msa(
- uint8_t *src, int32_t pitch, const uint8_t *b_limit0, const uint8_t *limit0,
- const uint8_t *thresh0, const uint8_t *b_limit1, const uint8_t *limit1,
- const uint8_t *thresh1) {
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
- v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
- v16u8 flat, mask, hev, tmp, thresh, b_limit, limit;
- v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
- v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l;
- v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
- v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l;
- v16u8 zero = { 0 };
-
- /* load vector elements */
- LD_UB8(src - (4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
-
- thresh = (v16u8)__msa_fill_b(*thresh0);
- tmp = (v16u8)__msa_fill_b(*thresh1);
- thresh = (v16u8)__msa_ilvr_d((v2i64)tmp, (v2i64)thresh);
-
- b_limit = (v16u8)__msa_fill_b(*b_limit0);
- tmp = (v16u8)__msa_fill_b(*b_limit1);
- b_limit = (v16u8)__msa_ilvr_d((v2i64)tmp, (v2i64)b_limit);
-
- limit = (v16u8)__msa_fill_b(*limit0);
- tmp = (v16u8)__msa_fill_b(*limit1);
- limit = (v16u8)__msa_ilvr_d((v2i64)tmp, (v2i64)limit);
-
- /* mask and hev */
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
- mask, flat);
- AOM_FLAT4(p3, p2, p0, q0, q2, q3, flat);
- AOM_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
-
- if (__msa_test_bz_v(flat)) {
- ST_UB4(p1_out, p0_out, q0_out, q1_out, (src - 2 * pitch), pitch);
- } else {
- ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, zero,
- q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r);
- AOM_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
- p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
-
- ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l);
- ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l);
- AOM_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
- p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
-
- /* convert 16 bit output data into 8 bit */
- PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l,
- p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r,
- p0_filt8_r, q0_filt8_r);
- PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r,
- q2_filt8_r);
-
- /* store pixel values */
- p2_out = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat);
- p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat);
- p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat);
- q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat);
- q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat);
- q2_out = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat);
-
- src -= 3 * pitch;
-
- ST_UB4(p2_out, p1_out, p0_out, q0_out, src, pitch);
- src += (4 * pitch);
- ST_UB2(q1_out, q2_out, src, pitch);
- src += (2 * pitch);
- }
-}
-
-void aom_lpf_vertical_8_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr) {
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
- v16u8 p1_out, p0_out, q0_out, q1_out;
- v16u8 flat, mask, hev, thresh, b_limit, limit;
- v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
- v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
- v16u8 zero = { 0 };
- v8i16 vec0, vec1, vec2, vec3, vec4;
-
- /* load vector elements */
- LD_UB8(src - 4, pitch, p3, p2, p1, p0, q0, q1, q2, q3);
-
- TRANSPOSE8x8_UB_UB(p3, p2, p1, p0, q0, q1, q2, q3, p3, p2, p1, p0, q0, q1, q2,
- q3);
-
- thresh = (v16u8)__msa_fill_b(*thresh_ptr);
- b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
- limit = (v16u8)__msa_fill_b(*limit_ptr);
-
- /* mask and hev */
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
- mask, flat);
- /* flat4 */
- AOM_FLAT4(p3, p2, p0, q0, q2, q3, flat);
- /* filter4 */
- AOM_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
-
- flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);
-
- if (__msa_test_bz_v(flat)) {
-    /* store 4 pixels p1 - q1 */
- ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec2, vec3);
-
- src -= 2;
- ST4x4_UB(vec2, vec2, 0, 1, 2, 3, src, pitch);
- src += 4 * pitch;
- ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
- } else {
- ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, zero,
- q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r);
- AOM_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
- p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
- /* convert 16 bit output data into 8 bit */
- PCKEV_B4_SH(p2_filt8_r, p2_filt8_r, p1_filt8_r, p1_filt8_r, p0_filt8_r,
- p0_filt8_r, q0_filt8_r, q0_filt8_r, p2_filt8_r, p1_filt8_r,
- p0_filt8_r, q0_filt8_r);
- PCKEV_B2_SH(q1_filt8_r, q1_filt8_r, q2_filt8_r, q2_filt8_r, q1_filt8_r,
- q2_filt8_r);
-
- /* store pixel values */
- p2 = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat);
- p1 = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat);
- p0 = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat);
- q0 = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat);
- q1 = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat);
- q2 = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat);
-
-    /* store 6 pixels p2 - q2 */
- ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec2, vec3);
- vec4 = (v8i16)__msa_ilvr_b((v16i8)q2, (v16i8)q1);
-
- src -= 3;
- ST4x4_UB(vec2, vec2, 0, 1, 2, 3, src, pitch);
- ST2x4_UB(vec4, 0, src + 4, pitch);
- src += (4 * pitch);
- ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
- ST2x4_UB(vec4, 4, src + 4, pitch);
- }
-}
-
-void aom_lpf_vertical_8_dual_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit0, const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *b_limit1, const uint8_t *limit1,
- const uint8_t *thresh1) {
- uint8_t *temp_src;
- v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
- v16u8 p1_out, p0_out, q0_out, q1_out;
- v16u8 flat, mask, hev, thresh, b_limit, limit;
- v16u8 row4, row5, row6, row7, row12, row13, row14, row15;
- v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
- v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l;
- v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
- v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l;
- v16u8 zero = { 0 };
- v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
-
- temp_src = src - 4;
-
- LD_UB8(temp_src, pitch, p0, p1, p2, p3, row4, row5, row6, row7);
- temp_src += (8 * pitch);
- LD_UB8(temp_src, pitch, q3, q2, q1, q0, row12, row13, row14, row15);
-
- /* transpose 16x8 matrix into 8x16 */
- TRANSPOSE16x8_UB_UB(p0, p1, p2, p3, row4, row5, row6, row7, q3, q2, q1, q0,
- row12, row13, row14, row15, p3, p2, p1, p0, q0, q1, q2,
- q3);
-
- thresh = (v16u8)__msa_fill_b(*thresh0);
- vec0 = (v8i16)__msa_fill_b(*thresh1);
- thresh = (v16u8)__msa_ilvr_d((v2i64)vec0, (v2i64)thresh);
-
- b_limit = (v16u8)__msa_fill_b(*b_limit0);
- vec0 = (v8i16)__msa_fill_b(*b_limit1);
- b_limit = (v16u8)__msa_ilvr_d((v2i64)vec0, (v2i64)b_limit);
-
- limit = (v16u8)__msa_fill_b(*limit0);
- vec0 = (v8i16)__msa_fill_b(*limit1);
- limit = (v16u8)__msa_ilvr_d((v2i64)vec0, (v2i64)limit);
-
- /* mask and hev */
- LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
- mask, flat);
- /* flat4 */
- AOM_FLAT4(p3, p2, p0, q0, q2, q3, flat);
- /* filter4 */
- AOM_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
-
- if (__msa_test_bz_v(flat)) {
- ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec2, vec3);
- ILVL_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec4, vec5);
-
- src -= 2;
- ST4x8_UB(vec2, vec3, src, pitch);
- src += 8 * pitch;
- ST4x8_UB(vec4, vec5, src, pitch);
- } else {
- ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, zero,
- q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r);
- AOM_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
- p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
-
- ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l);
- ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l);
-
- /* filter8 */
- AOM_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
- p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
-
- /* convert 16 bit output data into 8 bit */
- PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l,
- p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r,
- p0_filt8_r, q0_filt8_r);
- PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r,
- q2_filt8_r);
-
- /* store pixel values */
- p2 = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat);
- p1 = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat);
- p0 = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat);
- q0 = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat);
- q1 = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat);
- q2 = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat);
-
- ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec3, vec4);
- ILVL_B2_SH(p1, p2, q0, p0, vec0, vec1);
- ILVRL_H2_SH(vec1, vec0, vec6, vec7);
- ILVRL_B2_SH(q2, q1, vec2, vec5);
-
- src -= 3;
- ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
- ST2x4_UB(vec2, 0, src + 4, pitch);
- src += (4 * pitch);
- ST4x4_UB(vec4, vec4, 0, 1, 2, 3, src, pitch);
- ST2x4_UB(vec2, 4, src + 4, pitch);
- src += (4 * pitch);
- ST4x4_UB(vec6, vec6, 0, 1, 2, 3, src, pitch);
- ST2x4_UB(vec5, 0, src + 4, pitch);
- src += (4 * pitch);
- ST4x4_UB(vec7, vec7, 0, 1, 2, 3, src, pitch);
- ST2x4_UB(vec5, 4, src + 4, pitch);
- }
-}
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_filters_dspr2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_filters_dspr2.c
deleted file mode 100644
index 8c41278beb5..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_filters_dspr2.c
+++ /dev/null
@@ -1,328 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/mips/common_dspr2.h"
-#include "aom_dsp/mips/loopfilter_filters_dspr2.h"
-#include "aom_dsp/mips/loopfilter_macros_dspr2.h"
-#include "aom_dsp/mips/loopfilter_masks_dspr2.h"
-#include "aom_mem/aom_mem.h"
-
-#if HAVE_DSPR2
-void aom_lpf_horizontal_4_dspr2(unsigned char *s, int pitch,
- const uint8_t *blimit, const uint8_t *limit,
- const uint8_t *thresh) {
- uint8_t i;
- uint32_t mask;
- uint32_t hev;
- uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
- uint8_t *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6;
- uint32_t thresh_vec, flimit_vec, limit_vec;
- uint32_t uflimit, ulimit, uthresh;
-
- uflimit = *blimit;
- ulimit = *limit;
- uthresh = *thresh;
-
- /* create quad-byte */
- __asm__ __volatile__(
- "replv.qb %[thresh_vec], %[uthresh] \n\t"
- "replv.qb %[flimit_vec], %[uflimit] \n\t"
- "replv.qb %[limit_vec], %[ulimit] \n\t"
-
- : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec),
- [limit_vec] "=r"(limit_vec)
- : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit));
-
- /* prefetch data for store */
- prefetch_store(s);
-
-  /* The loop filter is designed to work on chars so that we can make
-     maximum use of 8-bit SIMD instructions. */
- for (i = 0; i < 2; i++) {
- sm1 = s - (pitch << 2);
- s0 = sm1 + pitch;
- s1 = s0 + pitch;
- s2 = s - pitch;
- s3 = s;
- s4 = s + pitch;
- s5 = s4 + pitch;
- s6 = s5 + pitch;
-
- __asm__ __volatile__(
- "lw %[p1], (%[s1]) \n\t"
- "lw %[p2], (%[s2]) \n\t"
- "lw %[p3], (%[s3]) \n\t"
- "lw %[p4], (%[s4]) \n\t"
-
- : [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4)
- : [s1] "r"(s1), [s2] "r"(s2), [s3] "r"(s3), [s4] "r"(s4));
-
-    /* if (p1 - p4 == 0) and (p2 - p3 == 0),
-       the mask will be zero and filtering is not needed */
- if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) {
- __asm__ __volatile__(
- "lw %[pm1], (%[sm1]) \n\t"
- "lw %[p0], (%[s0]) \n\t"
- "lw %[p5], (%[s5]) \n\t"
- "lw %[p6], (%[s6]) \n\t"
-
- : [pm1] "=&r"(pm1), [p0] "=&r"(p0), [p5] "=&r"(p5), [p6] "=&r"(p6)
- : [sm1] "r"(sm1), [s0] "r"(s0), [s5] "r"(s5), [s6] "r"(s6));
-
- filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1, p0, p3, p4, p5,
- p6, thresh_vec, &hev, &mask);
-
-      /* if mask == 0, filtering is not needed */
- if (mask) {
- /* filtering */
- filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
-
- __asm__ __volatile__(
- "sw %[p1], (%[s1]) \n\t"
- "sw %[p2], (%[s2]) \n\t"
- "sw %[p3], (%[s3]) \n\t"
- "sw %[p4], (%[s4]) \n\t"
-
- :
- : [p1] "r"(p1), [p2] "r"(p2), [p3] "r"(p3), [p4] "r"(p4),
- [s1] "r"(s1), [s2] "r"(s2), [s3] "r"(s3), [s4] "r"(s4));
- }
- }
-
- s = s + 4;
- }
-}
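
The replv.qb instructions in the "create quad-byte" asm blocks replicate one threshold byte across a 32-bit register so that four pixels can be tested per word. A portable one-line equivalent, for reference:

#include <stdint.h>

/* Copy one byte into all four lanes of a 32-bit word. */
static inline uint32_t splat_byte_u32(uint8_t b) {
  return (uint32_t)b * 0x01010101u;
}
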
-
-void aom_lpf_vertical_4_dspr2(unsigned char *s, int pitch,
- const uint8_t *blimit, const uint8_t *limit,
- const uint8_t *thresh) {
- uint8_t i;
- uint32_t mask, hev;
- uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
- uint8_t *s1, *s2, *s3, *s4;
- uint32_t prim1, prim2, sec3, sec4, prim3, prim4;
- uint32_t thresh_vec, flimit_vec, limit_vec;
- uint32_t uflimit, ulimit, uthresh;
-
- uflimit = *blimit;
- ulimit = *limit;
- uthresh = *thresh;
-
- /* create quad-byte */
- __asm__ __volatile__(
- "replv.qb %[thresh_vec], %[uthresh] \n\t"
- "replv.qb %[flimit_vec], %[uflimit] \n\t"
- "replv.qb %[limit_vec], %[ulimit] \n\t"
-
- : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec),
- [limit_vec] "=r"(limit_vec)
- : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit));
-
- /* prefetch data for store */
- prefetch_store(s + pitch);
-
- for (i = 0; i < 2; i++) {
- s1 = s;
- s2 = s + pitch;
- s3 = s2 + pitch;
- s4 = s3 + pitch;
- s = s4 + pitch;
-
-    /* load quad-byte vectors
-     * memory is 4-byte aligned
-     */
- p2 = *((uint32_t *)(s1 - 4));
- p6 = *((uint32_t *)(s1));
- p1 = *((uint32_t *)(s2 - 4));
- p5 = *((uint32_t *)(s2));
- p0 = *((uint32_t *)(s3 - 4));
- p4 = *((uint32_t *)(s3));
- pm1 = *((uint32_t *)(s4 - 4));
- p3 = *((uint32_t *)(s4));
-
- /* transpose pm1, p0, p1, p2 */
- __asm__ __volatile__(
- "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t"
- "precr.qb.ph %[prim2], %[p2], %[p1] \n\t"
- "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t"
- "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t"
-
- "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t"
- "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t"
- "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
- "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
-
- "precrq.ph.w %[p2], %[p1], %[sec3] \n\t"
- "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t"
- "append %[p1], %[sec3], 16 \n\t"
- "append %[pm1], %[sec4], 16 \n\t"
-
- : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3),
- [prim4] "=&r"(prim4), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0),
- [pm1] "+r"(pm1), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4)
- :);
-
- /* transpose p3, p4, p5, p6 */
- __asm__ __volatile__(
- "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t"
- "precr.qb.ph %[prim2], %[p6], %[p5] \n\t"
- "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t"
- "precr.qb.ph %[prim4], %[p4], %[p3] \n\t"
-
- "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t"
- "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t"
- "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
- "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
-
- "precrq.ph.w %[p6], %[p5], %[sec3] \n\t"
- "precrq.ph.w %[p4], %[p3], %[sec4] \n\t"
- "append %[p5], %[sec3], 16 \n\t"
- "append %[p3], %[sec4], 16 \n\t"
-
- : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3),
- [prim4] "=&r"(prim4), [p6] "+r"(p6), [p5] "+r"(p5), [p4] "+r"(p4),
- [p3] "+r"(p3), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4)
- :);
-
-    /* if (p1 - p4) == 0 and (p2 - p3) == 0,
-     * the mask will be zero and no filtering is needed
-     */
- if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) {
- filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1, p0, p3, p4, p5,
- p6, thresh_vec, &hev, &mask);
-
-      /* if mask == 0, filtering is not needed */
- if (mask) {
- /* filtering */
- filter_dspr2(mask, hev, &p1, &p2, &p3, &p4);
-
-        /* unpack the processed 4x4 neighborhood
-         * byte by byte; a transposed store is not used
-         * because the output memory isn't aligned
-         */
- __asm__ __volatile__(
- "sb %[p4], 1(%[s4]) \n\t"
- "sb %[p3], 0(%[s4]) \n\t"
- "sb %[p2], -1(%[s4]) \n\t"
- "sb %[p1], -2(%[s4]) \n\t"
-
- :
- : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1),
- [s4] "r"(s4));
-
- __asm__ __volatile__(
- "srl %[p4], %[p4], 8 \n\t"
- "srl %[p3], %[p3], 8 \n\t"
- "srl %[p2], %[p2], 8 \n\t"
- "srl %[p1], %[p1], 8 \n\t"
-
- : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1)
- :);
-
- __asm__ __volatile__(
- "sb %[p4], 1(%[s3]) \n\t"
- "sb %[p3], 0(%[s3]) \n\t"
- "sb %[p2], -1(%[s3]) \n\t"
- "sb %[p1], -2(%[s3]) \n\t"
-
- : [p1] "+r"(p1)
- : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [s3] "r"(s3));
-
- __asm__ __volatile__(
- "srl %[p4], %[p4], 8 \n\t"
- "srl %[p3], %[p3], 8 \n\t"
- "srl %[p2], %[p2], 8 \n\t"
- "srl %[p1], %[p1], 8 \n\t"
-
- : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1)
- :);
-
- __asm__ __volatile__(
- "sb %[p4], 1(%[s2]) \n\t"
- "sb %[p3], 0(%[s2]) \n\t"
- "sb %[p2], -1(%[s2]) \n\t"
- "sb %[p1], -2(%[s2]) \n\t"
-
- :
- : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1),
- [s2] "r"(s2));
-
- __asm__ __volatile__(
- "srl %[p4], %[p4], 8 \n\t"
- "srl %[p3], %[p3], 8 \n\t"
- "srl %[p2], %[p2], 8 \n\t"
- "srl %[p1], %[p1], 8 \n\t"
-
- : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1)
- :);
-
- __asm__ __volatile__(
- "sb %[p4], 1(%[s1]) \n\t"
- "sb %[p3], 0(%[s1]) \n\t"
- "sb %[p2], -1(%[s1]) \n\t"
- "sb %[p1], -2(%[s1]) \n\t"
-
- :
- : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1),
- [s1] "r"(s1));
- }
- }
- }
-}
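
The precrq/precr/append sequences above perform a 4x4 byte transpose entirely in registers, turning the four loaded rows into four pixel columns. A rough portable C equivalent of that data movement, assuming little-endian byte order (a sketch, not the deleted implementation):

#include <stdint.h>

/* in[r] holds row r of a 4x4 byte block packed little-endian;
 * out[c] receives column c packed the same way. */
static void transpose_4x4_bytes(const uint32_t in[4], uint32_t out[4]) {
  for (int c = 0; c < 4; ++c) {
    uint32_t col = 0;
    for (int r = 0; r < 4; ++r) {
      uint32_t b = (in[r] >> (8 * c)) & 0xFF; /* byte c of row r ... */
      col |= b << (8 * r);                    /* ... becomes byte r of column c */
    }
    out[c] = col;
  }
}
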
-
-void aom_lpf_horizontal_4_dual_dspr2(
- uint8_t *s, int p /* pitch */, const uint8_t *blimit0,
- const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1,
- const uint8_t *limit1, const uint8_t *thresh1) {
- aom_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0);
- aom_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1);
-}
-
-void aom_lpf_horizontal_8_dual_dspr2(
- uint8_t *s, int p /* pitch */, const uint8_t *blimit0,
- const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1,
- const uint8_t *limit1, const uint8_t *thresh1) {
- aom_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0);
- aom_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1);
-}
-
-void aom_lpf_vertical_4_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- aom_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0);
- aom_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1);
-}
-
-void aom_lpf_vertical_8_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit0,
- const uint8_t *limit0,
- const uint8_t *thresh0,
- const uint8_t *blimit1,
- const uint8_t *limit1,
- const uint8_t *thresh1) {
- aom_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0);
- aom_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1);
-}
-
-void aom_lpf_vertical_16_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh) {
- aom_lpf_vertical_16_dspr2(s, p, blimit, limit, thresh);
- aom_lpf_vertical_16_dspr2(s + 8 * p, p, blimit, limit, thresh);
-}
-#endif // #if HAVE_DSPR2
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_filters_dspr2.h b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_filters_dspr2.h
deleted file mode 100644
index 28f0dc35a30..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_filters_dspr2.h
+++ /dev/null
@@ -1,736 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_MIPS_LOOPFILTER_FILTERS_DSPR2_H_
-#define AOM_AOM_DSP_MIPS_LOOPFILTER_FILTERS_DSPR2_H_
-
-#include <stdlib.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/mem.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if HAVE_DSPR2
-/* inputs & outputs are quad-byte vectors */
-static INLINE void filter_dspr2(uint32_t mask, uint32_t hev, uint32_t *ps1,
- uint32_t *ps0, uint32_t *qs0, uint32_t *qs1) {
- int32_t aom_filter_l, aom_filter_r;
- int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r;
- int32_t subr_r, subr_l;
- uint32_t t1, t2, HWM, t3;
- uint32_t hev_l, hev_r, mask_l, mask_r, invhev_l, invhev_r;
- int32_t vps1, vps0, vqs0, vqs1;
- int32_t vps1_l, vps1_r, vps0_l, vps0_r, vqs0_l, vqs0_r, vqs1_l, vqs1_r;
- uint32_t N128;
-
- N128 = 0x80808080;
- t1 = 0x03000300;
- t2 = 0x04000400;
- t3 = 0x01000100;
- HWM = 0xFF00FF00;
-
- vps0 = (*ps0) ^ N128;
- vps1 = (*ps1) ^ N128;
- vqs0 = (*qs0) ^ N128;
- vqs1 = (*qs1) ^ N128;
-
-  /* use halfword pairs instead of quad-bytes for accuracy */
- vps0_l = vps0 & HWM;
- vps0_r = vps0 << 8;
- vps0_r = vps0_r & HWM;
-
- vps1_l = vps1 & HWM;
- vps1_r = vps1 << 8;
- vps1_r = vps1_r & HWM;
-
- vqs0_l = vqs0 & HWM;
- vqs0_r = vqs0 << 8;
- vqs0_r = vqs0_r & HWM;
-
- vqs1_l = vqs1 & HWM;
- vqs1_r = vqs1 << 8;
- vqs1_r = vqs1_r & HWM;
-
- mask_l = mask & HWM;
- mask_r = mask << 8;
- mask_r = mask_r & HWM;
-
- hev_l = hev & HWM;
- hev_r = hev << 8;
- hev_r = hev_r & HWM;
-
- __asm__ __volatile__(
- /* aom_filter = aom_signed_char_clamp(ps1 - qs1); */
- "subq_s.ph %[aom_filter_l], %[vps1_l], %[vqs1_l] \n\t"
- "subq_s.ph %[aom_filter_r], %[vps1_r], %[vqs1_r] \n\t"
-
- /* qs0 - ps0 */
- "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t"
- "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t"
-
- /* aom_filter &= hev; */
- "and %[aom_filter_l], %[aom_filter_l], %[hev_l] \n\t"
- "and %[aom_filter_r], %[aom_filter_r], %[hev_r] \n\t"
-
- /* aom_filter = aom_signed_char_clamp(aom_filter + 3 * (qs0 - ps0)); */
- "addq_s.ph %[aom_filter_l], %[aom_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[aom_filter_r], %[aom_filter_r], %[subr_r] \n\t"
- "xor %[invhev_l], %[hev_l], %[HWM] \n\t"
- "addq_s.ph %[aom_filter_l], %[aom_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[aom_filter_r], %[aom_filter_r], %[subr_r] \n\t"
- "xor %[invhev_r], %[hev_r], %[HWM] \n\t"
- "addq_s.ph %[aom_filter_l], %[aom_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[aom_filter_r], %[aom_filter_r], %[subr_r] \n\t"
-
- /* aom_filter &= mask; */
- "and %[aom_filter_l], %[aom_filter_l], %[mask_l] \n\t"
- "and %[aom_filter_r], %[aom_filter_r], %[mask_r] \n\t"
-
- : [aom_filter_l] "=&r"(aom_filter_l), [aom_filter_r] "=&r"(aom_filter_r),
- [subr_l] "=&r"(subr_l), [subr_r] "=&r"(subr_r),
- [invhev_l] "=&r"(invhev_l), [invhev_r] "=&r"(invhev_r)
- : [vps0_l] "r"(vps0_l), [vps0_r] "r"(vps0_r), [vps1_l] "r"(vps1_l),
- [vps1_r] "r"(vps1_r), [vqs0_l] "r"(vqs0_l), [vqs0_r] "r"(vqs0_r),
- [vqs1_l] "r"(vqs1_l), [vqs1_r] "r"(vqs1_r), [mask_l] "r"(mask_l),
- [mask_r] "r"(mask_r), [hev_l] "r"(hev_l), [hev_r] "r"(hev_r),
- [HWM] "r"(HWM));
-
-  /* round one side with +4 and the other with +3 before the >> 3 */
- __asm__ __volatile__(
-      /* Filter1 = aom_signed_char_clamp(aom_filter + 4) >> 3; */
- "addq_s.ph %[Filter1_l], %[aom_filter_l], %[t2] \n\t"
- "addq_s.ph %[Filter1_r], %[aom_filter_r], %[t2] \n\t"
-
-      /* Filter2 = aom_signed_char_clamp(aom_filter + 3) >> 3; */
- "addq_s.ph %[Filter2_l], %[aom_filter_l], %[t1] \n\t"
- "addq_s.ph %[Filter2_r], %[aom_filter_r], %[t1] \n\t"
- "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t"
- "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t"
-
- "shra.ph %[Filter2_l], %[Filter2_l], 3 \n\t"
- "shra.ph %[Filter2_r], %[Filter2_r], 3 \n\t"
-
- "and %[Filter1_l], %[Filter1_l], %[HWM] \n\t"
- "and %[Filter1_r], %[Filter1_r], %[HWM] \n\t"
-
- /* vps0 = aom_signed_char_clamp(ps0 + Filter2); */
- "addq_s.ph %[vps0_l], %[vps0_l], %[Filter2_l] \n\t"
- "addq_s.ph %[vps0_r], %[vps0_r], %[Filter2_r] \n\t"
-
- /* vqs0 = aom_signed_char_clamp(qs0 - Filter1); */
- "subq_s.ph %[vqs0_l], %[vqs0_l], %[Filter1_l] \n\t"
- "subq_s.ph %[vqs0_r], %[vqs0_r], %[Filter1_r] \n\t"
-
- : [Filter1_l] "=&r"(Filter1_l), [Filter1_r] "=&r"(Filter1_r),
- [Filter2_l] "=&r"(Filter2_l), [Filter2_r] "=&r"(Filter2_r),
- [vps0_l] "+r"(vps0_l), [vps0_r] "+r"(vps0_r), [vqs0_l] "+r"(vqs0_l),
- [vqs0_r] "+r"(vqs0_r)
- : [t1] "r"(t1), [t2] "r"(t2), [HWM] "r"(HWM),
- [aom_filter_l] "r"(aom_filter_l), [aom_filter_r] "r"(aom_filter_r));
-
- __asm__ __volatile__(
-      /* Filter1 = (Filter1 + 1) >> 1 */
- "addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t"
- "addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t"
-
- /* aom_filter &= ~hev; */
- "and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t"
- "and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t"
-
- /* vps1 = aom_signed_char_clamp(ps1 + aom_filter); */
- "addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t"
- "addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t"
-
- /* vqs1 = aom_signed_char_clamp(qs1 - aom_filter); */
- "subq_s.ph %[vqs1_l], %[vqs1_l], %[Filter1_l] \n\t"
- "subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t"
-
- : [Filter1_l] "+r"(Filter1_l), [Filter1_r] "+r"(Filter1_r),
- [vps1_l] "+r"(vps1_l), [vps1_r] "+r"(vps1_r), [vqs1_l] "+r"(vqs1_l),
- [vqs1_r] "+r"(vqs1_r)
- : [t3] "r"(t3), [invhev_l] "r"(invhev_l), [invhev_r] "r"(invhev_r));
-
- /* Create quad-bytes from halfword pairs */
- vqs0_l = vqs0_l & HWM;
- vqs1_l = vqs1_l & HWM;
- vps0_l = vps0_l & HWM;
- vps1_l = vps1_l & HWM;
-
- __asm__ __volatile__(
- "shrl.ph %[vqs0_r], %[vqs0_r], 8 \n\t"
- "shrl.ph %[vps0_r], %[vps0_r], 8 \n\t"
- "shrl.ph %[vqs1_r], %[vqs1_r], 8 \n\t"
- "shrl.ph %[vps1_r], %[vps1_r], 8 \n\t"
-
- : [vps1_r] "+r"(vps1_r), [vqs1_r] "+r"(vqs1_r), [vps0_r] "+r"(vps0_r),
- [vqs0_r] "+r"(vqs0_r)
- :);
-
- vqs0 = vqs0_l | vqs0_r;
- vqs1 = vqs1_l | vqs1_r;
- vps0 = vps0_l | vps0_r;
- vps1 = vps1_l | vps1_r;
-
- *ps0 = vps0 ^ N128;
- *ps1 = vps1 ^ N128;
- *qs0 = vqs0 ^ N128;
- *qs1 = vqs1 ^ N128;
-}
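
filter_dspr2 above (and filter1_dspr2 below, which repeats the same math with separate output pointers) first widens each packed quad-byte word into two halfword-pair words so the saturating arithmetic gets 16-bit headroom, then narrows back. A self-contained sketch of just that split/merge round trip, with the filter math elided (assumed lane layout):

#include <stdint.h>

static uint32_t halfword_lane_roundtrip(uint32_t v) {
  const uint32_t HWM = 0xFF00FF00u;  /* selects each halfword's high byte */
  uint32_t left = v & HWM;           /* bytes 3 and 1 promoted to halfword lanes */
  uint32_t right = (v << 8) & HWM;   /* bytes 2 and 0 promoted the same way */
  /* ... per-halfword saturating adds and subtracts operate here ... */
  left &= HWM;                       /* keep each result's high byte */
  right >>= 8;                       /* move the other lane back down */
  return left | right;               /* repacked quad-byte word (== v here) */
}
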
-
-static INLINE void filter1_dspr2(uint32_t mask, uint32_t hev, uint32_t ps1,
- uint32_t ps0, uint32_t qs0, uint32_t qs1,
- uint32_t *p1_f0, uint32_t *p0_f0,
- uint32_t *q0_f0, uint32_t *q1_f0) {
- int32_t aom_filter_l, aom_filter_r;
- int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r;
- int32_t subr_r, subr_l;
- uint32_t t1, t2, HWM, t3;
- uint32_t hev_l, hev_r, mask_l, mask_r, invhev_l, invhev_r;
- int32_t vps1, vps0, vqs0, vqs1;
- int32_t vps1_l, vps1_r, vps0_l, vps0_r, vqs0_l, vqs0_r, vqs1_l, vqs1_r;
- uint32_t N128;
-
- N128 = 0x80808080;
- t1 = 0x03000300;
- t2 = 0x04000400;
- t3 = 0x01000100;
- HWM = 0xFF00FF00;
-
- vps0 = (ps0) ^ N128;
- vps1 = (ps1) ^ N128;
- vqs0 = (qs0) ^ N128;
- vqs1 = (qs1) ^ N128;
-
-  /* use halfword pairs instead of quad-bytes for accuracy */
- vps0_l = vps0 & HWM;
- vps0_r = vps0 << 8;
- vps0_r = vps0_r & HWM;
-
- vps1_l = vps1 & HWM;
- vps1_r = vps1 << 8;
- vps1_r = vps1_r & HWM;
-
- vqs0_l = vqs0 & HWM;
- vqs0_r = vqs0 << 8;
- vqs0_r = vqs0_r & HWM;
-
- vqs1_l = vqs1 & HWM;
- vqs1_r = vqs1 << 8;
- vqs1_r = vqs1_r & HWM;
-
- mask_l = mask & HWM;
- mask_r = mask << 8;
- mask_r = mask_r & HWM;
-
- hev_l = hev & HWM;
- hev_r = hev << 8;
- hev_r = hev_r & HWM;
-
- __asm__ __volatile__(
- /* aom_filter = aom_signed_char_clamp(ps1 - qs1); */
- "subq_s.ph %[aom_filter_l], %[vps1_l], %[vqs1_l] \n\t"
- "subq_s.ph %[aom_filter_r], %[vps1_r], %[vqs1_r] \n\t"
-
- /* qs0 - ps0 */
- "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t"
- "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t"
-
- /* aom_filter &= hev; */
- "and %[aom_filter_l], %[aom_filter_l], %[hev_l] \n\t"
- "and %[aom_filter_r], %[aom_filter_r], %[hev_r] \n\t"
-
- /* aom_filter = aom_signed_char_clamp(aom_filter + 3 * (qs0 - ps0)); */
- "addq_s.ph %[aom_filter_l], %[aom_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[aom_filter_r], %[aom_filter_r], %[subr_r] \n\t"
- "xor %[invhev_l], %[hev_l], %[HWM] \n\t"
- "addq_s.ph %[aom_filter_l], %[aom_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[aom_filter_r], %[aom_filter_r], %[subr_r] \n\t"
- "xor %[invhev_r], %[hev_r], %[HWM] \n\t"
- "addq_s.ph %[aom_filter_l], %[aom_filter_l], %[subr_l] \n\t"
- "addq_s.ph %[aom_filter_r], %[aom_filter_r], %[subr_r] \n\t"
-
- /* aom_filter &= mask; */
- "and %[aom_filter_l], %[aom_filter_l], %[mask_l] \n\t"
- "and %[aom_filter_r], %[aom_filter_r], %[mask_r] \n\t"
-
- : [aom_filter_l] "=&r"(aom_filter_l), [aom_filter_r] "=&r"(aom_filter_r),
- [subr_l] "=&r"(subr_l), [subr_r] "=&r"(subr_r),
- [invhev_l] "=&r"(invhev_l), [invhev_r] "=&r"(invhev_r)
- : [vps0_l] "r"(vps0_l), [vps0_r] "r"(vps0_r), [vps1_l] "r"(vps1_l),
- [vps1_r] "r"(vps1_r), [vqs0_l] "r"(vqs0_l), [vqs0_r] "r"(vqs0_r),
- [vqs1_l] "r"(vqs1_l), [vqs1_r] "r"(vqs1_r), [mask_l] "r"(mask_l),
- [mask_r] "r"(mask_r), [hev_l] "r"(hev_l), [hev_r] "r"(hev_r),
- [HWM] "r"(HWM));
-
-  /* round one side with +4 and the other with +3 before the >> 3 */
- __asm__ __volatile__(
-      /* Filter1 = aom_signed_char_clamp(aom_filter + 4) >> 3; */
- "addq_s.ph %[Filter1_l], %[aom_filter_l], %[t2] \n\t"
- "addq_s.ph %[Filter1_r], %[aom_filter_r], %[t2] \n\t"
-
-      /* Filter2 = aom_signed_char_clamp(aom_filter + 3) >> 3; */
- "addq_s.ph %[Filter2_l], %[aom_filter_l], %[t1] \n\t"
- "addq_s.ph %[Filter2_r], %[aom_filter_r], %[t1] \n\t"
- "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t"
- "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t"
-
- "shra.ph %[Filter2_l], %[Filter2_l], 3 \n\t"
- "shra.ph %[Filter2_r], %[Filter2_r], 3 \n\t"
-
- "and %[Filter1_l], %[Filter1_l], %[HWM] \n\t"
- "and %[Filter1_r], %[Filter1_r], %[HWM] \n\t"
-
- /* vps0 = aom_signed_char_clamp(ps0 + Filter2); */
- "addq_s.ph %[vps0_l], %[vps0_l], %[Filter2_l] \n\t"
- "addq_s.ph %[vps0_r], %[vps0_r], %[Filter2_r] \n\t"
-
- /* vqs0 = aom_signed_char_clamp(qs0 - Filter1); */
- "subq_s.ph %[vqs0_l], %[vqs0_l], %[Filter1_l] \n\t"
- "subq_s.ph %[vqs0_r], %[vqs0_r], %[Filter1_r] \n\t"
-
- : [Filter1_l] "=&r"(Filter1_l), [Filter1_r] "=&r"(Filter1_r),
- [Filter2_l] "=&r"(Filter2_l), [Filter2_r] "=&r"(Filter2_r),
- [vps0_l] "+r"(vps0_l), [vps0_r] "+r"(vps0_r), [vqs0_l] "+r"(vqs0_l),
- [vqs0_r] "+r"(vqs0_r)
- : [t1] "r"(t1), [t2] "r"(t2), [HWM] "r"(HWM),
- [aom_filter_l] "r"(aom_filter_l), [aom_filter_r] "r"(aom_filter_r));
-
- __asm__ __volatile__(
-      /* Filter1 = (Filter1 + 1) >> 1 */
- "addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t"
- "addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t"
-
- /* aom_filter &= ~hev; */
- "and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t"
- "and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t"
-
- /* vps1 = aom_signed_char_clamp(ps1 + aom_filter); */
- "addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t"
- "addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t"
-
- /* vqs1 = aom_signed_char_clamp(qs1 - aom_filter); */
- "subq_s.ph %[vqs1_l], %[vqs1_l], %[Filter1_l] \n\t"
- "subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t"
-
- : [Filter1_l] "+r"(Filter1_l), [Filter1_r] "+r"(Filter1_r),
- [vps1_l] "+r"(vps1_l), [vps1_r] "+r"(vps1_r), [vqs1_l] "+r"(vqs1_l),
- [vqs1_r] "+r"(vqs1_r)
- : [t3] "r"(t3), [invhev_l] "r"(invhev_l), [invhev_r] "r"(invhev_r));
-
- /* Create quad-bytes from halfword pairs */
- vqs0_l = vqs0_l & HWM;
- vqs1_l = vqs1_l & HWM;
- vps0_l = vps0_l & HWM;
- vps1_l = vps1_l & HWM;
-
- __asm__ __volatile__(
- "shrl.ph %[vqs0_r], %[vqs0_r], 8 \n\t"
- "shrl.ph %[vps0_r], %[vps0_r], 8 \n\t"
- "shrl.ph %[vqs1_r], %[vqs1_r], 8 \n\t"
- "shrl.ph %[vps1_r], %[vps1_r], 8 \n\t"
-
- : [vps1_r] "+r"(vps1_r), [vqs1_r] "+r"(vqs1_r), [vps0_r] "+r"(vps0_r),
- [vqs0_r] "+r"(vqs0_r)
- :);
-
- vqs0 = vqs0_l | vqs0_r;
- vqs1 = vqs1_l | vqs1_r;
- vps0 = vps0_l | vps0_r;
- vps1 = vps1_l | vps1_r;
-
- *p0_f0 = vps0 ^ N128;
- *p1_f0 = vps1 ^ N128;
- *q0_f0 = vqs0 ^ N128;
- *q1_f0 = vqs1 ^ N128;
-}
-
-static INLINE void mbfilter_dspr2(uint32_t *op3, uint32_t *op2, uint32_t *op1,
- uint32_t *op0, uint32_t *oq0, uint32_t *oq1,
- uint32_t *oq2, uint32_t *oq3) {
-  /* use a 7-tap filter [1, 1, 1, 2, 1, 1, 1] for flat lines */
- const uint32_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
- const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
- uint32_t res_op2, res_op1, res_op0;
- uint32_t res_oq0, res_oq1, res_oq2;
- uint32_t tmp;
- uint32_t add_p210_q012;
- uint32_t u32Four = 0x00040004;
-
- /* *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0, 3) 1 */
- /* *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1, 3) 2 */
- /* *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + p0 + q0 + q1 + q2, 3) 3 */
- /* *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + q0 + q0 + q1 + q2 + q3, 3) 4 */
- /* *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3, 3) 5 */
- /* *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q2 + q3 + q3 + q3, 3) 6 */
-
- __asm__ __volatile__(
- "addu.ph %[add_p210_q012], %[p2], %[p1] \n\t"
- "addu.ph %[add_p210_q012], %[add_p210_q012], %[p0] \n\t"
- "addu.ph %[add_p210_q012], %[add_p210_q012], %[q0] \n\t"
- "addu.ph %[add_p210_q012], %[add_p210_q012], %[q1] \n\t"
- "addu.ph %[add_p210_q012], %[add_p210_q012], %[q2] \n\t"
- "addu.ph %[add_p210_q012], %[add_p210_q012], %[u32Four] \n\t"
-
- "shll.ph %[tmp], %[p3], 1 \n\t"
- "addu.ph %[res_op2], %[tmp], %[p3] \n\t"
- "addu.ph %[res_op1], %[p3], %[p3] \n\t"
- "addu.ph %[res_op2], %[res_op2], %[p2] \n\t"
- "addu.ph %[res_op1], %[res_op1], %[p1] \n\t"
- "addu.ph %[res_op2], %[res_op2], %[add_p210_q012] \n\t"
- "addu.ph %[res_op1], %[res_op1], %[add_p210_q012] \n\t"
- "subu.ph %[res_op2], %[res_op2], %[q1] \n\t"
- "subu.ph %[res_op1], %[res_op1], %[q2] \n\t"
- "subu.ph %[res_op2], %[res_op2], %[q2] \n\t"
- "shrl.ph %[res_op1], %[res_op1], 3 \n\t"
- "shrl.ph %[res_op2], %[res_op2], 3 \n\t"
- "addu.ph %[res_op0], %[p3], %[p0] \n\t"
- "addu.ph %[res_oq0], %[q0], %[q3] \n\t"
- "addu.ph %[res_op0], %[res_op0], %[add_p210_q012] \n\t"
- "addu.ph %[res_oq0], %[res_oq0], %[add_p210_q012] \n\t"
- "addu.ph %[res_oq1], %[q3], %[q3] \n\t"
- "shll.ph %[tmp], %[q3], 1 \n\t"
- "addu.ph %[res_oq1], %[res_oq1], %[q1] \n\t"
- "addu.ph %[res_oq2], %[tmp], %[q3] \n\t"
- "addu.ph %[res_oq1], %[res_oq1], %[add_p210_q012] \n\t"
- "addu.ph %[res_oq2], %[res_oq2], %[add_p210_q012] \n\t"
- "subu.ph %[res_oq1], %[res_oq1], %[p2] \n\t"
- "addu.ph %[res_oq2], %[res_oq2], %[q2] \n\t"
- "shrl.ph %[res_oq1], %[res_oq1], 3 \n\t"
- "subu.ph %[res_oq2], %[res_oq2], %[p2] \n\t"
- "shrl.ph %[res_oq0], %[res_oq0], 3 \n\t"
- "subu.ph %[res_oq2], %[res_oq2], %[p1] \n\t"
- "shrl.ph %[res_op0], %[res_op0], 3 \n\t"
- "shrl.ph %[res_oq2], %[res_oq2], 3 \n\t"
-
- : [add_p210_q012] "=&r"(add_p210_q012), [tmp] "=&r"(tmp),
- [res_op2] "=&r"(res_op2), [res_op1] "=&r"(res_op1),
- [res_op0] "=&r"(res_op0), [res_oq0] "=&r"(res_oq0),
- [res_oq1] "=&r"(res_oq1), [res_oq2] "=&r"(res_oq2)
- : [p0] "r"(p0), [q0] "r"(q0), [p1] "r"(p1), [q1] "r"(q1), [p2] "r"(p2),
- [q2] "r"(q2), [p3] "r"(p3), [q3] "r"(q3), [u32Four] "r"(u32Four));
-
- *op2 = res_op2;
- *op1 = res_op1;
- *op0 = res_op0;
- *oq0 = res_oq0;
- *oq1 = res_oq1;
- *oq2 = res_oq2;
-}
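
All six outputs of the 7-tap filter share the term p2 + p1 + p0 + q0 + q1 + q2 + 4, which the code computes once as add_p210_q012 and then corrects per output with a few adds and subtracts. A scalar, per-pixel restatement of the same identities (hypothetical helper; p[0] = p0 .. p[3] = p3, likewise for q), matching the ROUND_POWER_OF_TWO forms in the comments above:

#include <stdint.h>

static void mbfilter_scalar(const uint8_t p[4], const uint8_t q[4],
                            uint8_t out_p[3], uint8_t out_q[3]) {
  /* shared term, including the rounding constant */
  const int common = p[2] + p[1] + p[0] + q[0] + q[1] + q[2] + 4;
  out_p[2] = (uint8_t)((3 * p[3] + p[2] + common - q[1] - q[2]) >> 3);
  out_p[1] = (uint8_t)((2 * p[3] + p[1] + common - q[2]) >> 3);
  out_p[0] = (uint8_t)((p[3] + p[0] + common) >> 3);
  out_q[0] = (uint8_t)((q[3] + q[0] + common) >> 3);
  out_q[1] = (uint8_t)((2 * q[3] + q[1] + common - p[2]) >> 3);
  out_q[2] = (uint8_t)((3 * q[3] + q[2] + common - p[2] - p[1]) >> 3);
}

mbfilter1_dspr2 below repeats the same computation with value arguments instead of in/out pointers.
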
-
-static INLINE void mbfilter1_dspr2(uint32_t p3, uint32_t p2, uint32_t p1,
- uint32_t p0, uint32_t q0, uint32_t q1,
- uint32_t q2, uint32_t q3, uint32_t *op2_f1,
- uint32_t *op1_f1, uint32_t *op0_f1,
- uint32_t *oq0_f1, uint32_t *oq1_f1,
- uint32_t *oq2_f1) {
-  /* use a 7-tap filter [1, 1, 1, 2, 1, 1, 1] for flat lines */
- uint32_t res_op2, res_op1, res_op0;
- uint32_t res_oq0, res_oq1, res_oq2;
- uint32_t tmp;
- uint32_t add_p210_q012;
- uint32_t u32Four = 0x00040004;
-
- /* *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0, 3) 1 */
- /* *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1, 3) 2 */
- /* *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + p0 + q0 + q1 + q2, 3) 3 */
- /* *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + q0 + q0 + q1 + q2 + q3, 3) 4 */
- /* *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3, 3) 5 */
- /* *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q2 + q3 + q3 + q3, 3) 6 */
-
- __asm__ __volatile__(
- "addu.ph %[add_p210_q012], %[p2], %[p1] \n\t"
- "addu.ph %[add_p210_q012], %[add_p210_q012], %[p0] \n\t"
- "addu.ph %[add_p210_q012], %[add_p210_q012], %[q0] \n\t"
- "addu.ph %[add_p210_q012], %[add_p210_q012], %[q1] \n\t"
- "addu.ph %[add_p210_q012], %[add_p210_q012], %[q2] \n\t"
- "addu.ph %[add_p210_q012], %[add_p210_q012], %[u32Four] \n\t"
-
- "shll.ph %[tmp], %[p3], 1 \n\t"
- "addu.ph %[res_op2], %[tmp], %[p3] \n\t"
- "addu.ph %[res_op1], %[p3], %[p3] \n\t"
- "addu.ph %[res_op2], %[res_op2], %[p2] \n\t"
- "addu.ph %[res_op1], %[res_op1], %[p1] \n\t"
- "addu.ph %[res_op2], %[res_op2], %[add_p210_q012] \n\t"
- "addu.ph %[res_op1], %[res_op1], %[add_p210_q012] \n\t"
- "subu.ph %[res_op2], %[res_op2], %[q1] \n\t"
- "subu.ph %[res_op1], %[res_op1], %[q2] \n\t"
- "subu.ph %[res_op2], %[res_op2], %[q2] \n\t"
- "shrl.ph %[res_op1], %[res_op1], 3 \n\t"
- "shrl.ph %[res_op2], %[res_op2], 3 \n\t"
- "addu.ph %[res_op0], %[p3], %[p0] \n\t"
- "addu.ph %[res_oq0], %[q0], %[q3] \n\t"
- "addu.ph %[res_op0], %[res_op0], %[add_p210_q012] \n\t"
- "addu.ph %[res_oq0], %[res_oq0], %[add_p210_q012] \n\t"
- "addu.ph %[res_oq1], %[q3], %[q3] \n\t"
- "shll.ph %[tmp], %[q3], 1 \n\t"
- "addu.ph %[res_oq1], %[res_oq1], %[q1] \n\t"
- "addu.ph %[res_oq2], %[tmp], %[q3] \n\t"
- "addu.ph %[res_oq1], %[res_oq1], %[add_p210_q012] \n\t"
- "addu.ph %[res_oq2], %[res_oq2], %[add_p210_q012] \n\t"
- "subu.ph %[res_oq1], %[res_oq1], %[p2] \n\t"
- "addu.ph %[res_oq2], %[res_oq2], %[q2] \n\t"
- "shrl.ph %[res_oq1], %[res_oq1], 3 \n\t"
- "subu.ph %[res_oq2], %[res_oq2], %[p2] \n\t"
- "shrl.ph %[res_oq0], %[res_oq0], 3 \n\t"
- "subu.ph %[res_oq2], %[res_oq2], %[p1] \n\t"
- "shrl.ph %[res_op0], %[res_op0], 3 \n\t"
- "shrl.ph %[res_oq2], %[res_oq2], 3 \n\t"
-
- : [add_p210_q012] "=&r"(add_p210_q012), [tmp] "=&r"(tmp),
- [res_op2] "=&r"(res_op2), [res_op1] "=&r"(res_op1),
- [res_op0] "=&r"(res_op0), [res_oq0] "=&r"(res_oq0),
- [res_oq1] "=&r"(res_oq1), [res_oq2] "=&r"(res_oq2)
- : [p0] "r"(p0), [q0] "r"(q0), [p1] "r"(p1), [q1] "r"(q1), [p2] "r"(p2),
- [q2] "r"(q2), [p3] "r"(p3), [q3] "r"(q3), [u32Four] "r"(u32Four));
-
- *op2_f1 = res_op2;
- *op1_f1 = res_op1;
- *op0_f1 = res_op0;
- *oq0_f1 = res_oq0;
- *oq1_f1 = res_oq1;
- *oq2_f1 = res_oq2;
-}
-
-static INLINE void wide_mbfilter_dspr2(
- uint32_t *op7, uint32_t *op6, uint32_t *op5, uint32_t *op4, uint32_t *op3,
- uint32_t *op2, uint32_t *op1, uint32_t *op0, uint32_t *oq0, uint32_t *oq1,
- uint32_t *oq2, uint32_t *oq3, uint32_t *oq4, uint32_t *oq5, uint32_t *oq6,
- uint32_t *oq7) {
- const uint32_t p7 = *op7, p6 = *op6, p5 = *op5, p4 = *op4;
- const uint32_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0;
- const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3;
- const uint32_t q4 = *oq4, q5 = *oq5, q6 = *oq6, q7 = *oq7;
- uint32_t res_op6, res_op5, res_op4, res_op3, res_op2, res_op1, res_op0;
- uint32_t res_oq0, res_oq1, res_oq2, res_oq3, res_oq4, res_oq5, res_oq6;
- uint32_t tmp;
- uint32_t add_p6toq6;
- uint32_t u32Eight = 0x00080008;
-
- __asm__ __volatile__(
-      /* sum of p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6,
-         which is reused by most of the outputs */
- "addu.ph %[add_p6toq6], %[p6], %[p5] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[p4] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[p3] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[p2] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[p1] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[p0] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[q0] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[q1] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[q2] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[q3] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[q4] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[q5] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[q6] \n\t"
- "addu.ph %[add_p6toq6], %[add_p6toq6], %[u32Eight] \n\t"
-
- : [add_p6toq6] "=&r"(add_p6toq6)
- : [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2),
- [p1] "r"(p1), [p0] "r"(p0), [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2),
- [q3] "r"(q3), [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6),
- [u32Eight] "r"(u32Eight));
-
- __asm__ __volatile__(
- /* *op6 = ROUND_POWER_OF_TWO(p7 * 7 + p6 * 2 + p5 + p4 +
- p3 + p2 + p1 + p0 + q0, 4) */
- "shll.ph %[tmp], %[p7], 3 \n\t"
- "subu.ph %[res_op6], %[tmp], %[p7] \n\t"
- "addu.ph %[res_op6], %[res_op6], %[p6] \n\t"
- "addu.ph %[res_op6], %[res_op6], %[add_p6toq6] \n\t"
- "subu.ph %[res_op6], %[res_op6], %[q1] \n\t"
- "subu.ph %[res_op6], %[res_op6], %[q2] \n\t"
- "subu.ph %[res_op6], %[res_op6], %[q3] \n\t"
- "subu.ph %[res_op6], %[res_op6], %[q4] \n\t"
- "subu.ph %[res_op6], %[res_op6], %[q5] \n\t"
- "subu.ph %[res_op6], %[res_op6], %[q6] \n\t"
- "shrl.ph %[res_op6], %[res_op6], 4 \n\t"
-
- /* *op5 = ROUND_POWER_OF_TWO(p7 * 6 + p6 + p5 * 2 + p4 + p3 +
- p2 + p1 + p0 + q0 + q1, 4) */
- "shll.ph %[tmp], %[p7], 2 \n\t"
- "addu.ph %[res_op5], %[tmp], %[p7] \n\t"
- "addu.ph %[res_op5], %[res_op5], %[p7] \n\t"
- "addu.ph %[res_op5], %[res_op5], %[p5] \n\t"
- "addu.ph %[res_op5], %[res_op5], %[add_p6toq6] \n\t"
- "subu.ph %[res_op5], %[res_op5], %[q2] \n\t"
- "subu.ph %[res_op5], %[res_op5], %[q3] \n\t"
- "subu.ph %[res_op5], %[res_op5], %[q4] \n\t"
- "subu.ph %[res_op5], %[res_op5], %[q5] \n\t"
- "subu.ph %[res_op5], %[res_op5], %[q6] \n\t"
- "shrl.ph %[res_op5], %[res_op5], 4 \n\t"
-
- /* *op4 = ROUND_POWER_OF_TWO(p7 * 5 + p6 + p5 + p4 * 2 + p3 + p2 +
- p1 + p0 + q0 + q1 + q2, 4) */
- "shll.ph %[tmp], %[p7], 2 \n\t"
- "addu.ph %[res_op4], %[tmp], %[p7] \n\t"
- "addu.ph %[res_op4], %[res_op4], %[p4] \n\t"
- "addu.ph %[res_op4], %[res_op4], %[add_p6toq6] \n\t"
- "subu.ph %[res_op4], %[res_op4], %[q3] \n\t"
- "subu.ph %[res_op4], %[res_op4], %[q4] \n\t"
- "subu.ph %[res_op4], %[res_op4], %[q5] \n\t"
- "subu.ph %[res_op4], %[res_op4], %[q6] \n\t"
- "shrl.ph %[res_op4], %[res_op4], 4 \n\t"
-
- /* *op3 = ROUND_POWER_OF_TWO(p7 * 4 + p6 + p5 + p4 + p3 * 2 + p2 +
- p1 + p0 + q0 + q1 + q2 + q3, 4) */
- "shll.ph %[tmp], %[p7], 2 \n\t"
- "addu.ph %[res_op3], %[tmp], %[p3] \n\t"
- "addu.ph %[res_op3], %[res_op3], %[add_p6toq6] \n\t"
- "subu.ph %[res_op3], %[res_op3], %[q4] \n\t"
- "subu.ph %[res_op3], %[res_op3], %[q5] \n\t"
- "subu.ph %[res_op3], %[res_op3], %[q6] \n\t"
- "shrl.ph %[res_op3], %[res_op3], 4 \n\t"
-
- /* *op2 = ROUND_POWER_OF_TWO(p7 * 3 + p6 + p5 + p4 + p3 + p2 * 2 + p1 +
- p0 + q0 + q1 + q2 + q3 + q4, 4) */
- "shll.ph %[tmp], %[p7], 1 \n\t"
- "addu.ph %[res_op2], %[tmp], %[p7] \n\t"
- "addu.ph %[res_op2], %[res_op2], %[p2] \n\t"
- "addu.ph %[res_op2], %[res_op2], %[add_p6toq6] \n\t"
- "subu.ph %[res_op2], %[res_op2], %[q5] \n\t"
- "subu.ph %[res_op2], %[res_op2], %[q6] \n\t"
- "shrl.ph %[res_op2], %[res_op2], 4 \n\t"
-
- /* *op1 = ROUND_POWER_OF_TWO(p7 * 2 + p6 + p5 + p4 + p3 + p2 + p1 * 2 +
- p0 + q0 + q1 + q2 + q3 + q4 + q5, 4); */
- "shll.ph %[tmp], %[p7], 1 \n\t"
- "addu.ph %[res_op1], %[tmp], %[p1] \n\t"
- "addu.ph %[res_op1], %[res_op1], %[add_p6toq6] \n\t"
- "subu.ph %[res_op1], %[res_op1], %[q6] \n\t"
- "shrl.ph %[res_op1], %[res_op1], 4 \n\t"
-
- /* *op0 = ROUND_POWER_OF_TWO(p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 +
- q0 + q1 + q2 + q3 + q4 + q5 + q6, 4) */
- "addu.ph %[res_op0], %[p7], %[p0] \n\t"
- "addu.ph %[res_op0], %[res_op0], %[add_p6toq6] \n\t"
- "shrl.ph %[res_op0], %[res_op0], 4 \n\t"
-
- : [res_op6] "=&r"(res_op6), [res_op5] "=&r"(res_op5),
- [res_op4] "=&r"(res_op4), [res_op3] "=&r"(res_op3),
- [res_op2] "=&r"(res_op2), [res_op1] "=&r"(res_op1),
- [res_op0] "=&r"(res_op0), [tmp] "=&r"(tmp)
- : [p7] "r"(p7), [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3),
- [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), [q2] "r"(q2), [q1] "r"(q1),
- [q3] "r"(q3), [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6),
- [add_p6toq6] "r"(add_p6toq6));
-
- *op6 = res_op6;
- *op5 = res_op5;
- *op4 = res_op4;
- *op3 = res_op3;
- *op2 = res_op2;
- *op1 = res_op1;
- *op0 = res_op0;
-
- __asm__ __volatile__(
- /* *oq0 = ROUND_POWER_OF_TWO(p6 + p5 + p4 + p3 + p2 + p1 + p0 + q0 * 2 +
- q1 + q2 + q3 + q4 + q5 + q6 + q7, 4); */
- "addu.ph %[res_oq0], %[q7], %[q0] \n\t"
- "addu.ph %[res_oq0], %[res_oq0], %[add_p6toq6] \n\t"
- "shrl.ph %[res_oq0], %[res_oq0], 4 \n\t"
-
- /* *oq1 = ROUND_POWER_OF_TWO(p5 + p4 + p3 + p2 + p1 + p0 + q0 + q1 * 2 +
- q2 + q3 + q4 + q5 + q6 + q7 * 2, 4) */
- "shll.ph %[tmp], %[q7], 1 \n\t"
- "addu.ph %[res_oq1], %[tmp], %[q1] \n\t"
- "addu.ph %[res_oq1], %[res_oq1], %[add_p6toq6] \n\t"
- "subu.ph %[res_oq1], %[res_oq1], %[p6] \n\t"
- "shrl.ph %[res_oq1], %[res_oq1], 4 \n\t"
-
- /* *oq2 = ROUND_POWER_OF_TWO(p4 + p3 + p2 + p1 + p0 + q0 + q1 + q2 * 2 +
- q3 + q4 + q5 + q6 + q7 * 3, 4) */
- "shll.ph %[tmp], %[q7], 1 \n\t"
- "addu.ph %[res_oq2], %[tmp], %[q7] \n\t"
- "addu.ph %[res_oq2], %[res_oq2], %[q2] \n\t"
- "addu.ph %[res_oq2], %[res_oq2], %[add_p6toq6] \n\t"
- "subu.ph %[res_oq2], %[res_oq2], %[p5] \n\t"
- "subu.ph %[res_oq2], %[res_oq2], %[p6] \n\t"
- "shrl.ph %[res_oq2], %[res_oq2], 4 \n\t"
-
- /* *oq3 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + q0 + q1 + q2 +
- q3 * 2 + q4 + q5 + q6 + q7 * 4, 4) */
- "shll.ph %[tmp], %[q7], 2 \n\t"
- "addu.ph %[res_oq3], %[tmp], %[q3] \n\t"
- "addu.ph %[res_oq3], %[res_oq3], %[add_p6toq6] \n\t"
- "subu.ph %[res_oq3], %[res_oq3], %[p4] \n\t"
- "subu.ph %[res_oq3], %[res_oq3], %[p5] \n\t"
- "subu.ph %[res_oq3], %[res_oq3], %[p6] \n\t"
- "shrl.ph %[res_oq3], %[res_oq3], 4 \n\t"
-
- /* *oq4 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + q0 + q1 + q2 + q3 +
- q4 * 2 + q5 + q6 + q7 * 5, 4) */
- "shll.ph %[tmp], %[q7], 2 \n\t"
- "addu.ph %[res_oq4], %[tmp], %[q7] \n\t"
- "addu.ph %[res_oq4], %[res_oq4], %[q4] \n\t"
- "addu.ph %[res_oq4], %[res_oq4], %[add_p6toq6] \n\t"
- "subu.ph %[res_oq4], %[res_oq4], %[p3] \n\t"
- "subu.ph %[res_oq4], %[res_oq4], %[p4] \n\t"
- "subu.ph %[res_oq4], %[res_oq4], %[p5] \n\t"
- "subu.ph %[res_oq4], %[res_oq4], %[p6] \n\t"
- "shrl.ph %[res_oq4], %[res_oq4], 4 \n\t"
-
- /* *oq5 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q2 + q3 + q4 +
- q5 * 2 + q6 + q7 * 6, 4) */
- "shll.ph %[tmp], %[q7], 2 \n\t"
- "addu.ph %[res_oq5], %[tmp], %[q7] \n\t"
- "addu.ph %[res_oq5], %[res_oq5], %[q7] \n\t"
- "addu.ph %[res_oq5], %[res_oq5], %[q5] \n\t"
- "addu.ph %[res_oq5], %[res_oq5], %[add_p6toq6] \n\t"
- "subu.ph %[res_oq5], %[res_oq5], %[p2] \n\t"
- "subu.ph %[res_oq5], %[res_oq5], %[p3] \n\t"
- "subu.ph %[res_oq5], %[res_oq5], %[p4] \n\t"
- "subu.ph %[res_oq5], %[res_oq5], %[p5] \n\t"
- "subu.ph %[res_oq5], %[res_oq5], %[p6] \n\t"
- "shrl.ph %[res_oq5], %[res_oq5], 4 \n\t"
-
- /* *oq6 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q3 +
- q4 + q5 + q6 * 2 + q7 * 7, 4) */
- "shll.ph %[tmp], %[q7], 3 \n\t"
- "subu.ph %[res_oq6], %[tmp], %[q7] \n\t"
- "addu.ph %[res_oq6], %[res_oq6], %[q6] \n\t"
- "addu.ph %[res_oq6], %[res_oq6], %[add_p6toq6] \n\t"
- "subu.ph %[res_oq6], %[res_oq6], %[p1] \n\t"
- "subu.ph %[res_oq6], %[res_oq6], %[p2] \n\t"
- "subu.ph %[res_oq6], %[res_oq6], %[p3] \n\t"
- "subu.ph %[res_oq6], %[res_oq6], %[p4] \n\t"
- "subu.ph %[res_oq6], %[res_oq6], %[p5] \n\t"
- "subu.ph %[res_oq6], %[res_oq6], %[p6] \n\t"
- "shrl.ph %[res_oq6], %[res_oq6], 4 \n\t"
-
- : [res_oq6] "=&r"(res_oq6), [res_oq5] "=&r"(res_oq5),
- [res_oq4] "=&r"(res_oq4), [res_oq3] "=&r"(res_oq3),
- [res_oq2] "=&r"(res_oq2), [res_oq1] "=&r"(res_oq1),
- [res_oq0] "=&r"(res_oq0), [tmp] "=&r"(tmp)
- : [q7] "r"(q7), [q6] "r"(q6), [q5] "r"(q5), [q4] "r"(q4), [q3] "r"(q3),
- [q2] "r"(q2), [q1] "r"(q1), [q0] "r"(q0), [p1] "r"(p1), [p2] "r"(p2),
- [p3] "r"(p3), [p4] "r"(p4), [p5] "r"(p5), [p6] "r"(p6),
- [add_p6toq6] "r"(add_p6toq6));
-
- *oq0 = res_oq0;
- *oq1 = res_oq1;
- *oq2 = res_oq2;
- *oq3 = res_oq3;
- *oq4 = res_oq4;
- *oq5 = res_oq5;
- *oq6 = res_oq6;
-}
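
The wide filter applies the same shared-sum idea at 15-tap width: add_p6toq6 is computed once, and each output adds its dominant boundary taps and subtracts the taps outside its own window. A scalar sketch of the *op6 output alone (illustrative; p[0] = p0 .. p[7] = p7, q likewise):

#include <stdint.h>

static uint8_t wide_filter_op6(const uint8_t p[8], const uint8_t q[8]) {
  int sum = 8;                                     /* rounding term */
  for (int i = 0; i <= 6; ++i) sum += p[i] + q[i]; /* p6..p0 and q0..q6 */
  /* op6 = ROUND_POWER_OF_TWO(p7 * 7 + p6 * 2 + p5 + p4 + p3 + p2 +
   *                          p1 + p0 + q0, 4) */
  return (uint8_t)((7 * p[7] + p[6] + sum - q[1] - q[2] - q[3] - q[4] -
                    q[5] - q[6]) >> 4);
}
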
-#endif // #if HAVE_DSPR2
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_MIPS_LOOPFILTER_FILTERS_DSPR2_H_
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_macros_dspr2.h b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_macros_dspr2.h
deleted file mode 100644
index 62295d69dd5..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_macros_dspr2.h
+++ /dev/null
@@ -1,437 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_
-#define AOM_AOM_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_
-
-#include <stdlib.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_mem/aom_mem.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if HAVE_DSPR2
-#define STORE_F0() \
- { \
- __asm__ __volatile__( \
- "sb %[q1_f0], 1(%[s4]) \n\t" \
- "sb %[q0_f0], 0(%[s4]) \n\t" \
- "sb %[p0_f0], -1(%[s4]) \n\t" \
- "sb %[p1_f0], -2(%[s4]) \n\t" \
- \
- : \
- : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0), \
- [p1_f0] "r"(p1_f0), [s4] "r"(s4)); \
- \
- __asm__ __volatile__( \
- "srl %[q1_f0], %[q1_f0], 8 \n\t" \
- "srl %[q0_f0], %[q0_f0], 8 \n\t" \
- "srl %[p0_f0], %[p0_f0], 8 \n\t" \
- "srl %[p1_f0], %[p1_f0], 8 \n\t" \
- \
- : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \
- [p1_f0] "+r"(p1_f0) \
- :); \
- \
- __asm__ __volatile__( \
- "sb %[q1_f0], 1(%[s3]) \n\t" \
- "sb %[q0_f0], 0(%[s3]) \n\t" \
- "sb %[p0_f0], -1(%[s3]) \n\t" \
- "sb %[p1_f0], -2(%[s3]) \n\t" \
- \
- : [p1_f0] "+r"(p1_f0) \
- : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [s3] "r"(s3), \
- [p0_f0] "r"(p0_f0)); \
- \
- __asm__ __volatile__( \
- "srl %[q1_f0], %[q1_f0], 8 \n\t" \
- "srl %[q0_f0], %[q0_f0], 8 \n\t" \
- "srl %[p0_f0], %[p0_f0], 8 \n\t" \
- "srl %[p1_f0], %[p1_f0], 8 \n\t" \
- \
- : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \
- [p1_f0] "+r"(p1_f0) \
- :); \
- \
- __asm__ __volatile__( \
- "sb %[q1_f0], 1(%[s2]) \n\t" \
- "sb %[q0_f0], 0(%[s2]) \n\t" \
- "sb %[p0_f0], -1(%[s2]) \n\t" \
- "sb %[p1_f0], -2(%[s2]) \n\t" \
- \
- : \
- : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0), \
- [p1_f0] "r"(p1_f0), [s2] "r"(s2)); \
- \
- __asm__ __volatile__( \
- "srl %[q1_f0], %[q1_f0], 8 \n\t" \
- "srl %[q0_f0], %[q0_f0], 8 \n\t" \
- "srl %[p0_f0], %[p0_f0], 8 \n\t" \
- "srl %[p1_f0], %[p1_f0], 8 \n\t" \
- \
- : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \
- [p1_f0] "+r"(p1_f0) \
- :); \
- \
- __asm__ __volatile__( \
- "sb %[q1_f0], 1(%[s1]) \n\t" \
- "sb %[q0_f0], 0(%[s1]) \n\t" \
- "sb %[p0_f0], -1(%[s1]) \n\t" \
- "sb %[p1_f0], -2(%[s1]) \n\t" \
- \
- : \
- : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0), \
- [p1_f0] "r"(p1_f0), [s1] "r"(s1)); \
- }
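
STORE_F0 scatters the four filtered pixels packed in each word back to four unaligned rows, one byte lane per row, shifting the lanes down by 8 bits between rows. A portable sketch of the same pattern (hypothetical helper; rows[0]..rows[3] stand in for s4..s1):

#include <stdint.h>

static void store_f0_scalar(uint8_t *rows[4], uint32_t p1, uint32_t p0,
                            uint32_t q0, uint32_t q1) {
  for (int i = 0; i < 4; ++i) {
    rows[i][1] = (uint8_t)q1;  /* low byte of each packed word ... */
    rows[i][0] = (uint8_t)q0;
    rows[i][-1] = (uint8_t)p0; /* ... lands on this row */
    rows[i][-2] = (uint8_t)p1;
    q1 >>= 8;                  /* expose the next row's byte */
    q0 >>= 8;
    p0 >>= 8;
    p1 >>= 8;
  }
}

STORE_F1 and STORE_F2 below follow the same pattern with >> 16 shifts, because their values live in halfword lanes split between the *_r and *_l words.
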
-
-#define STORE_F1() \
- { \
- __asm__ __volatile__( \
- "sb %[q2_r], 2(%[s4]) \n\t" \
- "sb %[q1_r], 1(%[s4]) \n\t" \
- "sb %[q0_r], 0(%[s4]) \n\t" \
- "sb %[p0_r], -1(%[s4]) \n\t" \
- "sb %[p1_r], -2(%[s4]) \n\t" \
- "sb %[p2_r], -3(%[s4]) \n\t" \
- \
- : \
- : [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), [q0_r] "r"(q0_r), \
- [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), [p2_r] "r"(p2_r), [s4] "r"(s4)); \
- \
- __asm__ __volatile__( \
- "srl %[q2_r], %[q2_r], 16 \n\t" \
- "srl %[q1_r], %[q1_r], 16 \n\t" \
- "srl %[q0_r], %[q0_r], 16 \n\t" \
- "srl %[p0_r], %[p0_r], 16 \n\t" \
- "srl %[p1_r], %[p1_r], 16 \n\t" \
- "srl %[p2_r], %[p2_r], 16 \n\t" \
- \
- : [q2_r] "+r"(q2_r), [q1_r] "+r"(q1_r), [q0_r] "+r"(q0_r), \
- [p0_r] "+r"(p0_r), [p1_r] "+r"(p1_r), [p2_r] "+r"(p2_r) \
- :); \
- \
- __asm__ __volatile__( \
- "sb %[q2_r], 2(%[s3]) \n\t" \
- "sb %[q1_r], 1(%[s3]) \n\t" \
- "sb %[q0_r], 0(%[s3]) \n\t" \
- "sb %[p0_r], -1(%[s3]) \n\t" \
- "sb %[p1_r], -2(%[s3]) \n\t" \
- "sb %[p2_r], -3(%[s3]) \n\t" \
- \
- : \
- : [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), [q0_r] "r"(q0_r), \
- [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), [p2_r] "r"(p2_r), [s3] "r"(s3)); \
- \
- __asm__ __volatile__( \
- "sb %[q2_l], 2(%[s2]) \n\t" \
- "sb %[q1_l], 1(%[s2]) \n\t" \
- "sb %[q0_l], 0(%[s2]) \n\t" \
- "sb %[p0_l], -1(%[s2]) \n\t" \
- "sb %[p1_l], -2(%[s2]) \n\t" \
- "sb %[p2_l], -3(%[s2]) \n\t" \
- \
- : \
- : [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), [q0_l] "r"(q0_l), \
- [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), [p2_l] "r"(p2_l), [s2] "r"(s2)); \
- \
- __asm__ __volatile__( \
- "srl %[q2_l], %[q2_l], 16 \n\t" \
- "srl %[q1_l], %[q1_l], 16 \n\t" \
- "srl %[q0_l], %[q0_l], 16 \n\t" \
- "srl %[p0_l], %[p0_l], 16 \n\t" \
- "srl %[p1_l], %[p1_l], 16 \n\t" \
- "srl %[p2_l], %[p2_l], 16 \n\t" \
- \
- : [q2_l] "+r"(q2_l), [q1_l] "+r"(q1_l), [q0_l] "+r"(q0_l), \
- [p0_l] "+r"(p0_l), [p1_l] "+r"(p1_l), [p2_l] "+r"(p2_l) \
- :); \
- \
- __asm__ __volatile__( \
- "sb %[q2_l], 2(%[s1]) \n\t" \
- "sb %[q1_l], 1(%[s1]) \n\t" \
- "sb %[q0_l], 0(%[s1]) \n\t" \
- "sb %[p0_l], -1(%[s1]) \n\t" \
- "sb %[p1_l], -2(%[s1]) \n\t" \
- "sb %[p2_l], -3(%[s1]) \n\t" \
- \
- : \
- : [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), [q0_l] "r"(q0_l), \
- [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), [p2_l] "r"(p2_l), [s1] "r"(s1)); \
- }
-
-#define STORE_F2() \
- { \
- __asm__ __volatile__( \
- "sb %[q6_r], 6(%[s4]) \n\t" \
- "sb %[q5_r], 5(%[s4]) \n\t" \
- "sb %[q4_r], 4(%[s4]) \n\t" \
- "sb %[q3_r], 3(%[s4]) \n\t" \
- "sb %[q2_r], 2(%[s4]) \n\t" \
- "sb %[q1_r], 1(%[s4]) \n\t" \
- "sb %[q0_r], 0(%[s4]) \n\t" \
- "sb %[p0_r], -1(%[s4]) \n\t" \
- "sb %[p1_r], -2(%[s4]) \n\t" \
- "sb %[p2_r], -3(%[s4]) \n\t" \
- "sb %[p3_r], -4(%[s4]) \n\t" \
- "sb %[p4_r], -5(%[s4]) \n\t" \
- "sb %[p5_r], -6(%[s4]) \n\t" \
- "sb %[p6_r], -7(%[s4]) \n\t" \
- \
- : \
- : [q6_r] "r"(q6_r), [q5_r] "r"(q5_r), [q4_r] "r"(q4_r), \
- [q3_r] "r"(q3_r), [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), \
- [q0_r] "r"(q0_r), [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), \
- [p2_r] "r"(p2_r), [p3_r] "r"(p3_r), [p4_r] "r"(p4_r), \
- [p5_r] "r"(p5_r), [p6_r] "r"(p6_r), [s4] "r"(s4)); \
- \
- __asm__ __volatile__( \
- "srl %[q6_r], %[q6_r], 16 \n\t" \
- "srl %[q5_r], %[q5_r], 16 \n\t" \
- "srl %[q4_r], %[q4_r], 16 \n\t" \
- "srl %[q3_r], %[q3_r], 16 \n\t" \
- "srl %[q2_r], %[q2_r], 16 \n\t" \
- "srl %[q1_r], %[q1_r], 16 \n\t" \
- "srl %[q0_r], %[q0_r], 16 \n\t" \
- "srl %[p0_r], %[p0_r], 16 \n\t" \
- "srl %[p1_r], %[p1_r], 16 \n\t" \
- "srl %[p2_r], %[p2_r], 16 \n\t" \
- "srl %[p3_r], %[p3_r], 16 \n\t" \
- "srl %[p4_r], %[p4_r], 16 \n\t" \
- "srl %[p5_r], %[p5_r], 16 \n\t" \
- "srl %[p6_r], %[p6_r], 16 \n\t" \
- \
- : [q6_r] "+r"(q6_r), [q5_r] "+r"(q5_r), [q4_r] "+r"(q4_r), \
- [q3_r] "+r"(q3_r), [q2_r] "+r"(q2_r), [q1_r] "+r"(q1_r), \
- [q0_r] "+r"(q0_r), [p0_r] "+r"(p0_r), [p1_r] "+r"(p1_r), \
- [p2_r] "+r"(p2_r), [p3_r] "+r"(p3_r), [p4_r] "+r"(p4_r), \
- [p5_r] "+r"(p5_r), [p6_r] "+r"(p6_r) \
- :); \
- \
- __asm__ __volatile__( \
- "sb %[q6_r], 6(%[s3]) \n\t" \
- "sb %[q5_r], 5(%[s3]) \n\t" \
- "sb %[q4_r], 4(%[s3]) \n\t" \
- "sb %[q3_r], 3(%[s3]) \n\t" \
- "sb %[q2_r], 2(%[s3]) \n\t" \
- "sb %[q1_r], 1(%[s3]) \n\t" \
- "sb %[q0_r], 0(%[s3]) \n\t" \
- "sb %[p0_r], -1(%[s3]) \n\t" \
- "sb %[p1_r], -2(%[s3]) \n\t" \
- "sb %[p2_r], -3(%[s3]) \n\t" \
- "sb %[p3_r], -4(%[s3]) \n\t" \
- "sb %[p4_r], -5(%[s3]) \n\t" \
- "sb %[p5_r], -6(%[s3]) \n\t" \
- "sb %[p6_r], -7(%[s3]) \n\t" \
- \
- : \
- : [q6_r] "r"(q6_r), [q5_r] "r"(q5_r), [q4_r] "r"(q4_r), \
- [q3_r] "r"(q3_r), [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), \
- [q0_r] "r"(q0_r), [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), \
- [p2_r] "r"(p2_r), [p3_r] "r"(p3_r), [p4_r] "r"(p4_r), \
- [p5_r] "r"(p5_r), [p6_r] "r"(p6_r), [s3] "r"(s3)); \
- \
- __asm__ __volatile__( \
- "sb %[q6_l], 6(%[s2]) \n\t" \
- "sb %[q5_l], 5(%[s2]) \n\t" \
- "sb %[q4_l], 4(%[s2]) \n\t" \
- "sb %[q3_l], 3(%[s2]) \n\t" \
- "sb %[q2_l], 2(%[s2]) \n\t" \
- "sb %[q1_l], 1(%[s2]) \n\t" \
- "sb %[q0_l], 0(%[s2]) \n\t" \
- "sb %[p0_l], -1(%[s2]) \n\t" \
- "sb %[p1_l], -2(%[s2]) \n\t" \
- "sb %[p2_l], -3(%[s2]) \n\t" \
- "sb %[p3_l], -4(%[s2]) \n\t" \
- "sb %[p4_l], -5(%[s2]) \n\t" \
- "sb %[p5_l], -6(%[s2]) \n\t" \
- "sb %[p6_l], -7(%[s2]) \n\t" \
- \
- : \
- : [q6_l] "r"(q6_l), [q5_l] "r"(q5_l), [q4_l] "r"(q4_l), \
- [q3_l] "r"(q3_l), [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), \
- [q0_l] "r"(q0_l), [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), \
- [p2_l] "r"(p2_l), [p3_l] "r"(p3_l), [p4_l] "r"(p4_l), \
- [p5_l] "r"(p5_l), [p6_l] "r"(p6_l), [s2] "r"(s2)); \
- \
- __asm__ __volatile__( \
- "srl %[q6_l], %[q6_l], 16 \n\t" \
- "srl %[q5_l], %[q5_l], 16 \n\t" \
- "srl %[q4_l], %[q4_l], 16 \n\t" \
- "srl %[q3_l], %[q3_l], 16 \n\t" \
- "srl %[q2_l], %[q2_l], 16 \n\t" \
- "srl %[q1_l], %[q1_l], 16 \n\t" \
- "srl %[q0_l], %[q0_l], 16 \n\t" \
- "srl %[p0_l], %[p0_l], 16 \n\t" \
- "srl %[p1_l], %[p1_l], 16 \n\t" \
- "srl %[p2_l], %[p2_l], 16 \n\t" \
- "srl %[p3_l], %[p3_l], 16 \n\t" \
- "srl %[p4_l], %[p4_l], 16 \n\t" \
- "srl %[p5_l], %[p5_l], 16 \n\t" \
- "srl %[p6_l], %[p6_l], 16 \n\t" \
- \
- : [q6_l] "+r"(q6_l), [q5_l] "+r"(q5_l), [q4_l] "+r"(q4_l), \
- [q3_l] "+r"(q3_l), [q2_l] "+r"(q2_l), [q1_l] "+r"(q1_l), \
- [q0_l] "+r"(q0_l), [p0_l] "+r"(p0_l), [p1_l] "+r"(p1_l), \
- [p2_l] "+r"(p2_l), [p3_l] "+r"(p3_l), [p4_l] "+r"(p4_l), \
- [p5_l] "+r"(p5_l), [p6_l] "+r"(p6_l) \
- :); \
- \
- __asm__ __volatile__( \
- "sb %[q6_l], 6(%[s1]) \n\t" \
- "sb %[q5_l], 5(%[s1]) \n\t" \
- "sb %[q4_l], 4(%[s1]) \n\t" \
- "sb %[q3_l], 3(%[s1]) \n\t" \
- "sb %[q2_l], 2(%[s1]) \n\t" \
- "sb %[q1_l], 1(%[s1]) \n\t" \
- "sb %[q0_l], 0(%[s1]) \n\t" \
- "sb %[p0_l], -1(%[s1]) \n\t" \
- "sb %[p1_l], -2(%[s1]) \n\t" \
- "sb %[p2_l], -3(%[s1]) \n\t" \
- "sb %[p3_l], -4(%[s1]) \n\t" \
- "sb %[p4_l], -5(%[s1]) \n\t" \
- "sb %[p5_l], -6(%[s1]) \n\t" \
- "sb %[p6_l], -7(%[s1]) \n\t" \
- \
- : \
- : [q6_l] "r"(q6_l), [q5_l] "r"(q5_l), [q4_l] "r"(q4_l), \
- [q3_l] "r"(q3_l), [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), \
- [q0_l] "r"(q0_l), [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), \
- [p2_l] "r"(p2_l), [p3_l] "r"(p3_l), [p4_l] "r"(p4_l), \
- [p5_l] "r"(p5_l), [p6_l] "r"(p6_l), [s1] "r"(s1)); \
- }
-
-#define PACK_LEFT_0TO3() \
- { \
- __asm__ __volatile__( \
- "preceu.ph.qbl %[p3_l], %[p3] \n\t" \
- "preceu.ph.qbl %[p2_l], %[p2] \n\t" \
- "preceu.ph.qbl %[p1_l], %[p1] \n\t" \
- "preceu.ph.qbl %[p0_l], %[p0] \n\t" \
- "preceu.ph.qbl %[q0_l], %[q0] \n\t" \
- "preceu.ph.qbl %[q1_l], %[q1] \n\t" \
- "preceu.ph.qbl %[q2_l], %[q2] \n\t" \
- "preceu.ph.qbl %[q3_l], %[q3] \n\t" \
- \
- : [p3_l] "=&r"(p3_l), [p2_l] "=&r"(p2_l), [p1_l] "=&r"(p1_l), \
- [p0_l] "=&r"(p0_l), [q0_l] "=&r"(q0_l), [q1_l] "=&r"(q1_l), \
- [q2_l] "=&r"(q2_l), [q3_l] "=&r"(q3_l) \
- : [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), \
- [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2), [q3] "r"(q3)); \
- }
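
Behind these PACK_* macros, preceu.ph.qbl zero-extends the two upper bytes of a quad-byte word into a pair of halfwords, and preceu.ph.qbr does the same for the two lower bytes. Portable sketches of the two instructions' effects (assuming the MIPS32 DSP lane definitions):

#include <stdint.h>

/* upper two bytes -> halfwords: b3 to bits 23..16, b2 to bits 7..0 */
static uint32_t preceu_ph_qbl(uint32_t v) {
  return ((v >> 8) & 0x00FF0000u) | ((v >> 16) & 0x000000FFu);
}

/* lower two bytes -> halfwords: b1 to bits 23..16, b0 to bits 7..0 */
static uint32_t preceu_ph_qbr(uint32_t v) {
  return ((v << 8) & 0x00FF0000u) | (v & 0x000000FFu);
}
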
-
-#define PACK_LEFT_4TO7() \
- { \
- __asm__ __volatile__( \
- "preceu.ph.qbl %[p7_l], %[p7] \n\t" \
- "preceu.ph.qbl %[p6_l], %[p6] \n\t" \
- "preceu.ph.qbl %[p5_l], %[p5] \n\t" \
- "preceu.ph.qbl %[p4_l], %[p4] \n\t" \
- "preceu.ph.qbl %[q4_l], %[q4] \n\t" \
- "preceu.ph.qbl %[q5_l], %[q5] \n\t" \
- "preceu.ph.qbl %[q6_l], %[q6] \n\t" \
- "preceu.ph.qbl %[q7_l], %[q7] \n\t" \
- \
- : [p7_l] "=&r"(p7_l), [p6_l] "=&r"(p6_l), [p5_l] "=&r"(p5_l), \
- [p4_l] "=&r"(p4_l), [q4_l] "=&r"(q4_l), [q5_l] "=&r"(q5_l), \
- [q6_l] "=&r"(q6_l), [q7_l] "=&r"(q7_l) \
- : [p7] "r"(p7), [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4), \
- [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6), [q7] "r"(q7)); \
- }
-
-#define PACK_RIGHT_0TO3() \
- { \
- __asm__ __volatile__( \
- "preceu.ph.qbr %[p3_r], %[p3] \n\t" \
- "preceu.ph.qbr %[p2_r], %[p2] \n\t" \
- "preceu.ph.qbr %[p1_r], %[p1] \n\t" \
- "preceu.ph.qbr %[p0_r], %[p0] \n\t" \
- "preceu.ph.qbr %[q0_r], %[q0] \n\t" \
- "preceu.ph.qbr %[q1_r], %[q1] \n\t" \
- "preceu.ph.qbr %[q2_r], %[q2] \n\t" \
- "preceu.ph.qbr %[q3_r], %[q3] \n\t" \
- \
- : [p3_r] "=&r"(p3_r), [p2_r] "=&r"(p2_r), [p1_r] "=&r"(p1_r), \
- [p0_r] "=&r"(p0_r), [q0_r] "=&r"(q0_r), [q1_r] "=&r"(q1_r), \
- [q2_r] "=&r"(q2_r), [q3_r] "=&r"(q3_r) \
- : [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), \
- [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2), [q3] "r"(q3)); \
- }
-
-#define PACK_RIGHT_4TO7() \
- { \
- __asm__ __volatile__( \
- "preceu.ph.qbr %[p7_r], %[p7] \n\t" \
- "preceu.ph.qbr %[p6_r], %[p6] \n\t" \
- "preceu.ph.qbr %[p5_r], %[p5] \n\t" \
- "preceu.ph.qbr %[p4_r], %[p4] \n\t" \
- "preceu.ph.qbr %[q4_r], %[q4] \n\t" \
- "preceu.ph.qbr %[q5_r], %[q5] \n\t" \
- "preceu.ph.qbr %[q6_r], %[q6] \n\t" \
- "preceu.ph.qbr %[q7_r], %[q7] \n\t" \
- \
- : [p7_r] "=&r"(p7_r), [p6_r] "=&r"(p6_r), [p5_r] "=&r"(p5_r), \
- [p4_r] "=&r"(p4_r), [q4_r] "=&r"(q4_r), [q5_r] "=&r"(q5_r), \
- [q6_r] "=&r"(q6_r), [q7_r] "=&r"(q7_r) \
- : [p7] "r"(p7), [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4), \
- [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6), [q7] "r"(q7)); \
- }
-
-#define COMBINE_LEFT_RIGHT_0TO2() \
- { \
- __asm__ __volatile__( \
- "precr.qb.ph %[p2], %[p2_l], %[p2_r] \n\t" \
- "precr.qb.ph %[p1], %[p1_l], %[p1_r] \n\t" \
- "precr.qb.ph %[p0], %[p0_l], %[p0_r] \n\t" \
- "precr.qb.ph %[q0], %[q0_l], %[q0_r] \n\t" \
- "precr.qb.ph %[q1], %[q1_l], %[q1_r] \n\t" \
- "precr.qb.ph %[q2], %[q2_l], %[q2_r] \n\t" \
- \
- : [p2] "=&r"(p2), [p1] "=&r"(p1), [p0] "=&r"(p0), [q0] "=&r"(q0), \
- [q1] "=&r"(q1), [q2] "=&r"(q2) \
- : [p2_l] "r"(p2_l), [p2_r] "r"(p2_r), [p1_l] "r"(p1_l), \
- [p1_r] "r"(p1_r), [p0_l] "r"(p0_l), [p0_r] "r"(p0_r), \
- [q0_l] "r"(q0_l), [q0_r] "r"(q0_r), [q1_l] "r"(q1_l), \
- [q1_r] "r"(q1_r), [q2_l] "r"(q2_l), [q2_r] "r"(q2_r)); \
- }
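
The COMBINE_* macros undo that widening with precr.qb.ph, which takes the low byte of each 16-bit lane from two operands and packs them into one quad-byte word. A portable sketch of the instruction's effect (illustrative):

#include <stdint.h>

/* low byte of each halfword: two lanes from 'left' fill the high half,
 * two lanes from 'right' fill the low half */
static uint32_t precr_qb_ph(uint32_t left, uint32_t right) {
  return ((left & 0x00FF0000u) << 8) | ((left & 0x000000FFu) << 16) |
         ((right & 0x00FF0000u) >> 8) | (right & 0x000000FFu);
}
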
-
-#define COMBINE_LEFT_RIGHT_3TO6() \
- { \
- __asm__ __volatile__( \
- "precr.qb.ph %[p6], %[p6_l], %[p6_r] \n\t" \
- "precr.qb.ph %[p5], %[p5_l], %[p5_r] \n\t" \
- "precr.qb.ph %[p4], %[p4_l], %[p4_r] \n\t" \
- "precr.qb.ph %[p3], %[p3_l], %[p3_r] \n\t" \
- "precr.qb.ph %[q3], %[q3_l], %[q3_r] \n\t" \
- "precr.qb.ph %[q4], %[q4_l], %[q4_r] \n\t" \
- "precr.qb.ph %[q5], %[q5_l], %[q5_r] \n\t" \
- "precr.qb.ph %[q6], %[q6_l], %[q6_r] \n\t" \
- \
- : [p6] "=&r"(p6), [p5] "=&r"(p5), [p4] "=&r"(p4), [p3] "=&r"(p3), \
- [q3] "=&r"(q3), [q4] "=&r"(q4), [q5] "=&r"(q5), [q6] "=&r"(q6) \
- : [p6_l] "r"(p6_l), [p5_l] "r"(p5_l), [p4_l] "r"(p4_l), \
- [p3_l] "r"(p3_l), [p6_r] "r"(p6_r), [p5_r] "r"(p5_r), \
- [p4_r] "r"(p4_r), [p3_r] "r"(p3_r), [q3_l] "r"(q3_l), \
- [q4_l] "r"(q4_l), [q5_l] "r"(q5_l), [q6_l] "r"(q6_l), \
- [q3_r] "r"(q3_r), [q4_r] "r"(q4_r), [q5_r] "r"(q5_r), \
- [q6_r] "r"(q6_r)); \
- }
-
-#endif // #if HAVE_DSPR2
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_masks_dspr2.h b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_masks_dspr2.h
deleted file mode 100644
index a0f57f386a2..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_masks_dspr2.h
+++ /dev/null
@@ -1,357 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_MIPS_LOOPFILTER_MASKS_DSPR2_H_
-#define AOM_AOM_DSP_MIPS_LOOPFILTER_MASKS_DSPR2_H_
-
-#include <stdlib.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_mem/aom_mem.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if HAVE_DSPR2
-/* process 4 pixels at a time;
- * hev and mask are computed in the same function */
-static INLINE void filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit,
- uint32_t p1, uint32_t p0, uint32_t p3,
- uint32_t p2, uint32_t q0, uint32_t q1,
- uint32_t q2, uint32_t q3,
- uint32_t thresh, uint32_t *hev,
- uint32_t *mask) {
- uint32_t c, r, r3, r_k;
- uint32_t s1, s2, s3;
- uint32_t ones = 0xFFFFFFFF;
- uint32_t hev1;
-
- __asm__ __volatile__(
- /* mask |= (abs(p3 - p2) > limit) */
- "subu_s.qb %[c], %[p3], %[p2] \n\t"
- "subu_s.qb %[r_k], %[p2], %[p3] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], $0, %[c] \n\t"
-
- /* mask |= (abs(p2 - p1) > limit) */
- "subu_s.qb %[c], %[p2], %[p1] \n\t"
- "subu_s.qb %[r_k], %[p1], %[p2] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
-
- /* mask |= (abs(p1 - p0) > limit)
- * hev |= (abs(p1 - p0) > thresh)
- */
- "subu_s.qb %[c], %[p1], %[p0] \n\t"
- "subu_s.qb %[r_k], %[p0], %[p1] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t"
- "or %[r3], $0, %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
-
- /* mask |= (abs(q1 - q0) > limit)
- * hev |= (abs(q1 - q0) > thresh)
- */
- "subu_s.qb %[c], %[q1], %[q0] \n\t"
- "subu_s.qb %[r_k], %[q0], %[q1] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t"
- "or %[r3], %[r3], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
-
- /* mask |= (abs(q2 - q1) > limit) */
- "subu_s.qb %[c], %[q2], %[q1] \n\t"
- "subu_s.qb %[r_k], %[q1], %[q2] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
- "sll %[r3], %[r3], 24 \n\t"
-
- /* mask |= (abs(q3 - q2) > limit) */
- "subu_s.qb %[c], %[q3], %[q2] \n\t"
- "subu_s.qb %[r_k], %[q2], %[q3] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
-
- : [c] "=&r"(c), [r_k] "=&r"(r_k), [r] "=&r"(r), [r3] "=&r"(r3)
- : [limit] "r"(limit), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1),
- [p0] "r"(p0), [q1] "r"(q1), [q0] "r"(q0), [q2] "r"(q2), [q3] "r"(q3),
- [thresh] "r"(thresh));
-
- __asm__ __volatile__(
- /* abs(p0 - q0) */
- "subu_s.qb %[c], %[p0], %[q0] \n\t"
- "subu_s.qb %[r_k], %[q0], %[p0] \n\t"
- "wrdsp %[r3] \n\t"
- "or %[s1], %[r_k], %[c] \n\t"
-
- /* abs(p1 - q1) */
- "subu_s.qb %[c], %[p1], %[q1] \n\t"
- "addu_s.qb %[s3], %[s1], %[s1] \n\t"
- "pick.qb %[hev1], %[ones], $0 \n\t"
- "subu_s.qb %[r_k], %[q1], %[p1] \n\t"
- "or %[s2], %[r_k], %[c] \n\t"
-
-      /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit */
- "shrl.qb %[s2], %[s2], 1 \n\t"
- "addu_s.qb %[s1], %[s2], %[s3] \n\t"
- "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t"
- "or %[r], %[r], %[c] \n\t"
- "sll %[r], %[r], 24 \n\t"
-
- "wrdsp %[r] \n\t"
- "pick.qb %[s2], $0, %[ones] \n\t"
-
- : [c] "=&r"(c), [r_k] "=&r"(r_k), [s1] "=&r"(s1), [hev1] "=&r"(hev1),
- [s2] "=&r"(s2), [r] "+r"(r), [s3] "=&r"(s3)
- : [p0] "r"(p0), [q0] "r"(q0), [p1] "r"(p1), [r3] "r"(r3), [q1] "r"(q1),
- [ones] "r"(ones), [flimit] "r"(flimit));
-
- *hev = hev1;
- *mask = s2;
-}
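
Stripped of the SIMD packing (and of the byte saturation the packed version relies on), the per-pixel condition this function evaluates is the standard loop-filter gate: mask ends up all-ones only where no threshold is exceeded, and hev flags high edge variance. A scalar restatement returning 0/1 instead of byte masks (hypothetical helper):

#include <stdint.h>
#include <stdlib.h>

static int filter_mask_scalar(uint8_t limit, uint8_t flimit, uint8_t thresh,
                              uint8_t p3, uint8_t p2, uint8_t p1, uint8_t p0,
                              uint8_t q0, uint8_t q1, uint8_t q2, uint8_t q3,
                              int *hev) {
  int exceeded = 0;
  exceeded |= abs(p3 - p2) > limit;
  exceeded |= abs(p2 - p1) > limit;
  exceeded |= abs(p1 - p0) > limit;
  exceeded |= abs(q1 - q0) > limit;
  exceeded |= abs(q2 - q1) > limit;
  exceeded |= abs(q3 - q2) > limit;
  exceeded |= abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit;
  *hev = (abs(p1 - p0) > thresh) || (abs(q1 - q0) > thresh);
  return !exceeded; /* 1 = filtering allowed for this pixel */
}
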
-
-static INLINE void filter_hev_mask_flatmask4_dspr2(
- uint32_t limit, uint32_t flimit, uint32_t thresh, uint32_t p1, uint32_t p0,
- uint32_t p3, uint32_t p2, uint32_t q0, uint32_t q1, uint32_t q2,
- uint32_t q3, uint32_t *hev, uint32_t *mask, uint32_t *flat) {
- uint32_t c, r, r3, r_k, r_flat;
- uint32_t s1, s2, s3;
- uint32_t ones = 0xFFFFFFFF;
- uint32_t flat_thresh = 0x01010101;
- uint32_t hev1;
- uint32_t flat1;
-
- __asm__ __volatile__(
- /* mask |= (abs(p3 - p2) > limit) */
- "subu_s.qb %[c], %[p3], %[p2] \n\t"
- "subu_s.qb %[r_k], %[p2], %[p3] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], $0, %[c] \n\t"
-
- /* mask |= (abs(p2 - p1) > limit) */
- "subu_s.qb %[c], %[p2], %[p1] \n\t"
- "subu_s.qb %[r_k], %[p1], %[p2] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
-
- /* mask |= (abs(p1 - p0) > limit)
- * hev |= (abs(p1 - p0) > thresh)
- * flat |= (abs(p1 - p0) > thresh)
- */
- "subu_s.qb %[c], %[p1], %[p0] \n\t"
- "subu_s.qb %[r_k], %[p0], %[p1] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t"
- "or %[r3], $0, %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], $0, %[c] \n\t"
-
- /* mask |= (abs(q1 - q0) > limit)
- * hev |= (abs(q1 - q0) > thresh)
- * flat |= (abs(q1 - q0) > thresh)
- */
- "subu_s.qb %[c], %[q1], %[q0] \n\t"
- "subu_s.qb %[r_k], %[q0], %[q1] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t"
- "or %[r3], %[r3], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], %[r_flat], %[c] \n\t"
-
- /* flat |= (abs(p0 - p2) > thresh) */
- "subu_s.qb %[c], %[p0], %[p2] \n\t"
- "subu_s.qb %[r_k], %[p2], %[p0] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], %[r_flat], %[c] \n\t"
-
- /* flat |= (abs(q0 - q2) > thresh) */
- "subu_s.qb %[c], %[q0], %[q2] \n\t"
- "subu_s.qb %[r_k], %[q2], %[q0] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], %[r_flat], %[c] \n\t"
-
- /* flat |= (abs(p3 - p0) > thresh) */
- "subu_s.qb %[c], %[p3], %[p0] \n\t"
- "subu_s.qb %[r_k], %[p0], %[p3] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], %[r_flat], %[c] \n\t"
-
- /* flat |= (abs(q3 - q0) > thresh) */
- "subu_s.qb %[c], %[q3], %[q0] \n\t"
- "subu_s.qb %[r_k], %[q0], %[q3] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], %[r_flat], %[c] \n\t"
- "sll %[r_flat], %[r_flat], 24 \n\t"
-      /* note: potential pipeline stall here */
- "wrdsp %[r_flat] \n\t"
- "pick.qb %[flat1], $0, %[ones] \n\t"
-
- /* mask |= (abs(q2 - q1) > limit) */
- "subu_s.qb %[c], %[q2], %[q1] \n\t"
- "subu_s.qb %[r_k], %[q1], %[q2] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
- "sll %[r3], %[r3], 24 \n\t"
-
- /* mask |= (abs(q3 - q2) > limit) */
- "subu_s.qb %[c], %[q3], %[q2] \n\t"
- "subu_s.qb %[r_k], %[q2], %[q3] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
-
- : [c] "=&r"(c), [r_k] "=&r"(r_k), [r] "=&r"(r), [r3] "=&r"(r3),
- [r_flat] "=&r"(r_flat), [flat1] "=&r"(flat1)
- : [limit] "r"(limit), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1),
- [p0] "r"(p0), [q1] "r"(q1), [q0] "r"(q0), [q2] "r"(q2), [q3] "r"(q3),
- [thresh] "r"(thresh), [flat_thresh] "r"(flat_thresh), [ones] "r"(ones));
-
- __asm__ __volatile__(
- /* abs(p0 - q0) */
- "subu_s.qb %[c], %[p0], %[q0] \n\t"
- "subu_s.qb %[r_k], %[q0], %[p0] \n\t"
- "wrdsp %[r3] \n\t"
- "or %[s1], %[r_k], %[c] \n\t"
-
- /* abs(p1 - q1) */
- "subu_s.qb %[c], %[p1], %[q1] \n\t"
- "addu_s.qb %[s3], %[s1], %[s1] \n\t"
- "pick.qb %[hev1], %[ones], $0 \n\t"
- "subu_s.qb %[r_k], %[q1], %[p1] \n\t"
- "or %[s2], %[r_k], %[c] \n\t"
-
-      /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit */
- "shrl.qb %[s2], %[s2], 1 \n\t"
- "addu_s.qb %[s1], %[s2], %[s3] \n\t"
- "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t"
- "or %[r], %[r], %[c] \n\t"
- "sll %[r], %[r], 24 \n\t"
-
- "wrdsp %[r] \n\t"
- "pick.qb %[s2], $0, %[ones] \n\t"
-
- : [c] "=&r"(c), [r_k] "=&r"(r_k), [s1] "=&r"(s1), [hev1] "=&r"(hev1),
- [s2] "=&r"(s2), [r] "+r"(r), [s3] "=&r"(s3)
- : [p0] "r"(p0), [q0] "r"(q0), [p1] "r"(p1), [r3] "r"(r3), [q1] "r"(q1),
- [ones] "r"(ones), [flimit] "r"(flimit));
-
- *hev = hev1;
- *mask = s2;
- *flat = flat1;
-}
-
-static INLINE void flatmask5(uint32_t p4, uint32_t p3, uint32_t p2, uint32_t p1,
- uint32_t p0, uint32_t q0, uint32_t q1, uint32_t q2,
- uint32_t q3, uint32_t q4, uint32_t *flat2) {
- uint32_t c, r, r_k, r_flat;
- uint32_t ones = 0xFFFFFFFF;
- uint32_t flat_thresh = 0x01010101;
- uint32_t flat1, flat3;
-
- __asm__ __volatile__(
- /* flat |= (abs(p4 - p0) > thresh) */
- "subu_s.qb %[c], %[p4], %[p0] \n\t"
- "subu_s.qb %[r_k], %[p0], %[p4] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r], $0, %[c] \n\t"
-
- /* flat |= (abs(q4 - q0) > thresh) */
- "subu_s.qb %[c], %[q4], %[q0] \n\t"
- "subu_s.qb %[r_k], %[q0], %[q4] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r], %[r], %[c] \n\t"
- "sll %[r], %[r], 24 \n\t"
- "wrdsp %[r] \n\t"
- "pick.qb %[flat3], $0, %[ones] \n\t"
-
- /* flat |= (abs(p1 - p0) > thresh) */
- "subu_s.qb %[c], %[p1], %[p0] \n\t"
- "subu_s.qb %[r_k], %[p0], %[p1] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], $0, %[c] \n\t"
-
- /* flat |= (abs(q1 - q0) > thresh) */
- "subu_s.qb %[c], %[q1], %[q0] \n\t"
- "subu_s.qb %[r_k], %[q0], %[q1] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], %[r_flat], %[c] \n\t"
-
- /* flat |= (abs(p0 - p2) > thresh) */
- "subu_s.qb %[c], %[p0], %[p2] \n\t"
- "subu_s.qb %[r_k], %[p2], %[p0] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], %[r_flat], %[c] \n\t"
-
- /* flat |= (abs(q0 - q2) > thresh) */
- "subu_s.qb %[c], %[q0], %[q2] \n\t"
- "subu_s.qb %[r_k], %[q2], %[q0] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], %[r_flat], %[c] \n\t"
-
- /* flat |= (abs(p3 - p0) > thresh) */
- "subu_s.qb %[c], %[p3], %[p0] \n\t"
- "subu_s.qb %[r_k], %[p0], %[p3] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], %[r_flat], %[c] \n\t"
-
- /* flat |= (abs(q3 - q0) > thresh) */
- "subu_s.qb %[c], %[q3], %[q0] \n\t"
- "subu_s.qb %[r_k], %[q0], %[q3] \n\t"
- "or %[r_k], %[r_k], %[c] \n\t"
- "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t"
- "or %[r_flat], %[r_flat], %[c] \n\t"
- "sll %[r_flat], %[r_flat], 24 \n\t"
- "wrdsp %[r_flat] \n\t"
- "pick.qb %[flat1], $0, %[ones] \n\t"
- /* flat & flatmask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3) */
- "and %[flat1], %[flat3], %[flat1] \n\t"
-
- : [c] "=&r"(c), [r_k] "=&r"(r_k), [r] "=&r"(r), [r_flat] "=&r"(r_flat),
- [flat1] "=&r"(flat1), [flat3] "=&r"(flat3)
- : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0),
- [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2), [q3] "r"(q3), [q4] "r"(q4),
- [flat_thresh] "r"(flat_thresh), [ones] "r"(ones));
-
- *flat2 = flat1;
-}
-#endif // #if HAVE_DSPR2
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // AOM_AOM_DSP_MIPS_LOOPFILTER_MASKS_DSPR2_H_
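For reference, the per-pixel test that flatmask5() above vectorizes with cmpgu.lt.qb/wrdsp/pick.qb reduces to the scalar C sketch below (illustrative only, not part of the patch; the 0x00/0xFF return convention mirrors pick.qb, and a thresh of 1 matches flat_thresh = 0x01010101, one per byte lane):

    #include <stdint.h>
    #include <stdlib.h>

    static uint8_t flat_mask5_scalar(uint8_t p4, uint8_t p3, uint8_t p2,
                                     uint8_t p1, uint8_t p0, uint8_t q0,
                                     uint8_t q1, uint8_t q2, uint8_t q3,
                                     uint8_t q4) {
      const int thresh = 1; /* flat_thresh is 0x01010101: one per byte lane */
      int over = 0;
      /* the same eight absolute differences the asm accumulates into r_flat */
      over |= abs(p4 - p0) > thresh;
      over |= abs(q4 - q0) > thresh;
      over |= abs(p1 - p0) > thresh;
      over |= abs(q1 - q0) > thresh;
      over |= abs(p2 - p0) > thresh;
      over |= abs(q2 - q0) > thresh;
      over |= abs(p3 - p0) > thresh;
      over |= abs(q3 - q0) > thresh;
      /* pick.qb yields 0x00 for "not flat" lanes and 0xFF for flat ones */
      return over ? 0x00 : 0xFF;
    }

The DSPr2 version produces the same 0x00/0xFF lanes for four adjacent pixels at once by folding the comparison bits into DSPControl with wrdsp and selecting between $0 and 0xFFFFFFFF with pick.qb.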
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_mb_dspr2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_mb_dspr2.c
deleted file mode 100644
index b67ccfe9d88..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_mb_dspr2.c
+++ /dev/null
@@ -1,590 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/mips/common_dspr2.h"
-#include "aom_dsp/mips/loopfilter_filters_dspr2.h"
-#include "aom_dsp/mips/loopfilter_macros_dspr2.h"
-#include "aom_dsp/mips/loopfilter_masks_dspr2.h"
-#include "aom_mem/aom_mem.h"
-
-#if HAVE_DSPR2
-void aom_lpf_horizontal_8_dspr2(unsigned char *s, int pitch,
- const uint8_t *blimit, const uint8_t *limit,
- const uint8_t *thresh) {
- uint32_t mask;
- uint32_t hev, flat;
- uint8_t i;
- uint8_t *sp3, *sp2, *sp1, *sp0, *sq0, *sq1, *sq2, *sq3;
- uint32_t thresh_vec, flimit_vec, limit_vec;
- uint32_t uflimit, ulimit, uthresh;
- uint32_t p1_f0, p0_f0, q0_f0, q1_f0;
- uint32_t p3, p2, p1, p0, q0, q1, q2, q3;
- uint32_t p0_l, p1_l, p2_l, p3_l, q0_l, q1_l, q2_l, q3_l;
- uint32_t p0_r, p1_r, p2_r, p3_r, q0_r, q1_r, q2_r, q3_r;
-
- uflimit = *blimit;
- ulimit = *limit;
- uthresh = *thresh;
-
- /* create quad-byte */
- __asm__ __volatile__(
- "replv.qb %[thresh_vec], %[uthresh] \n\t"
- "replv.qb %[flimit_vec], %[uflimit] \n\t"
- "replv.qb %[limit_vec], %[ulimit] \n\t"
-
- : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec),
- [limit_vec] "=r"(limit_vec)
- : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit));
-
- /* prefetch data for store */
- prefetch_store(s);
-
- for (i = 0; i < 2; i++) {
- sp3 = s - (pitch << 2);
- sp2 = sp3 + pitch;
- sp1 = sp2 + pitch;
- sp0 = sp1 + pitch;
- sq0 = s;
- sq1 = s + pitch;
- sq2 = sq1 + pitch;
- sq3 = sq2 + pitch;
-
- __asm__ __volatile__(
- "lw %[p3], (%[sp3]) \n\t"
- "lw %[p2], (%[sp2]) \n\t"
- "lw %[p1], (%[sp1]) \n\t"
- "lw %[p0], (%[sp0]) \n\t"
- "lw %[q0], (%[sq0]) \n\t"
- "lw %[q1], (%[sq1]) \n\t"
- "lw %[q2], (%[sq2]) \n\t"
- "lw %[q3], (%[sq3]) \n\t"
-
- : [p3] "=&r"(p3), [p2] "=&r"(p2), [p1] "=&r"(p1), [p0] "=&r"(p0),
- [q3] "=&r"(q3), [q2] "=&r"(q2), [q1] "=&r"(q1), [q0] "=&r"(q0)
- : [sp3] "r"(sp3), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sq3] "r"(sq3), [sq2] "r"(sq2), [sq1] "r"(sq1), [sq0] "r"(sq0));
-
- filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, p1, p0,
- p3, p2, q0, q1, q2, q3, &hev, &mask, &flat);
-
- if ((flat == 0) && (mask != 0)) {
- filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
-
- __asm__ __volatile__(
- "sw %[p1_f0], (%[sp1]) \n\t"
- "sw %[p0_f0], (%[sp0]) \n\t"
- "sw %[q0_f0], (%[sq0]) \n\t"
- "sw %[q1_f0], (%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
- [sq1] "r"(sq1));
- } else if ((mask & flat) == 0xFFFFFFFF) {
- /* left 2 element operation */
- PACK_LEFT_0TO3()
- mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l);
-
- /* right 2 element operation */
- PACK_RIGHT_0TO3()
- mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r);
-
- COMBINE_LEFT_RIGHT_0TO2()
-
- __asm__ __volatile__(
- "sw %[p2], (%[sp2]) \n\t"
- "sw %[p1], (%[sp1]) \n\t"
- "sw %[p0], (%[sp0]) \n\t"
- "sw %[q0], (%[sq0]) \n\t"
- "sw %[q1], (%[sq1]) \n\t"
- "sw %[q2], (%[sq2]) \n\t"
-
- :
- : [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), [q0] "r"(q0),
- [q1] "r"(q1), [q2] "r"(q2), [sp2] "r"(sp2), [sp1] "r"(sp1),
- [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2));
- } else if ((flat != 0) && (mask != 0)) {
- /* filtering */
- filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
-
- /* left 2 element operation */
- PACK_LEFT_0TO3()
- mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l);
-
- /* right 2 element operation */
- PACK_RIGHT_0TO3()
- mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r);
-
- if (mask & flat & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p2_r], (%[sp2]) \n\t"
- "sb %[p1_r], (%[sp1]) \n\t"
- "sb %[p0_r], (%[sp0]) \n\t"
- "sb %[q0_r], (%[sq0]) \n\t"
- "sb %[q1_r], (%[sq1]) \n\t"
- "sb %[q2_r], (%[sq2]) \n\t"
-
- :
- : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r),
- [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
- [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
- [sq1] "r"(sq1), [sq2] "r"(sq2));
- } else if (mask & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p1_f0], (%[sp1]) \n\t"
- "sb %[p0_f0], (%[sp0]) \n\t"
- "sb %[q0_f0], (%[sq0]) \n\t"
- "sb %[q1_f0], (%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sq0] "r"(sq0), [sq1] "r"(sq1));
- }
-
- __asm__ __volatile__(
- "srl %[p2_r], %[p2_r], 16 \n\t"
- "srl %[p1_r], %[p1_r], 16 \n\t"
- "srl %[p0_r], %[p0_r], 16 \n\t"
- "srl %[q0_r], %[q0_r], 16 \n\t"
- "srl %[q1_r], %[q1_r], 16 \n\t"
- "srl %[q2_r], %[q2_r], 16 \n\t"
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2_r] "+r"(p2_r), [p1_r] "+r"(p1_r), [p0_r] "+r"(p0_r),
- [q0_r] "+r"(q0_r), [q1_r] "+r"(q1_r), [q2_r] "+r"(q2_r),
- [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p2_r], +1(%[sp2]) \n\t"
- "sb %[p1_r], +1(%[sp1]) \n\t"
- "sb %[p0_r], +1(%[sp0]) \n\t"
- "sb %[q0_r], +1(%[sq0]) \n\t"
- "sb %[q1_r], +1(%[sq1]) \n\t"
- "sb %[q2_r], +1(%[sq2]) \n\t"
-
- :
- : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r),
- [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
- [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
- [sq1] "r"(sq1), [sq2] "r"(sq2));
- } else if (mask & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p1_f0], +1(%[sp1]) \n\t"
- "sb %[p0_f0], +1(%[sp0]) \n\t"
- "sb %[q0_f0], +1(%[sq0]) \n\t"
- "sb %[q1_f0], +1(%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sq0] "r"(sq0), [sq1] "r"(sq1));
- }
-
- __asm__ __volatile__(
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0), [q0] "+r"(q0),
- [q1] "+r"(q1), [q2] "+r"(q2), [p1_f0] "+r"(p1_f0),
- [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0), [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p2_l], +2(%[sp2]) \n\t"
- "sb %[p1_l], +2(%[sp1]) \n\t"
- "sb %[p0_l], +2(%[sp0]) \n\t"
- "sb %[q0_l], +2(%[sq0]) \n\t"
- "sb %[q1_l], +2(%[sq1]) \n\t"
- "sb %[q2_l], +2(%[sq2]) \n\t"
-
- :
- : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l),
- [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
- [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
- [sq1] "r"(sq1), [sq2] "r"(sq2));
- } else if (mask & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p1_f0], +2(%[sp1]) \n\t"
- "sb %[p0_f0], +2(%[sp0]) \n\t"
- "sb %[q0_f0], +2(%[sq0]) \n\t"
- "sb %[q1_f0], +2(%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sq0] "r"(sq0), [sq1] "r"(sq1));
- }
-
- __asm__ __volatile__(
- "srl %[p2_l], %[p2_l], 16 \n\t"
- "srl %[p1_l], %[p1_l], 16 \n\t"
- "srl %[p0_l], %[p0_l], 16 \n\t"
- "srl %[q0_l], %[q0_l], 16 \n\t"
- "srl %[q1_l], %[q1_l], 16 \n\t"
- "srl %[q2_l], %[q2_l], 16 \n\t"
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2_l] "+r"(p2_l), [p1_l] "+r"(p1_l), [p0_l] "+r"(p0_l),
- [q0_l] "+r"(q0_l), [q1_l] "+r"(q1_l), [q2_l] "+r"(q2_l),
- [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p2_l], +3(%[sp2]) \n\t"
- "sb %[p1_l], +3(%[sp1]) \n\t"
- "sb %[p0_l], +3(%[sp0]) \n\t"
- "sb %[q0_l], +3(%[sq0]) \n\t"
- "sb %[q1_l], +3(%[sq1]) \n\t"
- "sb %[q2_l], +3(%[sq2]) \n\t"
-
- :
- : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l),
- [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
- [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
- [sq1] "r"(sq1), [sq2] "r"(sq2));
- } else if (mask & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p1_f0], +3(%[sp1]) \n\t"
- "sb %[p0_f0], +3(%[sp0]) \n\t"
- "sb %[q0_f0], +3(%[sq0]) \n\t"
- "sb %[q1_f0], +3(%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sq0] "r"(sq0), [sq1] "r"(sq1));
- }
- }
-
- s = s + 4;
- }
-}
-
-void aom_lpf_vertical_8_dspr2(unsigned char *s, int pitch,
- const uint8_t *blimit, const uint8_t *limit,
- const uint8_t *thresh) {
- uint8_t i;
- uint32_t mask, hev, flat;
- uint8_t *s1, *s2, *s3, *s4;
- uint32_t prim1, prim2, sec3, sec4, prim3, prim4;
- uint32_t thresh_vec, flimit_vec, limit_vec;
- uint32_t uflimit, ulimit, uthresh;
- uint32_t p3, p2, p1, p0, q3, q2, q1, q0;
- uint32_t p1_f0, p0_f0, q0_f0, q1_f0;
- uint32_t p0_l, p1_l, p2_l, p3_l, q0_l, q1_l, q2_l, q3_l;
- uint32_t p0_r, p1_r, p2_r, p3_r, q0_r, q1_r, q2_r, q3_r;
-
- uflimit = *blimit;
- ulimit = *limit;
- uthresh = *thresh;
-
- /* create quad-byte */
- __asm__ __volatile__(
- "replv.qb %[thresh_vec], %[uthresh] \n\t"
- "replv.qb %[flimit_vec], %[uflimit] \n\t"
- "replv.qb %[limit_vec], %[ulimit] \n\t"
-
- : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec),
- [limit_vec] "=r"(limit_vec)
- : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit));
-
- prefetch_store(s + pitch);
-
- for (i = 0; i < 2; i++) {
- s1 = s;
- s2 = s + pitch;
- s3 = s2 + pitch;
- s4 = s3 + pitch;
- s = s4 + pitch;
-
- __asm__ __volatile__(
- "lw %[p0], -4(%[s1]) \n\t"
- "lw %[p1], -4(%[s2]) \n\t"
- "lw %[p2], -4(%[s3]) \n\t"
- "lw %[p3], -4(%[s4]) \n\t"
- "lw %[q3], (%[s1]) \n\t"
- "lw %[q2], (%[s2]) \n\t"
- "lw %[q1], (%[s3]) \n\t"
- "lw %[q0], (%[s4]) \n\t"
-
- : [p3] "=&r"(p3), [p2] "=&r"(p2), [p1] "=&r"(p1), [p0] "=&r"(p0),
- [q0] "=&r"(q0), [q1] "=&r"(q1), [q2] "=&r"(q2), [q3] "=&r"(q3)
- : [s1] "r"(s1), [s2] "r"(s2), [s3] "r"(s3), [s4] "r"(s4));
-
- /* transpose p3, p2, p1, p0
- original (when loaded from memory)
- register -4 -3 -2 -1
- p0 p0_0 p0_1 p0_2 p0_3
- p1 p1_0 p1_1 p1_2 p1_3
- p2 p2_0 p2_1 p2_2 p2_3
- p3 p3_0 p3_1 p3_2 p3_3
-
- after transpose
- register
- p0 p3_3 p2_3 p1_3 p0_3
- p1 p3_2 p2_2 p1_2 p0_2
- p2 p3_1 p2_1 p1_1 p0_1
- p3 p3_0 p2_0 p1_0 p0_0
- */
- __asm__ __volatile__(
- "precrq.qb.ph %[prim1], %[p0], %[p1] \n\t"
- "precr.qb.ph %[prim2], %[p0], %[p1] \n\t"
- "precrq.qb.ph %[prim3], %[p2], %[p3] \n\t"
- "precr.qb.ph %[prim4], %[p2], %[p3] \n\t"
-
- "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t"
- "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t"
- "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
- "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
-
- "precrq.ph.w %[p0], %[p1], %[sec3] \n\t"
- "precrq.ph.w %[p2], %[p3], %[sec4] \n\t"
- "append %[p1], %[sec3], 16 \n\t"
- "append %[p3], %[sec4], 16 \n\t"
-
- : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3),
- [prim4] "=&r"(prim4), [p0] "+r"(p0), [p1] "+r"(p1), [p2] "+r"(p2),
- [p3] "+r"(p3), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4)
- :);
-
- /* transpose q0, q1, q2, q3
- original (when loaded from memory)
- register +1 +2 +3 +4
- q3 q3_0 q3_1 q3_2 q3_3
- q2 q2_0 q2_1 q2_2 q2_3
- q1 q1_0 q1_1 q1_2 q1_3
- q0 q0_0 q0_1 q0_2 q0_3
-
- after transpose
- register
- q3 q0_3 q1_3 q2_3 q3_3
- q2 q0_2 q1_2 q2_2 q3_2
- q1 q0_1 q1_1 q2_1 q3_1
- q0 q0_0 q1_0 q2_0 q3_0
- */
- __asm__ __volatile__(
- "precrq.qb.ph %[prim1], %[q3], %[q2] \n\t"
- "precr.qb.ph %[prim2], %[q3], %[q2] \n\t"
- "precrq.qb.ph %[prim3], %[q1], %[q0] \n\t"
- "precr.qb.ph %[prim4], %[q1], %[q0] \n\t"
-
- "precrq.qb.ph %[q2], %[prim1], %[prim2] \n\t"
- "precr.qb.ph %[q0], %[prim1], %[prim2] \n\t"
- "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
- "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
-
- "precrq.ph.w %[q3], %[q2], %[sec3] \n\t"
- "precrq.ph.w %[q1], %[q0], %[sec4] \n\t"
- "append %[q2], %[sec3], 16 \n\t"
- "append %[q0], %[sec4], 16 \n\t"
-
- : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3),
- [prim4] "=&r"(prim4), [q3] "+r"(q3), [q2] "+r"(q2), [q1] "+r"(q1),
- [q0] "+r"(q0), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4)
- :);
-
- filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, p1, p0,
- p3, p2, q0, q1, q2, q3, &hev, &mask, &flat);
-
- if ((flat == 0) && (mask != 0)) {
- filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
- STORE_F0()
- } else if ((mask & flat) == 0xFFFFFFFF) {
- /* left 2 element operation */
- PACK_LEFT_0TO3()
- mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l);
-
- /* right 2 element operation */
- PACK_RIGHT_0TO3()
- mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r);
-
- STORE_F1()
- } else if ((flat != 0) && (mask != 0)) {
- filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
-
- /* left 2 element operation */
- PACK_LEFT_0TO3()
- mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l);
-
- /* right 2 element operation */
- PACK_RIGHT_0TO3()
- mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r);
-
- if (mask & flat & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p2_r], -3(%[s4]) \n\t"
- "sb %[p1_r], -2(%[s4]) \n\t"
- "sb %[p0_r], -1(%[s4]) \n\t"
- "sb %[q0_r], (%[s4]) \n\t"
- "sb %[q1_r], +1(%[s4]) \n\t"
- "sb %[q2_r], +2(%[s4]) \n\t"
-
- :
- : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r),
- [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
- [s4] "r"(s4));
- } else if (mask & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p1_f0], -2(%[s4]) \n\t"
- "sb %[p0_f0], -1(%[s4]) \n\t"
- "sb %[q0_f0], (%[s4]) \n\t"
- "sb %[q1_f0], +1(%[s4]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [s4] "r"(s4));
- }
-
- __asm__ __volatile__(
- "srl %[p2_r], %[p2_r], 16 \n\t"
- "srl %[p1_r], %[p1_r], 16 \n\t"
- "srl %[p0_r], %[p0_r], 16 \n\t"
- "srl %[q0_r], %[q0_r], 16 \n\t"
- "srl %[q1_r], %[q1_r], 16 \n\t"
- "srl %[q2_r], %[q2_r], 16 \n\t"
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2_r] "+r"(p2_r), [p1_r] "+r"(p1_r), [p0_r] "+r"(p0_r),
- [q0_r] "+r"(q0_r), [q1_r] "+r"(q1_r), [q2_r] "+r"(q2_r),
- [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p2_r], -3(%[s3]) \n\t"
- "sb %[p1_r], -2(%[s3]) \n\t"
- "sb %[p0_r], -1(%[s3]) \n\t"
- "sb %[q0_r], (%[s3]) \n\t"
- "sb %[q1_r], +1(%[s3]) \n\t"
- "sb %[q2_r], +2(%[s3]) \n\t"
-
- :
- : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r),
- [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
- [s3] "r"(s3));
- } else if (mask & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p1_f0], -2(%[s3]) \n\t"
- "sb %[p0_f0], -1(%[s3]) \n\t"
- "sb %[q0_f0], (%[s3]) \n\t"
- "sb %[q1_f0], +1(%[s3]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [s3] "r"(s3));
- }
-
- __asm__ __volatile__(
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0), [q0] "+r"(q0),
- [q1] "+r"(q1), [q2] "+r"(q2), [p1_f0] "+r"(p1_f0),
- [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0), [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p2_l], -3(%[s2]) \n\t"
- "sb %[p1_l], -2(%[s2]) \n\t"
- "sb %[p0_l], -1(%[s2]) \n\t"
- "sb %[q0_l], (%[s2]) \n\t"
- "sb %[q1_l], +1(%[s2]) \n\t"
- "sb %[q2_l], +2(%[s2]) \n\t"
-
- :
- : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l),
- [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
- [s2] "r"(s2));
- } else if (mask & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p1_f0], -2(%[s2]) \n\t"
- "sb %[p0_f0], -1(%[s2]) \n\t"
- "sb %[q0_f0], (%[s2]) \n\t"
- "sb %[q1_f0], +1(%[s2]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [s2] "r"(s2));
- }
-
- __asm__ __volatile__(
- "srl %[p2_l], %[p2_l], 16 \n\t"
- "srl %[p1_l], %[p1_l], 16 \n\t"
- "srl %[p0_l], %[p0_l], 16 \n\t"
- "srl %[q0_l], %[q0_l], 16 \n\t"
- "srl %[q1_l], %[q1_l], 16 \n\t"
- "srl %[q2_l], %[q2_l], 16 \n\t"
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2_l] "+r"(p2_l), [p1_l] "+r"(p1_l), [p0_l] "+r"(p0_l),
- [q0_l] "+r"(q0_l), [q1_l] "+r"(q1_l), [q2_l] "+r"(q2_l),
- [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p2_l], -3(%[s1]) \n\t"
- "sb %[p1_l], -2(%[s1]) \n\t"
- "sb %[p0_l], -1(%[s1]) \n\t"
- "sb %[q0_l], (%[s1]) \n\t"
- "sb %[q1_l], +1(%[s1]) \n\t"
- "sb %[q2_l], +2(%[s1]) \n\t"
-
- :
- : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l),
- [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
- [s1] "r"(s1));
- } else if (mask & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p1_f0], -2(%[s1]) \n\t"
- "sb %[p0_f0], -1(%[s1]) \n\t"
- "sb %[q0_f0], (%[s1]) \n\t"
- "sb %[q1_f0], +1(%[s1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [s1] "r"(s1));
- }
- }
- }
-}
-#endif // #if HAVE_DSPR2
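The precrq/precr/append sequences above realize a 4x4 byte transpose across four packed 32-bit words, turning four rows of pixels into four columns so the vertical filter can reuse the horizontal filter's quad-byte arithmetic. A portable sketch of the same reshuffle (assuming little-endian byte order as on the DSPr2 targets; the helper name is illustrative):

    #include <stdint.h>

    /* Transpose a 4x4 block of bytes held in four packed little-endian words:
       byte 'row' of input word 'col' moves to byte 'col' of output word 'row'. */
    static void transpose_4x4_bytes(const uint32_t in[4], uint32_t out[4]) {
      for (int row = 0; row < 4; row++) {
        uint32_t w = 0;
        for (int col = 0; col < 4; col++) {
          w |= ((in[col] >> (8 * row)) & 0xFF) << (8 * col);
        }
        out[row] = w;
      }
    }

The DSPr2 code does this reshuffle in twelve pack/append instructions per 4x4 block instead of the sixteen shift-and-mask steps of the scalar form.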
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_mb_horiz_dspr2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_mb_horiz_dspr2.c
deleted file mode 100644
index 34733e42ef8..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_mb_horiz_dspr2.c
+++ /dev/null
@@ -1,734 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/mips/common_dspr2.h"
-#include "aom_dsp/mips/loopfilter_filters_dspr2.h"
-#include "aom_dsp/mips/loopfilter_macros_dspr2.h"
-#include "aom_dsp/mips/loopfilter_masks_dspr2.h"
-#include "aom_mem/aom_mem.h"
-
-#if HAVE_DSPR2
-static void mb_lpf_horizontal_edge(unsigned char *s, int pitch,
- const uint8_t *blimit, const uint8_t *limit,
- const uint8_t *thresh, int count) {
- uint32_t mask;
- uint32_t hev, flat, flat2;
- uint8_t i;
- uint8_t *sp7, *sp6, *sp5, *sp4, *sp3, *sp2, *sp1, *sp0;
- uint8_t *sq0, *sq1, *sq2, *sq3, *sq4, *sq5, *sq6, *sq7;
- uint32_t thresh_vec, flimit_vec, limit_vec;
- uint32_t uflimit, ulimit, uthresh;
- uint32_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
- uint32_t p1_f0, p0_f0, q0_f0, q1_f0;
- uint32_t p7_l, p6_l, p5_l, p4_l, p3_l, p2_l, p1_l, p0_l;
- uint32_t q0_l, q1_l, q2_l, q3_l, q4_l, q5_l, q6_l, q7_l;
- uint32_t p7_r, p6_r, p5_r, p4_r, p3_r, p2_r, p1_r, p0_r;
- uint32_t q0_r, q1_r, q2_r, q3_r, q4_r, q5_r, q6_r, q7_r;
- uint32_t p2_l_f1, p1_l_f1, p0_l_f1, p2_r_f1, p1_r_f1, p0_r_f1;
- uint32_t q0_l_f1, q1_l_f1, q2_l_f1, q0_r_f1, q1_r_f1, q2_r_f1;
-
- uflimit = *blimit;
- ulimit = *limit;
- uthresh = *thresh;
-
- /* create quad-byte */
- __asm__ __volatile__(
- "replv.qb %[thresh_vec], %[uthresh] \n\t"
- "replv.qb %[flimit_vec], %[uflimit] \n\t"
- "replv.qb %[limit_vec], %[ulimit] \n\t"
-
- : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec),
- [limit_vec] "=r"(limit_vec)
- : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit));
-
- /* prefetch data for store */
- prefetch_store(s);
-
- for (i = 0; i < (2 * count); i++) {
- sp7 = s - (pitch << 3);
- sp6 = sp7 + pitch;
- sp5 = sp6 + pitch;
- sp4 = sp5 + pitch;
- sp3 = sp4 + pitch;
- sp2 = sp3 + pitch;
- sp1 = sp2 + pitch;
- sp0 = sp1 + pitch;
- sq0 = s;
- sq1 = s + pitch;
- sq2 = sq1 + pitch;
- sq3 = sq2 + pitch;
- sq4 = sq3 + pitch;
- sq5 = sq4 + pitch;
- sq6 = sq5 + pitch;
- sq7 = sq6 + pitch;
-
- __asm__ __volatile__(
- "lw %[p7], (%[sp7]) \n\t"
- "lw %[p6], (%[sp6]) \n\t"
- "lw %[p5], (%[sp5]) \n\t"
- "lw %[p4], (%[sp4]) \n\t"
- "lw %[p3], (%[sp3]) \n\t"
- "lw %[p2], (%[sp2]) \n\t"
- "lw %[p1], (%[sp1]) \n\t"
- "lw %[p0], (%[sp0]) \n\t"
-
- : [p3] "=&r"(p3), [p2] "=&r"(p2), [p1] "=&r"(p1), [p0] "=&r"(p0),
- [p7] "=&r"(p7), [p6] "=&r"(p6), [p5] "=&r"(p5), [p4] "=&r"(p4)
- : [sp3] "r"(sp3), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sp4] "r"(sp4), [sp5] "r"(sp5), [sp6] "r"(sp6), [sp7] "r"(sp7));
-
- __asm__ __volatile__(
- "lw %[q0], (%[sq0]) \n\t"
- "lw %[q1], (%[sq1]) \n\t"
- "lw %[q2], (%[sq2]) \n\t"
- "lw %[q3], (%[sq3]) \n\t"
- "lw %[q4], (%[sq4]) \n\t"
- "lw %[q5], (%[sq5]) \n\t"
- "lw %[q6], (%[sq6]) \n\t"
- "lw %[q7], (%[sq7]) \n\t"
-
- : [q3] "=&r"(q3), [q2] "=&r"(q2), [q1] "=&r"(q1), [q0] "=&r"(q0),
- [q7] "=&r"(q7), [q6] "=&r"(q6), [q5] "=&r"(q5), [q4] "=&r"(q4)
- : [sq3] "r"(sq3), [sq2] "r"(sq2), [sq1] "r"(sq1), [sq0] "r"(sq0),
- [sq4] "r"(sq4), [sq5] "r"(sq5), [sq6] "r"(sq6), [sq7] "r"(sq7));
-
- filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, p1, p0,
- p3, p2, q0, q1, q2, q3, &hev, &mask, &flat);
-
- flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
-
- /* f0 */
- if (((flat2 == 0) && (flat == 0) && (mask != 0)) ||
- ((flat2 != 0) && (flat == 0) && (mask != 0))) {
- filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
-
- __asm__ __volatile__(
- "sw %[p1_f0], (%[sp1]) \n\t"
- "sw %[p0_f0], (%[sp0]) \n\t"
- "sw %[q0_f0], (%[sq0]) \n\t"
- "sw %[q1_f0], (%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
- [sq1] "r"(sq1));
-    } else if ((flat2 == 0xFFFFFFFF) && (flat == 0xFFFFFFFF) &&
- (mask == 0xFFFFFFFF)) {
- /* f2 */
- PACK_LEFT_0TO3()
- PACK_LEFT_4TO7()
- wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, &p3_l, &p2_l, &p1_l,
- &p0_l, &q0_l, &q1_l, &q2_l, &q3_l, &q4_l, &q5_l,
- &q6_l, &q7_l);
-
- PACK_RIGHT_0TO3()
- PACK_RIGHT_4TO7()
- wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, &p3_r, &p2_r, &p1_r,
- &p0_r, &q0_r, &q1_r, &q2_r, &q3_r, &q4_r, &q5_r,
- &q6_r, &q7_r);
-
- COMBINE_LEFT_RIGHT_0TO2()
- COMBINE_LEFT_RIGHT_3TO6()
-
- __asm__ __volatile__(
- "sw %[p6], (%[sp6]) \n\t"
- "sw %[p5], (%[sp5]) \n\t"
- "sw %[p4], (%[sp4]) \n\t"
- "sw %[p3], (%[sp3]) \n\t"
- "sw %[p2], (%[sp2]) \n\t"
- "sw %[p1], (%[sp1]) \n\t"
- "sw %[p0], (%[sp0]) \n\t"
-
- :
- : [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3),
- [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), [sp6] "r"(sp6),
- [sp5] "r"(sp5), [sp4] "r"(sp4), [sp3] "r"(sp3), [sp2] "r"(sp2),
- [sp1] "r"(sp1), [sp0] "r"(sp0));
-
- __asm__ __volatile__(
- "sw %[q6], (%[sq6]) \n\t"
- "sw %[q5], (%[sq5]) \n\t"
- "sw %[q4], (%[sq4]) \n\t"
- "sw %[q3], (%[sq3]) \n\t"
- "sw %[q2], (%[sq2]) \n\t"
- "sw %[q1], (%[sq1]) \n\t"
- "sw %[q0], (%[sq0]) \n\t"
-
- :
- : [q6] "r"(q6), [q5] "r"(q5), [q4] "r"(q4), [q3] "r"(q3),
- [q2] "r"(q2), [q1] "r"(q1), [q0] "r"(q0), [sq6] "r"(sq6),
- [sq5] "r"(sq5), [sq4] "r"(sq4), [sq3] "r"(sq3), [sq2] "r"(sq2),
- [sq1] "r"(sq1), [sq0] "r"(sq0));
- } else if ((flat2 == 0) && (flat == 0xFFFFFFFF) && (mask == 0xFFFFFFFF)) {
- /* f1 */
- /* left 2 element operation */
- PACK_LEFT_0TO3()
- mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l);
-
- /* right 2 element operation */
- PACK_RIGHT_0TO3()
- mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r);
-
- COMBINE_LEFT_RIGHT_0TO2()
-
- __asm__ __volatile__(
- "sw %[p2], (%[sp2]) \n\t"
- "sw %[p1], (%[sp1]) \n\t"
- "sw %[p0], (%[sp0]) \n\t"
- "sw %[q0], (%[sq0]) \n\t"
- "sw %[q1], (%[sq1]) \n\t"
- "sw %[q2], (%[sq2]) \n\t"
-
- :
- : [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), [q0] "r"(q0),
- [q1] "r"(q1), [q2] "r"(q2), [sp2] "r"(sp2), [sp1] "r"(sp1),
- [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2));
- } else if ((flat2 == 0) && (flat != 0) && (mask != 0)) {
- /* f0+f1 */
- filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
-
- /* left 2 element operation */
- PACK_LEFT_0TO3()
- mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l);
-
- /* right 2 element operation */
- PACK_RIGHT_0TO3()
- mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r);
-
- if (mask & flat & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p2_r], (%[sp2]) \n\t"
- "sb %[p1_r], (%[sp1]) \n\t"
- "sb %[p0_r], (%[sp0]) \n\t"
- "sb %[q0_r], (%[sq0]) \n\t"
- "sb %[q1_r], (%[sq1]) \n\t"
- "sb %[q2_r], (%[sq2]) \n\t"
-
- :
- : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r),
- [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
- [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
- [sq1] "r"(sq1), [sq2] "r"(sq2));
- } else if (mask & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p1_f0], (%[sp1]) \n\t"
- "sb %[p0_f0], (%[sp0]) \n\t"
- "sb %[q0_f0], (%[sq0]) \n\t"
- "sb %[q1_f0], (%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sq0] "r"(sq0), [sq1] "r"(sq1));
- }
-
- __asm__ __volatile__(
- "srl %[p2_r], %[p2_r], 16 \n\t"
- "srl %[p1_r], %[p1_r], 16 \n\t"
- "srl %[p0_r], %[p0_r], 16 \n\t"
- "srl %[q0_r], %[q0_r], 16 \n\t"
- "srl %[q1_r], %[q1_r], 16 \n\t"
- "srl %[q2_r], %[q2_r], 16 \n\t"
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2_r] "+r"(p2_r), [p1_r] "+r"(p1_r), [p0_r] "+r"(p0_r),
- [q0_r] "+r"(q0_r), [q1_r] "+r"(q1_r), [q2_r] "+r"(q2_r),
- [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p2_r], +1(%[sp2]) \n\t"
- "sb %[p1_r], +1(%[sp1]) \n\t"
- "sb %[p0_r], +1(%[sp0]) \n\t"
- "sb %[q0_r], +1(%[sq0]) \n\t"
- "sb %[q1_r], +1(%[sq1]) \n\t"
- "sb %[q2_r], +1(%[sq2]) \n\t"
-
- :
- : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r),
- [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
- [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
- [sq1] "r"(sq1), [sq2] "r"(sq2));
- } else if (mask & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p1_f0], +1(%[sp1]) \n\t"
- "sb %[p0_f0], +1(%[sp0]) \n\t"
- "sb %[q0_f0], +1(%[sq0]) \n\t"
- "sb %[q1_f0], +1(%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sq0] "r"(sq0), [sq1] "r"(sq1));
- }
-
- __asm__ __volatile__(
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p2_l], +2(%[sp2]) \n\t"
- "sb %[p1_l], +2(%[sp1]) \n\t"
- "sb %[p0_l], +2(%[sp0]) \n\t"
- "sb %[q0_l], +2(%[sq0]) \n\t"
- "sb %[q1_l], +2(%[sq1]) \n\t"
- "sb %[q2_l], +2(%[sq2]) \n\t"
-
- :
- : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l),
- [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
- [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
- [sq1] "r"(sq1), [sq2] "r"(sq2));
- } else if (mask & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p1_f0], +2(%[sp1]) \n\t"
- "sb %[p0_f0], +2(%[sp0]) \n\t"
- "sb %[q0_f0], +2(%[sq0]) \n\t"
- "sb %[q1_f0], +2(%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sq0] "r"(sq0), [sq1] "r"(sq1));
- }
-
- __asm__ __volatile__(
- "srl %[p2_l], %[p2_l], 16 \n\t"
- "srl %[p1_l], %[p1_l], 16 \n\t"
- "srl %[p0_l], %[p0_l], 16 \n\t"
- "srl %[q0_l], %[q0_l], 16 \n\t"
- "srl %[q1_l], %[q1_l], 16 \n\t"
- "srl %[q2_l], %[q2_l], 16 \n\t"
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2_l] "+r"(p2_l), [p1_l] "+r"(p1_l), [p0_l] "+r"(p0_l),
- [q0_l] "+r"(q0_l), [q1_l] "+r"(q1_l), [q2_l] "+r"(q2_l),
- [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p2_l], +3(%[sp2]) \n\t"
- "sb %[p1_l], +3(%[sp1]) \n\t"
- "sb %[p0_l], +3(%[sp0]) \n\t"
- "sb %[q0_l], +3(%[sq0]) \n\t"
- "sb %[q1_l], +3(%[sq1]) \n\t"
- "sb %[q2_l], +3(%[sq2]) \n\t"
-
- :
- : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l),
- [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
- [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0),
- [sq1] "r"(sq1), [sq2] "r"(sq2));
- } else if (mask & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p1_f0], +3(%[sp1]) \n\t"
- "sb %[p0_f0], +3(%[sp0]) \n\t"
- "sb %[q0_f0], +3(%[sq0]) \n\t"
- "sb %[q1_f0], +3(%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sq0] "r"(sq0), [sq1] "r"(sq1));
- }
- } else if ((flat2 != 0) && (flat != 0) && (mask != 0)) {
- /* f0 + f1 + f2 */
- /* f0 function */
- filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
-
- /* f1 function */
- /* left 2 element operation */
- PACK_LEFT_0TO3()
- mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, &p2_l_f1,
- &p1_l_f1, &p0_l_f1, &q0_l_f1, &q1_l_f1, &q2_l_f1);
-
- /* right 2 element operation */
- PACK_RIGHT_0TO3()
- mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, &p2_r_f1,
- &p1_r_f1, &p0_r_f1, &q0_r_f1, &q1_r_f1, &q2_r_f1);
-
- /* f2 function */
- PACK_LEFT_4TO7()
- wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, &p3_l, &p2_l, &p1_l,
- &p0_l, &q0_l, &q1_l, &q2_l, &q3_l, &q4_l, &q5_l,
- &q6_l, &q7_l);
-
- PACK_RIGHT_4TO7()
- wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, &p3_r, &p2_r, &p1_r,
- &p0_r, &q0_r, &q1_r, &q2_r, &q3_r, &q4_r, &q5_r,
- &q6_r, &q7_r);
-
- if (mask & flat & flat2 & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p6_r], (%[sp6]) \n\t"
- "sb %[p5_r], (%[sp5]) \n\t"
- "sb %[p4_r], (%[sp4]) \n\t"
- "sb %[p3_r], (%[sp3]) \n\t"
- "sb %[p2_r], (%[sp2]) \n\t"
- "sb %[p1_r], (%[sp1]) \n\t"
- "sb %[p0_r], (%[sp0]) \n\t"
-
- :
- : [p6_r] "r"(p6_r), [p5_r] "r"(p5_r), [p4_r] "r"(p4_r),
- [p3_r] "r"(p3_r), [p2_r] "r"(p2_r), [p1_r] "r"(p1_r),
- [sp6] "r"(sp6), [sp5] "r"(sp5), [sp4] "r"(sp4), [sp3] "r"(sp3),
- [sp2] "r"(sp2), [sp1] "r"(sp1), [p0_r] "r"(p0_r), [sp0] "r"(sp0));
-
- __asm__ __volatile__(
- "sb %[q0_r], (%[sq0]) \n\t"
- "sb %[q1_r], (%[sq1]) \n\t"
- "sb %[q2_r], (%[sq2]) \n\t"
- "sb %[q3_r], (%[sq3]) \n\t"
- "sb %[q4_r], (%[sq4]) \n\t"
- "sb %[q5_r], (%[sq5]) \n\t"
- "sb %[q6_r], (%[sq6]) \n\t"
-
- :
- : [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
- [q3_r] "r"(q3_r), [q4_r] "r"(q4_r), [q5_r] "r"(q5_r),
- [q6_r] "r"(q6_r), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2),
- [sq3] "r"(sq3), [sq4] "r"(sq4), [sq5] "r"(sq5), [sq6] "r"(sq6));
- } else if (mask & flat & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p2_r_f1], (%[sp2]) \n\t"
- "sb %[p1_r_f1], (%[sp1]) \n\t"
- "sb %[p0_r_f1], (%[sp0]) \n\t"
- "sb %[q0_r_f1], (%[sq0]) \n\t"
- "sb %[q1_r_f1], (%[sq1]) \n\t"
- "sb %[q2_r_f1], (%[sq2]) \n\t"
-
- :
- : [p2_r_f1] "r"(p2_r_f1), [p1_r_f1] "r"(p1_r_f1),
- [p0_r_f1] "r"(p0_r_f1), [q0_r_f1] "r"(q0_r_f1),
- [q1_r_f1] "r"(q1_r_f1), [q2_r_f1] "r"(q2_r_f1), [sp2] "r"(sp2),
- [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1),
- [sq2] "r"(sq2));
- } else if (mask & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p1_f0], (%[sp1]) \n\t"
- "sb %[p0_f0], (%[sp0]) \n\t"
- "sb %[q0_f0], (%[sq0]) \n\t"
- "sb %[q1_f0], (%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sq0] "r"(sq0), [sq1] "r"(sq1));
- }
-
- __asm__ __volatile__(
- "srl %[p6_r], %[p6_r], 16 \n\t"
- "srl %[p5_r], %[p5_r], 16 \n\t"
- "srl %[p4_r], %[p4_r], 16 \n\t"
- "srl %[p3_r], %[p3_r], 16 \n\t"
- "srl %[p2_r], %[p2_r], 16 \n\t"
- "srl %[p1_r], %[p1_r], 16 \n\t"
- "srl %[p0_r], %[p0_r], 16 \n\t"
- "srl %[q0_r], %[q0_r], 16 \n\t"
- "srl %[q1_r], %[q1_r], 16 \n\t"
- "srl %[q2_r], %[q2_r], 16 \n\t"
- "srl %[q3_r], %[q3_r], 16 \n\t"
- "srl %[q4_r], %[q4_r], 16 \n\t"
- "srl %[q5_r], %[q5_r], 16 \n\t"
- "srl %[q6_r], %[q6_r], 16 \n\t"
-
- : [q0_r] "+r"(q0_r), [q1_r] "+r"(q1_r), [q2_r] "+r"(q2_r),
- [q3_r] "+r"(q3_r), [q4_r] "+r"(q4_r), [q5_r] "+r"(q5_r),
- [p6_r] "+r"(p6_r), [p5_r] "+r"(p5_r), [p4_r] "+r"(p4_r),
- [p3_r] "+r"(p3_r), [p2_r] "+r"(p2_r), [p1_r] "+r"(p1_r),
- [q6_r] "+r"(q6_r), [p0_r] "+r"(p0_r)
- :);
-
- __asm__ __volatile__(
- "srl %[p2_r_f1], %[p2_r_f1], 16 \n\t"
- "srl %[p1_r_f1], %[p1_r_f1], 16 \n\t"
- "srl %[p0_r_f1], %[p0_r_f1], 16 \n\t"
- "srl %[q0_r_f1], %[q0_r_f1], 16 \n\t"
- "srl %[q1_r_f1], %[q1_r_f1], 16 \n\t"
- "srl %[q2_r_f1], %[q2_r_f1], 16 \n\t"
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2_r_f1] "+r"(p2_r_f1), [p1_r_f1] "+r"(p1_r_f1),
- [p0_r_f1] "+r"(p0_r_f1), [q0_r_f1] "+r"(q0_r_f1),
- [q1_r_f1] "+r"(q1_r_f1), [q2_r_f1] "+r"(q2_r_f1),
- [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & flat2 & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p6_r], +1(%[sp6]) \n\t"
- "sb %[p5_r], +1(%[sp5]) \n\t"
- "sb %[p4_r], +1(%[sp4]) \n\t"
- "sb %[p3_r], +1(%[sp3]) \n\t"
- "sb %[p2_r], +1(%[sp2]) \n\t"
- "sb %[p1_r], +1(%[sp1]) \n\t"
- "sb %[p0_r], +1(%[sp0]) \n\t"
-
- :
- : [p6_r] "r"(p6_r), [p5_r] "r"(p5_r), [p4_r] "r"(p4_r),
- [p3_r] "r"(p3_r), [p2_r] "r"(p2_r), [p1_r] "r"(p1_r),
- [p0_r] "r"(p0_r), [sp6] "r"(sp6), [sp5] "r"(sp5), [sp4] "r"(sp4),
- [sp3] "r"(sp3), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0));
-
- __asm__ __volatile__(
- "sb %[q0_r], +1(%[sq0]) \n\t"
- "sb %[q1_r], +1(%[sq1]) \n\t"
- "sb %[q2_r], +1(%[sq2]) \n\t"
- "sb %[q3_r], +1(%[sq3]) \n\t"
- "sb %[q4_r], +1(%[sq4]) \n\t"
- "sb %[q5_r], +1(%[sq5]) \n\t"
- "sb %[q6_r], +1(%[sq6]) \n\t"
-
- :
- : [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
- [q3_r] "r"(q3_r), [q4_r] "r"(q4_r), [q5_r] "r"(q5_r),
- [q6_r] "r"(q6_r), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2),
- [sq3] "r"(sq3), [sq4] "r"(sq4), [sq5] "r"(sq5), [sq6] "r"(sq6));
- } else if (mask & flat & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p2_r_f1], +1(%[sp2]) \n\t"
- "sb %[p1_r_f1], +1(%[sp1]) \n\t"
- "sb %[p0_r_f1], +1(%[sp0]) \n\t"
- "sb %[q0_r_f1], +1(%[sq0]) \n\t"
- "sb %[q1_r_f1], +1(%[sq1]) \n\t"
- "sb %[q2_r_f1], +1(%[sq2]) \n\t"
-
- :
- : [p2_r_f1] "r"(p2_r_f1), [p1_r_f1] "r"(p1_r_f1),
- [p0_r_f1] "r"(p0_r_f1), [q0_r_f1] "r"(q0_r_f1),
- [q1_r_f1] "r"(q1_r_f1), [q2_r_f1] "r"(q2_r_f1), [sp2] "r"(sp2),
- [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1),
- [sq2] "r"(sq2));
- } else if (mask & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p1_f0], +1(%[sp1]) \n\t"
- "sb %[p0_f0], +1(%[sp0]) \n\t"
- "sb %[q0_f0], +1(%[sq0]) \n\t"
- "sb %[q1_f0], +1(%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sq0] "r"(sq0), [sq1] "r"(sq1));
- }
-
- __asm__ __volatile__(
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & flat2 & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p6_l], +2(%[sp6]) \n\t"
- "sb %[p5_l], +2(%[sp5]) \n\t"
- "sb %[p4_l], +2(%[sp4]) \n\t"
- "sb %[p3_l], +2(%[sp3]) \n\t"
- "sb %[p2_l], +2(%[sp2]) \n\t"
- "sb %[p1_l], +2(%[sp1]) \n\t"
- "sb %[p0_l], +2(%[sp0]) \n\t"
-
- :
- : [p6_l] "r"(p6_l), [p5_l] "r"(p5_l), [p4_l] "r"(p4_l),
- [p3_l] "r"(p3_l), [p2_l] "r"(p2_l), [p1_l] "r"(p1_l),
- [p0_l] "r"(p0_l), [sp6] "r"(sp6), [sp5] "r"(sp5), [sp4] "r"(sp4),
- [sp3] "r"(sp3), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0));
-
- __asm__ __volatile__(
- "sb %[q0_l], +2(%[sq0]) \n\t"
- "sb %[q1_l], +2(%[sq1]) \n\t"
- "sb %[q2_l], +2(%[sq2]) \n\t"
- "sb %[q3_l], +2(%[sq3]) \n\t"
- "sb %[q4_l], +2(%[sq4]) \n\t"
- "sb %[q5_l], +2(%[sq5]) \n\t"
- "sb %[q6_l], +2(%[sq6]) \n\t"
-
- :
- : [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
- [q3_l] "r"(q3_l), [q4_l] "r"(q4_l), [q5_l] "r"(q5_l),
- [q6_l] "r"(q6_l), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2),
- [sq3] "r"(sq3), [sq4] "r"(sq4), [sq5] "r"(sq5), [sq6] "r"(sq6));
- } else if (mask & flat & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p2_l_f1], +2(%[sp2]) \n\t"
- "sb %[p1_l_f1], +2(%[sp1]) \n\t"
- "sb %[p0_l_f1], +2(%[sp0]) \n\t"
- "sb %[q0_l_f1], +2(%[sq0]) \n\t"
- "sb %[q1_l_f1], +2(%[sq1]) \n\t"
- "sb %[q2_l_f1], +2(%[sq2]) \n\t"
-
- :
- : [p2_l_f1] "r"(p2_l_f1), [p1_l_f1] "r"(p1_l_f1),
- [p0_l_f1] "r"(p0_l_f1), [q0_l_f1] "r"(q0_l_f1),
- [q1_l_f1] "r"(q1_l_f1), [q2_l_f1] "r"(q2_l_f1), [sp2] "r"(sp2),
- [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1),
- [sq2] "r"(sq2));
- } else if (mask & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p1_f0], +2(%[sp1]) \n\t"
- "sb %[p0_f0], +2(%[sp0]) \n\t"
- "sb %[q0_f0], +2(%[sq0]) \n\t"
- "sb %[q1_f0], +2(%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sq0] "r"(sq0), [sq1] "r"(sq1));
- }
-
- __asm__ __volatile__(
- "srl %[p6_l], %[p6_l], 16 \n\t"
- "srl %[p5_l], %[p5_l], 16 \n\t"
- "srl %[p4_l], %[p4_l], 16 \n\t"
- "srl %[p3_l], %[p3_l], 16 \n\t"
- "srl %[p2_l], %[p2_l], 16 \n\t"
- "srl %[p1_l], %[p1_l], 16 \n\t"
- "srl %[p0_l], %[p0_l], 16 \n\t"
- "srl %[q0_l], %[q0_l], 16 \n\t"
- "srl %[q1_l], %[q1_l], 16 \n\t"
- "srl %[q2_l], %[q2_l], 16 \n\t"
- "srl %[q3_l], %[q3_l], 16 \n\t"
- "srl %[q4_l], %[q4_l], 16 \n\t"
- "srl %[q5_l], %[q5_l], 16 \n\t"
- "srl %[q6_l], %[q6_l], 16 \n\t"
-
- : [q0_l] "+r"(q0_l), [q1_l] "+r"(q1_l), [q2_l] "+r"(q2_l),
- [q3_l] "+r"(q3_l), [q4_l] "+r"(q4_l), [q5_l] "+r"(q5_l),
- [q6_l] "+r"(q6_l), [p6_l] "+r"(p6_l), [p5_l] "+r"(p5_l),
- [p4_l] "+r"(p4_l), [p3_l] "+r"(p3_l), [p2_l] "+r"(p2_l),
- [p1_l] "+r"(p1_l), [p0_l] "+r"(p0_l)
- :);
-
- __asm__ __volatile__(
- "srl %[p2_l_f1], %[p2_l_f1], 16 \n\t"
- "srl %[p1_l_f1], %[p1_l_f1], 16 \n\t"
- "srl %[p0_l_f1], %[p0_l_f1], 16 \n\t"
- "srl %[q0_l_f1], %[q0_l_f1], 16 \n\t"
- "srl %[q1_l_f1], %[q1_l_f1], 16 \n\t"
- "srl %[q2_l_f1], %[q2_l_f1], 16 \n\t"
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2_l_f1] "+r"(p2_l_f1), [p1_l_f1] "+r"(p1_l_f1),
- [p0_l_f1] "+r"(p0_l_f1), [q0_l_f1] "+r"(q0_l_f1),
- [q1_l_f1] "+r"(q1_l_f1), [q2_l_f1] "+r"(q2_l_f1),
- [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & flat2 & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p6_l], +3(%[sp6]) \n\t"
- "sb %[p5_l], +3(%[sp5]) \n\t"
- "sb %[p4_l], +3(%[sp4]) \n\t"
- "sb %[p3_l], +3(%[sp3]) \n\t"
- "sb %[p2_l], +3(%[sp2]) \n\t"
- "sb %[p1_l], +3(%[sp1]) \n\t"
- "sb %[p0_l], +3(%[sp0]) \n\t"
-
- :
- : [p6_l] "r"(p6_l), [p5_l] "r"(p5_l), [p4_l] "r"(p4_l),
- [p3_l] "r"(p3_l), [p2_l] "r"(p2_l), [p1_l] "r"(p1_l),
- [p0_l] "r"(p0_l), [sp6] "r"(sp6), [sp5] "r"(sp5), [sp4] "r"(sp4),
- [sp3] "r"(sp3), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0));
-
- __asm__ __volatile__(
- "sb %[q0_l], +3(%[sq0]) \n\t"
- "sb %[q1_l], +3(%[sq1]) \n\t"
- "sb %[q2_l], +3(%[sq2]) \n\t"
- "sb %[q3_l], +3(%[sq3]) \n\t"
- "sb %[q4_l], +3(%[sq4]) \n\t"
- "sb %[q5_l], +3(%[sq5]) \n\t"
- "sb %[q6_l], +3(%[sq6]) \n\t"
-
- :
- : [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
- [q3_l] "r"(q3_l), [q4_l] "r"(q4_l), [q5_l] "r"(q5_l),
- [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2), [sq3] "r"(sq3),
- [sq4] "r"(sq4), [sq5] "r"(sq5), [q6_l] "r"(q6_l), [sq6] "r"(sq6));
- } else if (mask & flat & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p2_l_f1], +3(%[sp2]) \n\t"
- "sb %[p1_l_f1], +3(%[sp1]) \n\t"
- "sb %[p0_l_f1], +3(%[sp0]) \n\t"
- "sb %[q0_l_f1], +3(%[sq0]) \n\t"
- "sb %[q1_l_f1], +3(%[sq1]) \n\t"
- "sb %[q2_l_f1], +3(%[sq2]) \n\t"
-
- :
- : [p2_l_f1] "r"(p2_l_f1), [p1_l_f1] "r"(p1_l_f1),
- [p0_l_f1] "r"(p0_l_f1), [q0_l_f1] "r"(q0_l_f1),
- [q1_l_f1] "r"(q1_l_f1), [q2_l_f1] "r"(q2_l_f1), [sp2] "r"(sp2),
- [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1),
- [sq2] "r"(sq2));
- } else if (mask & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p1_f0], +3(%[sp1]) \n\t"
- "sb %[p0_f0], +3(%[sp0]) \n\t"
- "sb %[q0_f0], +3(%[sq0]) \n\t"
- "sb %[q1_f0], +3(%[sq1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0),
- [sq0] "r"(sq0), [sq1] "r"(sq1));
- }
- }
-
- s = s + 4;
- }
-}
-
-void aom_lpf_horizontal_16_dspr2(unsigned char *s, int pitch,
- const uint8_t *blimit, const uint8_t *limit,
- const uint8_t *thresh) {
- mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 1);
-}
-
-void aom_lpf_horizontal_16_dual_dspr2(unsigned char *s, int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh) {
- mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 2);
-}
-#endif // #if HAVE_DSPR2
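The branch ladder in mb_lpf_horizontal_edge() chooses one of three filter strengths per byte lane from the packed mask, flat and flat2 results. A scalar sketch of that selection (apply_f0/apply_f1/apply_f2 are hypothetical stand-ins for filter1_dspr2, mbfilter_dspr2 and wide_mbfilter_dspr2):

    #include <stdint.h>

    /* Hypothetical stand-ins for filter1_dspr2, mbfilter_dspr2 and
       wide_mbfilter_dspr2; the real filters rewrite the pixel neighbourhood. */
    static void apply_f0(void) {} /* narrow filter, edge not flat */
    static void apply_f1(void) {} /* flat-segment filter over p3..q3 */
    static void apply_f2(void) {} /* wide filter over p7..q7 */

    static void select_filter_for_lane(uint8_t mask, uint8_t flat,
                                       uint8_t flat2) {
      if (!mask) return; /* lane is left untouched */
      if (flat && flat2) {
        apply_f2();
      } else if (flat) {
        apply_f1();
      } else {
        apply_f0();
      }
    }

In the asm this decision is made four lanes at a time by testing mask & flat & flat2, then mask & flat, then mask against 0x000000FF, 0x0000FF00, 0x00FF0000 and 0xFF000000 in turn, shifting the packed filter outputs right between the byte stores.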
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_mb_vert_dspr2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_mb_vert_dspr2.c
deleted file mode 100644
index 3d3f1ec9717..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_mb_vert_dspr2.c
+++ /dev/null
@@ -1,758 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <stdlib.h>
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom/aom_integer.h"
-#include "aom_dsp/mips/common_dspr2.h"
-#include "aom_dsp/mips/loopfilter_filters_dspr2.h"
-#include "aom_dsp/mips/loopfilter_macros_dspr2.h"
-#include "aom_dsp/mips/loopfilter_masks_dspr2.h"
-#include "aom_mem/aom_mem.h"
-
-#if HAVE_DSPR2
-void aom_lpf_vertical_16_dspr2(uint8_t *s, int pitch, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh) {
- uint8_t i;
- uint32_t mask, hev, flat, flat2;
- uint8_t *s1, *s2, *s3, *s4;
- uint32_t prim1, prim2, sec3, sec4, prim3, prim4;
- uint32_t thresh_vec, flimit_vec, limit_vec;
- uint32_t uflimit, ulimit, uthresh;
- uint32_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
- uint32_t p1_f0, p0_f0, q0_f0, q1_f0;
- uint32_t p7_l, p6_l, p5_l, p4_l, p3_l, p2_l, p1_l, p0_l;
- uint32_t q0_l, q1_l, q2_l, q3_l, q4_l, q5_l, q6_l, q7_l;
- uint32_t p7_r, p6_r, p5_r, p4_r, p3_r, p2_r, p1_r, p0_r;
- uint32_t q0_r, q1_r, q2_r, q3_r, q4_r, q5_r, q6_r, q7_r;
- uint32_t p2_l_f1, p1_l_f1, p0_l_f1, p2_r_f1, p1_r_f1, p0_r_f1;
- uint32_t q0_l_f1, q1_l_f1, q2_l_f1, q0_r_f1, q1_r_f1, q2_r_f1;
-
- uflimit = *blimit;
- ulimit = *limit;
- uthresh = *thresh;
-
- /* create quad-byte */
- __asm__ __volatile__(
- "replv.qb %[thresh_vec], %[uthresh] \n\t"
- "replv.qb %[flimit_vec], %[uflimit] \n\t"
- "replv.qb %[limit_vec], %[ulimit] \n\t"
-
- : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec),
- [limit_vec] "=r"(limit_vec)
- : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit));
-
- prefetch_store(s + pitch);
-
- for (i = 0; i < 2; i++) {
- s1 = s;
- s2 = s + pitch;
- s3 = s2 + pitch;
- s4 = s3 + pitch;
- s = s4 + pitch;
-
- __asm__ __volatile__(
- "lw %[p0], -4(%[s1]) \n\t"
- "lw %[p1], -4(%[s2]) \n\t"
- "lw %[p2], -4(%[s3]) \n\t"
- "lw %[p3], -4(%[s4]) \n\t"
- "lw %[p4], -8(%[s1]) \n\t"
- "lw %[p5], -8(%[s2]) \n\t"
- "lw %[p6], -8(%[s3]) \n\t"
- "lw %[p7], -8(%[s4]) \n\t"
-
- : [p3] "=&r"(p3), [p2] "=&r"(p2), [p1] "=&r"(p1), [p0] "=&r"(p0),
- [p7] "=&r"(p7), [p6] "=&r"(p6), [p5] "=&r"(p5), [p4] "=&r"(p4)
- : [s1] "r"(s1), [s2] "r"(s2), [s3] "r"(s3), [s4] "r"(s4));
-
- __asm__ __volatile__(
- "lw %[q3], (%[s1]) \n\t"
- "lw %[q2], (%[s2]) \n\t"
- "lw %[q1], (%[s3]) \n\t"
- "lw %[q0], (%[s4]) \n\t"
- "lw %[q7], +4(%[s1]) \n\t"
- "lw %[q6], +4(%[s2]) \n\t"
- "lw %[q5], +4(%[s3]) \n\t"
- "lw %[q4], +4(%[s4]) \n\t"
-
- : [q3] "=&r"(q3), [q2] "=&r"(q2), [q1] "=&r"(q1), [q0] "=&r"(q0),
- [q7] "=&r"(q7), [q6] "=&r"(q6), [q5] "=&r"(q5), [q4] "=&r"(q4)
- : [s1] "r"(s1), [s2] "r"(s2), [s3] "r"(s3), [s4] "r"(s4));
-
- /* transpose p3, p2, p1, p0
- original (when loaded from memory)
- register -4 -3 -2 -1
- p0 p0_0 p0_1 p0_2 p0_3
- p1 p1_0 p1_1 p1_2 p1_3
- p2 p2_0 p2_1 p2_2 p2_3
- p3 p3_0 p3_1 p3_2 p3_3
-
- after transpose
- register
- p0 p3_3 p2_3 p1_3 p0_3
- p1 p3_2 p2_2 p1_2 p0_2
- p2 p3_1 p2_1 p1_1 p0_1
- p3 p3_0 p2_0 p1_0 p0_0
- */
- __asm__ __volatile__(
- "precrq.qb.ph %[prim1], %[p0], %[p1] \n\t"
- "precr.qb.ph %[prim2], %[p0], %[p1] \n\t"
- "precrq.qb.ph %[prim3], %[p2], %[p3] \n\t"
- "precr.qb.ph %[prim4], %[p2], %[p3] \n\t"
-
- "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t"
- "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t"
- "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
- "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
-
- "precrq.ph.w %[p0], %[p1], %[sec3] \n\t"
- "precrq.ph.w %[p2], %[p3], %[sec4] \n\t"
- "append %[p1], %[sec3], 16 \n\t"
- "append %[p3], %[sec4], 16 \n\t"
-
- : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3),
- [prim4] "=&r"(prim4), [p0] "+r"(p0), [p1] "+r"(p1), [p2] "+r"(p2),
- [p3] "+r"(p3), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4)
- :);
-
- /* transpose q0, q1, q2, q3
- original (when loaded from memory)
- register +1 +2 +3 +4
- q3 q3_0 q3_1 q3_2 q3_3
- q2 q2_0 q2_1 q2_2 q2_3
- q1 q1_0 q1_1 q1_2 q1_3
- q0 q0_0 q0_1 q0_2 q0_3
-
- after transpose
- register
- q3 q0_3 q1_3 q2_3 q3_3
- q2 q0_2 q1_2 q2_2 q3_2
- q1 q0_1 q1_1 q2_1 q3_1
- q0 q0_0 q1_0 q2_0 q3_0
- */
- __asm__ __volatile__(
- "precrq.qb.ph %[prim1], %[q3], %[q2] \n\t"
- "precr.qb.ph %[prim2], %[q3], %[q2] \n\t"
- "precrq.qb.ph %[prim3], %[q1], %[q0] \n\t"
- "precr.qb.ph %[prim4], %[q1], %[q0] \n\t"
-
- "precrq.qb.ph %[q2], %[prim1], %[prim2] \n\t"
- "precr.qb.ph %[q0], %[prim1], %[prim2] \n\t"
- "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
- "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
-
- "precrq.ph.w %[q3], %[q2], %[sec3] \n\t"
- "precrq.ph.w %[q1], %[q0], %[sec4] \n\t"
- "append %[q2], %[sec3], 16 \n\t"
- "append %[q0], %[sec4], 16 \n\t"
-
- : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3),
- [prim4] "=&r"(prim4), [q3] "+r"(q3), [q2] "+r"(q2), [q1] "+r"(q1),
- [q0] "+r"(q0), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4)
- :);
-
- /* transpose p7, p6, p5, p4
- original (when loaded from memory)
- register -8 -7 -6 -5
- p4 p4_0 p4_1 p4_2 p4_3
- p5 p5_0 p5_1 p5_2 p5_3
- p6 p6_0 p6_1 p6_2 p6_3
- p7 p7_0 p7_1 p7_2 p7_3
-
- after transpose
- register
- p4 p7_3 p6_3 p5_3 p4_3
- p5 p7_2 p6_2 p5_2 p4_2
- p6 p7_1 p6_1 p5_1 p4_1
- p7 p7_0 p6_0 p5_0 p4_0
- */
- __asm__ __volatile__(
- "precrq.qb.ph %[prim1], %[p4], %[p5] \n\t"
- "precr.qb.ph %[prim2], %[p4], %[p5] \n\t"
- "precrq.qb.ph %[prim3], %[p6], %[p7] \n\t"
- "precr.qb.ph %[prim4], %[p6], %[p7] \n\t"
-
- "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t"
- "precr.qb.ph %[p7], %[prim1], %[prim2] \n\t"
- "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
- "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
-
- "precrq.ph.w %[p4], %[p5], %[sec3] \n\t"
- "precrq.ph.w %[p6], %[p7], %[sec4] \n\t"
- "append %[p5], %[sec3], 16 \n\t"
- "append %[p7], %[sec4], 16 \n\t"
-
- : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3),
- [prim4] "=&r"(prim4), [p4] "+r"(p4), [p5] "+r"(p5), [p6] "+r"(p6),
- [p7] "+r"(p7), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4)
- :);
-
- /* transpose q4, q5, q6, q7
- original (when loaded from memory)
- register +5 +6 +7 +8
- q7 q7_0 q7_1 q7_2 q7_3
- q6 q6_0 q6_1 q6_2 q6_3
- q5 q5_0 q5_1 q5_2 q5_3
- q4 q4_0 q4_1 q4_2 q4_3
-
- after transpose
- register
-       q7           q4_3  q5_3  q6_3  q7_3
-       q6           q4_2  q5_2  q6_2  q7_2
-       q5           q4_1  q5_1  q6_1  q7_1
-       q4           q4_0  q5_0  q6_0  q7_0
- */
- __asm__ __volatile__(
- "precrq.qb.ph %[prim1], %[q7], %[q6] \n\t"
- "precr.qb.ph %[prim2], %[q7], %[q6] \n\t"
- "precrq.qb.ph %[prim3], %[q5], %[q4] \n\t"
- "precr.qb.ph %[prim4], %[q5], %[q4] \n\t"
-
- "precrq.qb.ph %[q6], %[prim1], %[prim2] \n\t"
- "precr.qb.ph %[q4], %[prim1], %[prim2] \n\t"
- "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t"
- "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t"
-
- "precrq.ph.w %[q7], %[q6], %[sec3] \n\t"
- "precrq.ph.w %[q5], %[q4], %[sec4] \n\t"
- "append %[q6], %[sec3], 16 \n\t"
- "append %[q4], %[sec4], 16 \n\t"
-
- : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3),
- [prim4] "=&r"(prim4), [q7] "+r"(q7), [q6] "+r"(q6), [q5] "+r"(q5),
- [q4] "+r"(q4), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4)
- :);
-
- filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, p1, p0,
- p3, p2, q0, q1, q2, q3, &hev, &mask, &flat);
-
- flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2);
-
- /* f0 */
- if (((flat2 == 0) && (flat == 0) && (mask != 0)) ||
- ((flat2 != 0) && (flat == 0) && (mask != 0))) {
- filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
- STORE_F0()
-    } else if ((flat2 == 0xFFFFFFFF) && (flat == 0xFFFFFFFF) &&
- (mask == 0xFFFFFFFF)) {
- /* f2 */
- PACK_LEFT_0TO3()
- PACK_LEFT_4TO7()
- wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, &p3_l, &p2_l, &p1_l,
- &p0_l, &q0_l, &q1_l, &q2_l, &q3_l, &q4_l, &q5_l,
- &q6_l, &q7_l);
-
- PACK_RIGHT_0TO3()
- PACK_RIGHT_4TO7()
- wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, &p3_r, &p2_r, &p1_r,
- &p0_r, &q0_r, &q1_r, &q2_r, &q3_r, &q4_r, &q5_r,
- &q6_r, &q7_r);
-
- STORE_F2()
- } else if ((flat2 == 0) && (flat == 0xFFFFFFFF) && (mask == 0xFFFFFFFF)) {
- /* f1 */
- PACK_LEFT_0TO3()
- mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l);
-
- PACK_RIGHT_0TO3()
- mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r);
-
- STORE_F1()
- } else if ((flat2 == 0) && (flat != 0) && (mask != 0)) {
- /* f0 + f1 */
- filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
-
- /* left 2 element operation */
- PACK_LEFT_0TO3()
- mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l);
-
- /* right 2 element operation */
- PACK_RIGHT_0TO3()
- mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r);
-
- if (mask & flat & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p2_r], -3(%[s4]) \n\t"
- "sb %[p1_r], -2(%[s4]) \n\t"
- "sb %[p0_r], -1(%[s4]) \n\t"
- "sb %[q0_r], (%[s4]) \n\t"
- "sb %[q1_r], +1(%[s4]) \n\t"
- "sb %[q2_r], +2(%[s4]) \n\t"
-
- :
- : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r),
- [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
- [s4] "r"(s4));
- } else if (mask & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p1_f0], -2(%[s4]) \n\t"
- "sb %[p0_f0], -1(%[s4]) \n\t"
- "sb %[q0_f0], (%[s4]) \n\t"
- "sb %[q1_f0], +1(%[s4]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [s4] "r"(s4));
- }
-
- __asm__ __volatile__(
- "srl %[p2_r], %[p2_r], 16 \n\t"
- "srl %[p1_r], %[p1_r], 16 \n\t"
- "srl %[p0_r], %[p0_r], 16 \n\t"
- "srl %[q0_r], %[q0_r], 16 \n\t"
- "srl %[q1_r], %[q1_r], 16 \n\t"
- "srl %[q2_r], %[q2_r], 16 \n\t"
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2_r] "+r"(p2_r), [p1_r] "+r"(p1_r), [p0_r] "+r"(p0_r),
- [q0_r] "+r"(q0_r), [q1_r] "+r"(q1_r), [q2_r] "+r"(q2_r),
- [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p2_r], -3(%[s3]) \n\t"
- "sb %[p1_r], -2(%[s3]) \n\t"
- "sb %[p0_r], -1(%[s3]) \n\t"
- "sb %[q0_r], (%[s3]) \n\t"
- "sb %[q1_r], +1(%[s3]) \n\t"
- "sb %[q2_r], +2(%[s3]) \n\t"
-
- :
- : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r),
- [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
- [s3] "r"(s3));
- } else if (mask & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p1_f0], -2(%[s3]) \n\t"
- "sb %[p0_f0], -1(%[s3]) \n\t"
- "sb %[q0_f0], (%[s3]) \n\t"
- "sb %[q1_f0], +1(%[s3]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [s3] "r"(s3));
- }
-
- __asm__ __volatile__(
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p2_l], -3(%[s2]) \n\t"
- "sb %[p1_l], -2(%[s2]) \n\t"
- "sb %[p0_l], -1(%[s2]) \n\t"
- "sb %[q0_l], (%[s2]) \n\t"
- "sb %[q1_l], +1(%[s2]) \n\t"
- "sb %[q2_l], +2(%[s2]) \n\t"
-
- :
- : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l),
- [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
- [s2] "r"(s2));
- } else if (mask & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p1_f0], -2(%[s2]) \n\t"
- "sb %[p0_f0], -1(%[s2]) \n\t"
- "sb %[q0_f0], (%[s2]) \n\t"
- "sb %[q1_f0], +1(%[s2]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [s2] "r"(s2));
- }
-
- __asm__ __volatile__(
- "srl %[p2_l], %[p2_l], 16 \n\t"
- "srl %[p1_l], %[p1_l], 16 \n\t"
- "srl %[p0_l], %[p0_l], 16 \n\t"
- "srl %[q0_l], %[q0_l], 16 \n\t"
- "srl %[q1_l], %[q1_l], 16 \n\t"
- "srl %[q2_l], %[q2_l], 16 \n\t"
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2_l] "+r"(p2_l), [p1_l] "+r"(p1_l), [p0_l] "+r"(p0_l),
- [q0_l] "+r"(q0_l), [q1_l] "+r"(q1_l), [q2_l] "+r"(q2_l),
- [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p2_l], -3(%[s1]) \n\t"
- "sb %[p1_l], -2(%[s1]) \n\t"
- "sb %[p0_l], -1(%[s1]) \n\t"
- "sb %[q0_l], (%[s1]) \n\t"
- "sb %[q1_l], +1(%[s1]) \n\t"
- "sb %[q2_l], +2(%[s1]) \n\t"
-
- :
- : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l),
- [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
- [s1] "r"(s1));
- } else if (mask & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p1_f0], -2(%[s1]) \n\t"
- "sb %[p0_f0], -1(%[s1]) \n\t"
- "sb %[q0_f0], (%[s1]) \n\t"
- "sb %[q1_f0], +1(%[s1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [s1] "r"(s1));
- }
- } else if ((flat2 != 0) && (flat != 0) && (mask != 0)) {
- /* f0+f1+f2 */
- filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0);
-
- PACK_LEFT_0TO3()
- mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, &p2_l_f1,
- &p1_l_f1, &p0_l_f1, &q0_l_f1, &q1_l_f1, &q2_l_f1);
-
- PACK_RIGHT_0TO3()
- mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, &p2_r_f1,
- &p1_r_f1, &p0_r_f1, &q0_r_f1, &q1_r_f1, &q2_r_f1);
-
- PACK_LEFT_4TO7()
- wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, &p3_l, &p2_l, &p1_l,
- &p0_l, &q0_l, &q1_l, &q2_l, &q3_l, &q4_l, &q5_l,
- &q6_l, &q7_l);
-
- PACK_RIGHT_4TO7()
- wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, &p3_r, &p2_r, &p1_r,
- &p0_r, &q0_r, &q1_r, &q2_r, &q3_r, &q4_r, &q5_r,
- &q6_r, &q7_r);
-
- if (mask & flat & flat2 & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p6_r], -7(%[s4]) \n\t"
- "sb %[p5_r], -6(%[s4]) \n\t"
- "sb %[p4_r], -5(%[s4]) \n\t"
- "sb %[p3_r], -4(%[s4]) \n\t"
- "sb %[p2_r], -3(%[s4]) \n\t"
- "sb %[p1_r], -2(%[s4]) \n\t"
- "sb %[p0_r], -1(%[s4]) \n\t"
-
- :
- : [p6_r] "r"(p6_r), [p5_r] "r"(p5_r), [p4_r] "r"(p4_r),
- [p3_r] "r"(p3_r), [p2_r] "r"(p2_r), [p1_r] "r"(p1_r),
- [p0_r] "r"(p0_r), [s4] "r"(s4));
-
- __asm__ __volatile__(
- "sb %[q0_r], (%[s4]) \n\t"
- "sb %[q1_r], +1(%[s4]) \n\t"
- "sb %[q2_r], +2(%[s4]) \n\t"
- "sb %[q3_r], +3(%[s4]) \n\t"
- "sb %[q4_r], +4(%[s4]) \n\t"
- "sb %[q5_r], +5(%[s4]) \n\t"
- "sb %[q6_r], +6(%[s4]) \n\t"
-
- :
- : [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
- [q3_r] "r"(q3_r), [q4_r] "r"(q4_r), [q5_r] "r"(q5_r),
- [q6_r] "r"(q6_r), [s4] "r"(s4));
- } else if (mask & flat & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p2_r_f1], -3(%[s4]) \n\t"
- "sb %[p1_r_f1], -2(%[s4]) \n\t"
- "sb %[p0_r_f1], -1(%[s4]) \n\t"
- "sb %[q0_r_f1], (%[s4]) \n\t"
- "sb %[q1_r_f1], +1(%[s4]) \n\t"
- "sb %[q2_r_f1], +2(%[s4]) \n\t"
-
- :
- : [p2_r_f1] "r"(p2_r_f1), [p1_r_f1] "r"(p1_r_f1),
- [p0_r_f1] "r"(p0_r_f1), [q0_r_f1] "r"(q0_r_f1),
- [q1_r_f1] "r"(q1_r_f1), [q2_r_f1] "r"(q2_r_f1), [s4] "r"(s4));
- } else if (mask & 0x000000FF) {
- __asm__ __volatile__(
- "sb %[p1_f0], -2(%[s4]) \n\t"
- "sb %[p0_f0], -1(%[s4]) \n\t"
- "sb %[q0_f0], (%[s4]) \n\t"
- "sb %[q1_f0], +1(%[s4]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [s4] "r"(s4));
- }
-
- __asm__ __volatile__(
- "srl %[p6_r], %[p6_r], 16 \n\t"
- "srl %[p5_r], %[p5_r], 16 \n\t"
- "srl %[p4_r], %[p4_r], 16 \n\t"
- "srl %[p3_r], %[p3_r], 16 \n\t"
- "srl %[p2_r], %[p2_r], 16 \n\t"
- "srl %[p1_r], %[p1_r], 16 \n\t"
- "srl %[p0_r], %[p0_r], 16 \n\t"
- "srl %[q0_r], %[q0_r], 16 \n\t"
- "srl %[q1_r], %[q1_r], 16 \n\t"
- "srl %[q2_r], %[q2_r], 16 \n\t"
- "srl %[q3_r], %[q3_r], 16 \n\t"
- "srl %[q4_r], %[q4_r], 16 \n\t"
- "srl %[q5_r], %[q5_r], 16 \n\t"
- "srl %[q6_r], %[q6_r], 16 \n\t"
-
- : [q0_r] "+r"(q0_r), [q1_r] "+r"(q1_r), [q2_r] "+r"(q2_r),
- [q3_r] "+r"(q3_r), [q4_r] "+r"(q4_r), [q5_r] "+r"(q5_r),
- [q6_r] "+r"(q6_r), [p6_r] "+r"(p6_r), [p5_r] "+r"(p5_r),
- [p4_r] "+r"(p4_r), [p3_r] "+r"(p3_r), [p2_r] "+r"(p2_r),
- [p1_r] "+r"(p1_r), [p0_r] "+r"(p0_r)
- :);
-
- __asm__ __volatile__(
- "srl %[p2_r_f1], %[p2_r_f1], 16 \n\t"
- "srl %[p1_r_f1], %[p1_r_f1], 16 \n\t"
- "srl %[p0_r_f1], %[p0_r_f1], 16 \n\t"
- "srl %[q0_r_f1], %[q0_r_f1], 16 \n\t"
- "srl %[q1_r_f1], %[q1_r_f1], 16 \n\t"
- "srl %[q2_r_f1], %[q2_r_f1], 16 \n\t"
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2_r_f1] "+r"(p2_r_f1), [p1_r_f1] "+r"(p1_r_f1),
- [p0_r_f1] "+r"(p0_r_f1), [q0_r_f1] "+r"(q0_r_f1),
- [q1_r_f1] "+r"(q1_r_f1), [q2_r_f1] "+r"(q2_r_f1),
- [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & flat2 & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p6_r], -7(%[s3]) \n\t"
- "sb %[p5_r], -6(%[s3]) \n\t"
- "sb %[p4_r], -5(%[s3]) \n\t"
- "sb %[p3_r], -4(%[s3]) \n\t"
- "sb %[p2_r], -3(%[s3]) \n\t"
- "sb %[p1_r], -2(%[s3]) \n\t"
- "sb %[p0_r], -1(%[s3]) \n\t"
-
- :
- : [p6_r] "r"(p6_r), [p5_r] "r"(p5_r), [p4_r] "r"(p4_r),
- [p3_r] "r"(p3_r), [p2_r] "r"(p2_r), [p1_r] "r"(p1_r),
- [p0_r] "r"(p0_r), [s3] "r"(s3));
-
- __asm__ __volatile__(
- "sb %[q0_r], (%[s3]) \n\t"
- "sb %[q1_r], +1(%[s3]) \n\t"
- "sb %[q2_r], +2(%[s3]) \n\t"
- "sb %[q3_r], +3(%[s3]) \n\t"
- "sb %[q4_r], +4(%[s3]) \n\t"
- "sb %[q5_r], +5(%[s3]) \n\t"
- "sb %[q6_r], +6(%[s3]) \n\t"
-
- :
- : [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r),
- [q3_r] "r"(q3_r), [q4_r] "r"(q4_r), [q5_r] "r"(q5_r),
- [q6_r] "r"(q6_r), [s3] "r"(s3));
- } else if (mask & flat & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p2_r_f1], -3(%[s3]) \n\t"
- "sb %[p1_r_f1], -2(%[s3]) \n\t"
- "sb %[p0_r_f1], -1(%[s3]) \n\t"
- "sb %[q0_r_f1], (%[s3]) \n\t"
- "sb %[q1_r_f1], +1(%[s3]) \n\t"
- "sb %[q2_r_f1], +2(%[s3]) \n\t"
-
- :
- : [p2_r_f1] "r"(p2_r_f1), [p1_r_f1] "r"(p1_r_f1),
- [p0_r_f1] "r"(p0_r_f1), [q0_r_f1] "r"(q0_r_f1),
- [q1_r_f1] "r"(q1_r_f1), [q2_r_f1] "r"(q2_r_f1), [s3] "r"(s3));
- } else if (mask & 0x0000FF00) {
- __asm__ __volatile__(
- "sb %[p1_f0], -2(%[s3]) \n\t"
- "sb %[p0_f0], -1(%[s3]) \n\t"
- "sb %[q0_f0], (%[s3]) \n\t"
- "sb %[q1_f0], +1(%[s3]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [s3] "r"(s3));
- }
-
- __asm__ __volatile__(
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & flat2 & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p6_l], -7(%[s2]) \n\t"
- "sb %[p5_l], -6(%[s2]) \n\t"
- "sb %[p4_l], -5(%[s2]) \n\t"
- "sb %[p3_l], -4(%[s2]) \n\t"
- "sb %[p2_l], -3(%[s2]) \n\t"
- "sb %[p1_l], -2(%[s2]) \n\t"
- "sb %[p0_l], -1(%[s2]) \n\t"
-
- :
- : [p6_l] "r"(p6_l), [p5_l] "r"(p5_l), [p4_l] "r"(p4_l),
- [p3_l] "r"(p3_l), [p2_l] "r"(p2_l), [p1_l] "r"(p1_l),
- [p0_l] "r"(p0_l), [s2] "r"(s2));
-
- __asm__ __volatile__(
- "sb %[q0_l], (%[s2]) \n\t"
- "sb %[q1_l], +1(%[s2]) \n\t"
- "sb %[q2_l], +2(%[s2]) \n\t"
- "sb %[q3_l], +3(%[s2]) \n\t"
- "sb %[q4_l], +4(%[s2]) \n\t"
- "sb %[q5_l], +5(%[s2]) \n\t"
- "sb %[q6_l], +6(%[s2]) \n\t"
-
- :
- : [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
- [q3_l] "r"(q3_l), [q4_l] "r"(q4_l), [q5_l] "r"(q5_l),
- [q6_l] "r"(q6_l), [s2] "r"(s2));
- } else if (mask & flat & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p2_l_f1], -3(%[s2]) \n\t"
- "sb %[p1_l_f1], -2(%[s2]) \n\t"
- "sb %[p0_l_f1], -1(%[s2]) \n\t"
- "sb %[q0_l_f1], (%[s2]) \n\t"
- "sb %[q1_l_f1], +1(%[s2]) \n\t"
- "sb %[q2_l_f1], +2(%[s2]) \n\t"
-
- :
- : [p2_l_f1] "r"(p2_l_f1), [p1_l_f1] "r"(p1_l_f1),
- [p0_l_f1] "r"(p0_l_f1), [q0_l_f1] "r"(q0_l_f1),
- [q1_l_f1] "r"(q1_l_f1), [q2_l_f1] "r"(q2_l_f1), [s2] "r"(s2));
- } else if (mask & 0x00FF0000) {
- __asm__ __volatile__(
- "sb %[p1_f0], -2(%[s2]) \n\t"
- "sb %[p0_f0], -1(%[s2]) \n\t"
- "sb %[q0_f0], (%[s2]) \n\t"
- "sb %[q1_f0], +1(%[s2]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [s2] "r"(s2));
- }
-
- __asm__ __volatile__(
- "srl %[p6_l], %[p6_l], 16 \n\t"
- "srl %[p5_l], %[p5_l], 16 \n\t"
- "srl %[p4_l], %[p4_l], 16 \n\t"
- "srl %[p3_l], %[p3_l], 16 \n\t"
- "srl %[p2_l], %[p2_l], 16 \n\t"
- "srl %[p1_l], %[p1_l], 16 \n\t"
- "srl %[p0_l], %[p0_l], 16 \n\t"
- "srl %[q0_l], %[q0_l], 16 \n\t"
- "srl %[q1_l], %[q1_l], 16 \n\t"
- "srl %[q2_l], %[q2_l], 16 \n\t"
- "srl %[q3_l], %[q3_l], 16 \n\t"
- "srl %[q4_l], %[q4_l], 16 \n\t"
- "srl %[q5_l], %[q5_l], 16 \n\t"
- "srl %[q6_l], %[q6_l], 16 \n\t"
-
- : [q0_l] "+r"(q0_l), [q1_l] "+r"(q1_l), [q2_l] "+r"(q2_l),
- [q3_l] "+r"(q3_l), [q4_l] "+r"(q4_l), [q5_l] "+r"(q5_l),
- [q6_l] "+r"(q6_l), [p6_l] "+r"(p6_l), [p5_l] "+r"(p5_l),
- [p4_l] "+r"(p4_l), [p3_l] "+r"(p3_l), [p2_l] "+r"(p2_l),
- [p1_l] "+r"(p1_l), [p0_l] "+r"(p0_l)
- :);
-
- __asm__ __volatile__(
- "srl %[p2_l_f1], %[p2_l_f1], 16 \n\t"
- "srl %[p1_l_f1], %[p1_l_f1], 16 \n\t"
- "srl %[p0_l_f1], %[p0_l_f1], 16 \n\t"
- "srl %[q0_l_f1], %[q0_l_f1], 16 \n\t"
- "srl %[q1_l_f1], %[q1_l_f1], 16 \n\t"
- "srl %[q2_l_f1], %[q2_l_f1], 16 \n\t"
- "srl %[p1_f0], %[p1_f0], 8 \n\t"
- "srl %[p0_f0], %[p0_f0], 8 \n\t"
- "srl %[q0_f0], %[q0_f0], 8 \n\t"
- "srl %[q1_f0], %[q1_f0], 8 \n\t"
-
- : [p2_l_f1] "+r"(p2_l_f1), [p1_l_f1] "+r"(p1_l_f1),
- [p0_l_f1] "+r"(p0_l_f1), [q0_l_f1] "+r"(q0_l_f1),
- [q1_l_f1] "+r"(q1_l_f1), [q2_l_f1] "+r"(q2_l_f1),
- [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0),
- [q1_f0] "+r"(q1_f0)
- :);
-
- if (mask & flat & flat2 & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p6_l], -7(%[s1]) \n\t"
- "sb %[p5_l], -6(%[s1]) \n\t"
- "sb %[p4_l], -5(%[s1]) \n\t"
- "sb %[p3_l], -4(%[s1]) \n\t"
- "sb %[p2_l], -3(%[s1]) \n\t"
- "sb %[p1_l], -2(%[s1]) \n\t"
- "sb %[p0_l], -1(%[s1]) \n\t"
-
- :
- : [p6_l] "r"(p6_l), [p5_l] "r"(p5_l), [p4_l] "r"(p4_l),
- [p3_l] "r"(p3_l), [p2_l] "r"(p2_l), [p1_l] "r"(p1_l),
- [p0_l] "r"(p0_l), [s1] "r"(s1));
-
- __asm__ __volatile__(
- "sb %[q0_l], (%[s1]) \n\t"
- "sb %[q1_l], 1(%[s1]) \n\t"
- "sb %[q2_l], 2(%[s1]) \n\t"
- "sb %[q3_l], 3(%[s1]) \n\t"
- "sb %[q4_l], 4(%[s1]) \n\t"
- "sb %[q5_l], 5(%[s1]) \n\t"
- "sb %[q6_l], 6(%[s1]) \n\t"
-
- :
- : [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l),
- [q3_l] "r"(q3_l), [q4_l] "r"(q4_l), [q5_l] "r"(q5_l),
- [q6_l] "r"(q6_l), [s1] "r"(s1));
- } else if (mask & flat & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p2_l_f1], -3(%[s1]) \n\t"
- "sb %[p1_l_f1], -2(%[s1]) \n\t"
- "sb %[p0_l_f1], -1(%[s1]) \n\t"
- "sb %[q0_l_f1], (%[s1]) \n\t"
- "sb %[q1_l_f1], +1(%[s1]) \n\t"
- "sb %[q2_l_f1], +2(%[s1]) \n\t"
-
- :
- : [p2_l_f1] "r"(p2_l_f1), [p1_l_f1] "r"(p1_l_f1),
- [p0_l_f1] "r"(p0_l_f1), [q0_l_f1] "r"(q0_l_f1),
- [q1_l_f1] "r"(q1_l_f1), [q2_l_f1] "r"(q2_l_f1), [s1] "r"(s1));
- } else if (mask & 0xFF000000) {
- __asm__ __volatile__(
- "sb %[p1_f0], -2(%[s1]) \n\t"
- "sb %[p0_f0], -1(%[s1]) \n\t"
- "sb %[q0_f0], (%[s1]) \n\t"
- "sb %[q1_f0], +1(%[s1]) \n\t"
-
- :
- : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0),
- [q1_f0] "r"(q1_f0), [s1] "r"(s1));
- }
- }
- }
-}
-#endif // #if HAVE_DSPR2
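
The DSPR2 routine removed above processes four rows of the edge at once: each 32-bit register packs one pixel from each row, and the mask, flat and flat2 words carry a 0x00-or-0xFF decision byte per row. That is why every store is guarded by a byte-lane test (0x000000FF, 0x0000FF00, ...) and why the srl-by-8/16 blocks rotate the next row's byte into the low lane between stores. A minimal scalar sketch of the per-lane filter-tier selection (hypothetical names; not part of the patch):

    #include <stdint.h>

    enum lpf_tier { LPF_SKIP, LPF_F0, LPF_F1, LPF_F2 };

    /* mask/flat/flat2 pack one 0x00-or-0xFF byte per row; lane_bits picks
     * the row, e.g. 0x000000FF for the first one. */
    static enum lpf_tier select_tier(uint32_t mask, uint32_t flat,
                                     uint32_t flat2, uint32_t lane_bits) {
      if (!(mask & lane_bits)) return LPF_SKIP; /* row left untouched */
      if (!(flat & lane_bits)) return LPF_F0;   /* 4-tap edge: filter1_dspr2 */
      if (!(flat2 & lane_bits)) return LPF_F1;  /* 8-px flat: mbfilter_dspr2 */
      return LPF_F2;                            /* 16-px wide: wide_mbfilter_dspr2 */
    }

The whole-word special cases at the top of the function (for example flat2 == 0, flat == 0xFFFFFFFF, mask == 0xFFFFFFFF) are this same decision hoisted to all four rows at once, so a packed filter can run without per-lane branching.
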
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_msa.h b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_msa.h
deleted file mode 100644
index 54b0bb4bd94..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/loopfilter_msa.h
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_MIPS_LOOPFILTER_MSA_H_
-#define AOM_AOM_DSP_MIPS_LOOPFILTER_MSA_H_
-
-#include "aom_dsp/mips/macros_msa.h"
-
-#define AOM_LPF_FILTER4_8W(p1_in, p0_in, q0_in, q1_in, mask_in, hev_in, \
- p1_out, p0_out, q0_out, q1_out) \
- { \
- v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, filt_sign; \
- v16i8 filt, filt1, filt2, cnst4b, cnst3b; \
- v8i16 q0_sub_p0_r, filt_r, cnst3h; \
- \
- p1_m = (v16i8)__msa_xori_b(p1_in, 0x80); \
- p0_m = (v16i8)__msa_xori_b(p0_in, 0x80); \
- q0_m = (v16i8)__msa_xori_b(q0_in, 0x80); \
- q1_m = (v16i8)__msa_xori_b(q1_in, 0x80); \
- \
- filt = __msa_subs_s_b(p1_m, q1_m); \
- filt = filt & (v16i8)hev_in; \
- q0_sub_p0 = q0_m - p0_m; \
- filt_sign = __msa_clti_s_b(filt, 0); \
- \
- cnst3h = __msa_ldi_h(3); \
- q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0, q0_sub_p0); \
- q0_sub_p0_r = __msa_dotp_s_h((v16i8)q0_sub_p0_r, (v16i8)cnst3h); \
- filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
- filt_r += q0_sub_p0_r; \
- filt_r = __msa_sat_s_h(filt_r, 7); \
- \
- /* combine left and right part */ \
- filt = __msa_pckev_b((v16i8)filt_r, (v16i8)filt_r); \
- \
- filt = filt & (v16i8)mask_in; \
- cnst4b = __msa_ldi_b(4); \
- filt1 = __msa_adds_s_b(filt, cnst4b); \
- filt1 >>= 3; \
- \
- cnst3b = __msa_ldi_b(3); \
- filt2 = __msa_adds_s_b(filt, cnst3b); \
- filt2 >>= 3; \
- \
- q0_m = __msa_subs_s_b(q0_m, filt1); \
- q0_out = __msa_xori_b((v16u8)q0_m, 0x80); \
- p0_m = __msa_adds_s_b(p0_m, filt2); \
- p0_out = __msa_xori_b((v16u8)p0_m, 0x80); \
- \
- filt = __msa_srari_b(filt1, 1); \
- hev_in = __msa_xori_b((v16u8)hev_in, 0xff); \
- filt = filt & (v16i8)hev_in; \
- \
- q1_m = __msa_subs_s_b(q1_m, filt); \
- q1_out = __msa_xori_b((v16u8)q1_m, 0x80); \
- p1_m = __msa_adds_s_b(p1_m, filt); \
- p1_out = __msa_xori_b((v16u8)p1_m, 0x80); \
- }
-
-#define AOM_LPF_FILTER4_4W(p1_in, p0_in, q0_in, q1_in, mask_in, hev_in, \
- p1_out, p0_out, q0_out, q1_out) \
- { \
- v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, filt_sign; \
- v16i8 filt, filt1, filt2, cnst4b, cnst3b; \
- v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_l, filt_r, cnst3h; \
- \
- p1_m = (v16i8)__msa_xori_b(p1_in, 0x80); \
- p0_m = (v16i8)__msa_xori_b(p0_in, 0x80); \
- q0_m = (v16i8)__msa_xori_b(q0_in, 0x80); \
- q1_m = (v16i8)__msa_xori_b(q1_in, 0x80); \
- \
- filt = __msa_subs_s_b(p1_m, q1_m); \
- \
- filt = filt & (v16i8)hev_in; \
- \
- q0_sub_p0 = q0_m - p0_m; \
- filt_sign = __msa_clti_s_b(filt, 0); \
- \
- cnst3h = __msa_ldi_h(3); \
- q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0, q0_sub_p0); \
- q0_sub_p0_r = __msa_dotp_s_h((v16i8)q0_sub_p0_r, (v16i8)cnst3h); \
- filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
- filt_r += q0_sub_p0_r; \
- filt_r = __msa_sat_s_h(filt_r, 7); \
- \
- q0_sub_p0_l = (v8i16)__msa_ilvl_b(q0_sub_p0, q0_sub_p0); \
- q0_sub_p0_l = __msa_dotp_s_h((v16i8)q0_sub_p0_l, (v16i8)cnst3h); \
- filt_l = (v8i16)__msa_ilvl_b(filt_sign, filt); \
- filt_l += q0_sub_p0_l; \
- filt_l = __msa_sat_s_h(filt_l, 7); \
- \
- filt = __msa_pckev_b((v16i8)filt_l, (v16i8)filt_r); \
- filt = filt & (v16i8)mask_in; \
- \
- cnst4b = __msa_ldi_b(4); \
- filt1 = __msa_adds_s_b(filt, cnst4b); \
- filt1 >>= 3; \
- \
- cnst3b = __msa_ldi_b(3); \
- filt2 = __msa_adds_s_b(filt, cnst3b); \
- filt2 >>= 3; \
- \
- q0_m = __msa_subs_s_b(q0_m, filt1); \
- q0_out = __msa_xori_b((v16u8)q0_m, 0x80); \
- p0_m = __msa_adds_s_b(p0_m, filt2); \
- p0_out = __msa_xori_b((v16u8)p0_m, 0x80); \
- \
- filt = __msa_srari_b(filt1, 1); \
- hev_in = __msa_xori_b((v16u8)hev_in, 0xff); \
- filt = filt & (v16i8)hev_in; \
- \
- q1_m = __msa_subs_s_b(q1_m, filt); \
- q1_out = __msa_xori_b((v16u8)q1_m, 0x80); \
- p1_m = __msa_adds_s_b(p1_m, filt); \
- p1_out = __msa_xori_b((v16u8)p1_m, 0x80); \
- }
-
-#define AOM_FLAT4(p3_in, p2_in, p0_in, q0_in, q2_in, q3_in, flat_out) \
- { \
- v16u8 tmp_flat4, p2_a_sub_p0, q2_a_sub_q0, p3_a_sub_p0, q3_a_sub_q0; \
- v16u8 zero_in = { 0 }; \
- \
- tmp_flat4 = __msa_ori_b(zero_in, 1); \
- p2_a_sub_p0 = __msa_asub_u_b(p2_in, p0_in); \
- q2_a_sub_q0 = __msa_asub_u_b(q2_in, q0_in); \
- p3_a_sub_p0 = __msa_asub_u_b(p3_in, p0_in); \
- q3_a_sub_q0 = __msa_asub_u_b(q3_in, q0_in); \
- \
- p2_a_sub_p0 = __msa_max_u_b(p2_a_sub_p0, q2_a_sub_q0); \
- flat_out = __msa_max_u_b(p2_a_sub_p0, flat_out); \
- p3_a_sub_p0 = __msa_max_u_b(p3_a_sub_p0, q3_a_sub_q0); \
- flat_out = __msa_max_u_b(p3_a_sub_p0, flat_out); \
- \
- flat_out = (tmp_flat4 < (v16u8)flat_out); \
- flat_out = __msa_xori_b(flat_out, 0xff); \
- flat_out = flat_out & (mask); \
- }
-
-#define AOM_FLAT5(p7_in, p6_in, p5_in, p4_in, p0_in, q0_in, q4_in, q5_in, \
- q6_in, q7_in, flat_in, flat2_out) \
- { \
- v16u8 tmp_flat5, zero_in = { 0 }; \
- v16u8 p4_a_sub_p0, q4_a_sub_q0, p5_a_sub_p0, q5_a_sub_q0; \
- v16u8 p6_a_sub_p0, q6_a_sub_q0, p7_a_sub_p0, q7_a_sub_q0; \
- \
- tmp_flat5 = __msa_ori_b(zero_in, 1); \
- p4_a_sub_p0 = __msa_asub_u_b(p4_in, p0_in); \
- q4_a_sub_q0 = __msa_asub_u_b(q4_in, q0_in); \
- p5_a_sub_p0 = __msa_asub_u_b(p5_in, p0_in); \
- q5_a_sub_q0 = __msa_asub_u_b(q5_in, q0_in); \
- p6_a_sub_p0 = __msa_asub_u_b(p6_in, p0_in); \
- q6_a_sub_q0 = __msa_asub_u_b(q6_in, q0_in); \
- p7_a_sub_p0 = __msa_asub_u_b(p7_in, p0_in); \
- q7_a_sub_q0 = __msa_asub_u_b(q7_in, q0_in); \
- \
- p4_a_sub_p0 = __msa_max_u_b(p4_a_sub_p0, q4_a_sub_q0); \
- flat2_out = __msa_max_u_b(p5_a_sub_p0, q5_a_sub_q0); \
- flat2_out = __msa_max_u_b(p4_a_sub_p0, flat2_out); \
- p6_a_sub_p0 = __msa_max_u_b(p6_a_sub_p0, q6_a_sub_q0); \
- flat2_out = __msa_max_u_b(p6_a_sub_p0, flat2_out); \
- p7_a_sub_p0 = __msa_max_u_b(p7_a_sub_p0, q7_a_sub_q0); \
- flat2_out = __msa_max_u_b(p7_a_sub_p0, flat2_out); \
- \
- flat2_out = (tmp_flat5 < (v16u8)flat2_out); \
- flat2_out = __msa_xori_b(flat2_out, 0xff); \
- flat2_out = flat2_out & flat_in; \
- }
-
-#define AOM_FILTER8(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \
- p2_filt8_out, p1_filt8_out, p0_filt8_out, q0_filt8_out, \
- q1_filt8_out, q2_filt8_out) \
- { \
- v8u16 tmp_filt8_0, tmp_filt8_1, tmp_filt8_2; \
- \
- tmp_filt8_2 = p2_in + p1_in + p0_in; \
- tmp_filt8_0 = p3_in << 1; \
- \
- tmp_filt8_0 = tmp_filt8_0 + tmp_filt8_2 + q0_in; \
- tmp_filt8_1 = tmp_filt8_0 + p3_in + p2_in; \
- p2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
- \
- tmp_filt8_1 = tmp_filt8_0 + p1_in + q1_in; \
- p1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
- \
- tmp_filt8_1 = q2_in + q1_in + q0_in; \
- tmp_filt8_2 = tmp_filt8_2 + tmp_filt8_1; \
- tmp_filt8_0 = tmp_filt8_2 + (p0_in); \
- tmp_filt8_0 = tmp_filt8_0 + (p3_in); \
- p0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_0, 3); \
- \
- tmp_filt8_0 = q2_in + q3_in; \
- tmp_filt8_0 = p0_in + tmp_filt8_1 + tmp_filt8_0; \
- tmp_filt8_1 = q3_in + q3_in; \
- tmp_filt8_1 = tmp_filt8_1 + tmp_filt8_0; \
- q2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
- \
- tmp_filt8_0 = tmp_filt8_2 + q3_in; \
- tmp_filt8_1 = tmp_filt8_0 + q0_in; \
- q0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
- \
- tmp_filt8_1 = tmp_filt8_0 - p2_in; \
- tmp_filt8_0 = q1_in + q3_in; \
- tmp_filt8_1 = tmp_filt8_0 + tmp_filt8_1; \
- q1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
- }
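
Written out per pixel, AOM_FILTER8 is a set of rounded averages over an 8-pixel window, the same arithmetic as libaom's scalar filter8 path. A sketch of the first two taps (hypothetical helper name):

    static uint8_t rnd3(int sum) { return (uint8_t)((sum + 4) >> 3); }

    /* p2_out = rnd3(3*p3 + 2*p2 + p1 + p0 + q0);      */
    /* p1_out = rnd3(2*p3 + p2 + 2*p1 + p0 + q0 + q1); */

Each subsequent output slides the window one pixel toward the q side, which is what the running tmp_filt8_* sums implement without recomputing the full total.
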
-
-#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \
- limit_in, b_limit_in, thresh_in, hev_out, mask_out, \
- flat_out) \
- { \
- v16u8 p3_asub_p2_m, p2_asub_p1_m, p1_asub_p0_m, q1_asub_q0_m; \
- v16u8 p1_asub_q1_m, p0_asub_q0_m, q3_asub_q2_m, q2_asub_q1_m; \
- \
- /* absolute subtraction of pixel values */ \
- p3_asub_p2_m = __msa_asub_u_b(p3_in, p2_in); \
- p2_asub_p1_m = __msa_asub_u_b(p2_in, p1_in); \
- p1_asub_p0_m = __msa_asub_u_b(p1_in, p0_in); \
- q1_asub_q0_m = __msa_asub_u_b(q1_in, q0_in); \
- q2_asub_q1_m = __msa_asub_u_b(q2_in, q1_in); \
- q3_asub_q2_m = __msa_asub_u_b(q3_in, q2_in); \
- p0_asub_q0_m = __msa_asub_u_b(p0_in, q0_in); \
- p1_asub_q1_m = __msa_asub_u_b(p1_in, q1_in); \
- \
- /* calculation of hev */ \
- flat_out = __msa_max_u_b(p1_asub_p0_m, q1_asub_q0_m); \
- hev_out = thresh_in < (v16u8)flat_out; \
- \
- /* calculation of mask */ \
- p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p0_asub_q0_m); \
- p1_asub_q1_m >>= 1; \
- p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p1_asub_q1_m); \
- \
- mask_out = b_limit_in < p0_asub_q0_m; \
- mask_out = __msa_max_u_b(flat_out, mask_out); \
- p3_asub_p2_m = __msa_max_u_b(p3_asub_p2_m, p2_asub_p1_m); \
- mask_out = __msa_max_u_b(p3_asub_p2_m, mask_out); \
- q2_asub_q1_m = __msa_max_u_b(q2_asub_q1_m, q3_asub_q2_m); \
- mask_out = __msa_max_u_b(q2_asub_q1_m, mask_out); \
- \
- mask_out = limit_in < (v16u8)mask_out; \
- mask_out = __msa_xori_b(mask_out, 0xff); \
- }
-#endif // AOM_AOM_DSP_MIPS_LOOPFILTER_MSA_H_
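
AOM_LPF_FILTER4_8W/_4W above vectorize the standard 4-tap loopfilter: pixels are mapped into the signed domain (the xori 0x80 steps), a clamped correction is derived from p1 - q1 and 3 * (q0 - p0), and the correction is split between q0/p0, with a halved, rounded copy applied to q1/p1 only where hev is clear. A scalar sketch of that per-pixel math, assuming mask and hev arrive as 0 or -1 per pixel, as the byte lanes do:

    #include <stdint.h>

    static int8_t sat8(int v) {
      return (int8_t)(v < -128 ? -128 : v > 127 ? 127 : v);
    }

    /* One pixel of AOM_LPF_FILTER4_*: p1/p0/q0/q1 already xor'ed with 0x80. */
    static void filter4_px(int8_t *p1, int8_t *p0, int8_t *q0, int8_t *q1,
                           int mask, int hev) {
      int filt = sat8(*p1 - *q1) & hev;          /* only where variance is high */
      filt = sat8(filt + 3 * (*q0 - *p0)) & mask;
      const int filt1 = sat8(filt + 4) >> 3;     /* correction for q0 */
      const int filt2 = sat8(filt + 3) >> 3;     /* correction for p0 */
      *q0 = sat8(*q0 - filt1);
      *p0 = sat8(*p0 + filt2);
      filt = ((filt1 + 1) >> 1) & ~hev;          /* srari_b(filt1, 1) */
      *q1 = sat8(*q1 - filt);
      *p1 = sat8(*p1 + filt);
    }

LPF_MASK_HEV computes the two predicates the same way the scalar code does: hev is max(|p1 - p0|, |q1 - q0|) > thresh, and mask clears filtering when 2 * |p0 - q0| + |p1 - q1| / 2 exceeds b_limit or any neighbouring-pixel difference exceeds limit.
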
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/macros_msa.h b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/macros_msa.h
deleted file mode 100644
index 9bfc27147b7..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/macros_msa.h
+++ /dev/null
@@ -1,2058 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AOM_DSP_MIPS_MACROS_MSA_H_
-#define AOM_AOM_DSP_MIPS_MACROS_MSA_H_
-
-#include <msa.h>
-
-#include "config/aom_config.h"
-
-#include "aom/aom_integer.h"
-
-#define LD_B(RTYPE, psrc) *((const RTYPE *)(psrc))
-#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)
-#define LD_SB(...) LD_B(v16i8, __VA_ARGS__)
-
-#define LD_H(RTYPE, psrc) *((const RTYPE *)(psrc))
-#define LD_UH(...) LD_H(v8u16, __VA_ARGS__)
-#define LD_SH(...) LD_H(v8i16, __VA_ARGS__)
-
-#define LD_W(RTYPE, psrc) *((const RTYPE *)(psrc))
-#define LD_SW(...) LD_W(v4i32, __VA_ARGS__)
-
-#define ST_B(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in)
-#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
-#define ST_SB(...) ST_B(v16i8, __VA_ARGS__)
-
-#define ST_H(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in)
-#define ST_SH(...) ST_H(v8i16, __VA_ARGS__)
-
-#define ST_W(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in)
-#define ST_SW(...) ST_W(v4i32, __VA_ARGS__)
-
-#if (__mips_isa_rev >= 6)
-#define LH(psrc) \
- ({ \
- const uint8_t *psrc_m = (const uint8_t *)(psrc); \
- uint16_t val_m; \
- \
- __asm__ __volatile__("lh %[val_m], %[psrc_m] \n\t" \
- \
- : [val_m] "=r"(val_m) \
- : [psrc_m] "m"(*psrc_m)); \
- \
- val_m; \
- })
-
-#define LW(psrc) \
- ({ \
- const uint8_t *psrc_m = (const uint8_t *)(psrc); \
- uint32_t val_m; \
- \
- __asm__ __volatile__("lw %[val_m], %[psrc_m] \n\t" \
- \
- : [val_m] "=r"(val_m) \
- : [psrc_m] "m"(*psrc_m)); \
- \
- val_m; \
- })
-
-#if (__mips == 64)
-#define LD(psrc) \
- ({ \
- const uint8_t *psrc_m = (const uint8_t *)(psrc); \
- uint64_t val_m = 0; \
- \
- __asm__ __volatile__("ld %[val_m], %[psrc_m] \n\t" \
- \
- : [val_m] "=r"(val_m) \
- : [psrc_m] "m"(*psrc_m)); \
- \
- val_m; \
- })
-#else // !(__mips == 64)
-#define LD(psrc) \
- ({ \
- const uint8_t *psrc_m = (const uint8_t *)(psrc); \
- uint32_t val0_m, val1_m; \
- uint64_t val_m = 0; \
- \
- val0_m = LW(psrc_m); \
- val1_m = LW(psrc_m + 4); \
- \
- val_m = (uint64_t)(val1_m); \
- val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); \
- val_m = (uint64_t)(val_m | (uint64_t)val0_m); \
- \
- val_m; \
- })
-#endif // (__mips == 64)
-
-#define SH(val, pdst) \
- { \
- uint8_t *pdst_m = (uint8_t *)(pdst); \
- const uint16_t val_m = (val); \
- \
- __asm__ __volatile__("sh %[val_m], %[pdst_m] \n\t" \
- \
- : [pdst_m] "=m"(*pdst_m) \
- : [val_m] "r"(val_m)); \
- }
-
-#define SW(val, pdst) \
- { \
- uint8_t *pdst_m = (uint8_t *)(pdst); \
- const uint32_t val_m = (val); \
- \
- __asm__ __volatile__("sw %[val_m], %[pdst_m] \n\t" \
- \
- : [pdst_m] "=m"(*pdst_m) \
- : [val_m] "r"(val_m)); \
- }
-
-#define SD(val, pdst) \
- { \
- uint8_t *pdst_m = (uint8_t *)(pdst); \
- const uint64_t val_m = (val); \
- \
- __asm__ __volatile__("sd %[val_m], %[pdst_m] \n\t" \
- \
- : [pdst_m] "=m"(*pdst_m) \
- : [val_m] "r"(val_m)); \
- }
-#else // !(__mips_isa_rev >= 6)
-#define LH(psrc) \
- ({ \
- const uint8_t *psrc_m = (const uint8_t *)(psrc); \
- uint16_t val_m; \
- \
- __asm__ __volatile__("ulh %[val_m], %[psrc_m] \n\t" \
- \
- : [val_m] "=r"(val_m) \
- : [psrc_m] "m"(*psrc_m)); \
- \
- val_m; \
- })
-
-#define LW(psrc) \
- ({ \
- const uint8_t *psrc_m = (const uint8_t *)(psrc); \
- uint32_t val_m; \
- \
- __asm__ __volatile__("ulw %[val_m], %[psrc_m] \n\t" \
- \
- : [val_m] "=r"(val_m) \
- : [psrc_m] "m"(*psrc_m)); \
- \
- val_m; \
- })
-
-#if (__mips == 64)
-#define LD(psrc) \
- ({ \
- const uint8_t *psrc_m = (const uint8_t *)(psrc); \
- uint64_t val_m = 0; \
- \
- __asm__ __volatile__("uld %[val_m], %[psrc_m] \n\t" \
- \
- : [val_m] "=r"(val_m) \
- : [psrc_m] "m"(*psrc_m)); \
- \
- val_m; \
- })
-#else // !(__mips == 64)
-#define LD(psrc) \
- ({ \
- const uint8_t *psrc_m1 = (const uint8_t *)(psrc); \
- uint32_t val0_m, val1_m; \
- uint64_t val_m_combined = 0; \
- \
- val0_m = LW(psrc_m1); \
- val1_m = LW(psrc_m1 + 4); \
- \
- val_m_combined = (uint64_t)(val1_m); \
- val_m_combined = (uint64_t)((val_m_combined << 32) & 0xFFFFFFFF00000000); \
- val_m_combined = (uint64_t)(val_m_combined | (uint64_t)val0_m); \
- \
- val_m_combined; \
- })
-#endif // (__mips == 64)
-
-#define SH(val, pdst) \
- { \
- uint8_t *pdst_m = (uint8_t *)(pdst); \
- const uint16_t val_m = (val); \
- \
- __asm__ __volatile__("ush %[val_m], %[pdst_m] \n\t" \
- \
- : [pdst_m] "=m"(*pdst_m) \
- : [val_m] "r"(val_m)); \
- }
-
-#define SW(val, pdst) \
- { \
- uint8_t *pdst_m = (uint8_t *)(pdst); \
- const uint32_t val_m = (val); \
- \
- __asm__ __volatile__("usw %[val_m], %[pdst_m] \n\t" \
- \
- : [pdst_m] "=m"(*pdst_m) \
- : [val_m] "r"(val_m)); \
- }
-
-#define SD(val, pdst) \
- { \
- uint8_t *pdst_m1 = (uint8_t *)(pdst); \
- uint32_t val0_m, val1_m; \
- \
- val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \
- val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \
- \
- SW(val0_m, pdst_m1); \
- SW(val1_m, pdst_m1 + 4); \
- }
-#endif // (__mips_isa_rev >= 6)
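
The two variants above exist because MIPS release 6 dropped the explicit unaligned load/store forms (ulh/ulw/uld, ush/usw) and instead requires the ordinary lh/lw/ld/sh/sw/sd encodings to accept unaligned addresses, so the same LH/LW/LD names compile to whichever spelling the target understands. In portable C the same contract is a memcpy through a byte pointer (hypothetical helper names, shown for the 32-bit case):

    #include <stdint.h>
    #include <string.h>

    /* Equivalent of the LW/SW macros' guarantee: a 32-bit access with no
     * alignment assumption. memcpy is the standard idiom and compiles to a
     * single load/store where the ISA allows it. */
    static uint32_t lw_any(const uint8_t *src) {
      uint32_t v;
      memcpy(&v, src, sizeof(v));
      return v;
    }

    static void sw_any(uint32_t v, uint8_t *dst) {
      memcpy(dst, &v, sizeof(v));
    }

Note that the 32-bit LD fallback builds its 64-bit result from two LW loads with the low-address word in the low half, i.e. it assumes a little-endian target, as the Chromium MIPS configurations are.
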
-
-/* Description : Load 4 words with stride
- Arguments : Inputs - psrc, stride
- Outputs - out0, out1, out2, out3
- Details : Load word in 'out0' from (psrc)
- Load word in 'out1' from (psrc + stride)
- Load word in 'out2' from (psrc + 2 * stride)
- Load word in 'out3' from (psrc + 3 * stride)
-*/
-#define LW4(psrc, stride, out0, out1, out2, out3) \
- { \
- out0 = LW((psrc)); \
- out1 = LW((psrc) + stride); \
- out2 = LW((psrc) + 2 * stride); \
- out3 = LW((psrc) + 3 * stride); \
- }
-
-/* Description : Load double words with stride
- Arguments : Inputs - psrc, stride
- Outputs - out0, out1
- Details : Load double word in 'out0' from (psrc)
- Load double word in 'out1' from (psrc + stride)
-*/
-#define LD2(psrc, stride, out0, out1) \
- { \
- out0 = LD((psrc)); \
- out1 = LD((psrc) + stride); \
- }
-#define LD4(psrc, stride, out0, out1, out2, out3) \
- { \
- LD2((psrc), stride, out0, out1); \
- LD2((psrc) + 2 * stride, stride, out2, out3); \
- }
-
-/* Description : Store 4 words with stride
- Arguments : Inputs - in0, in1, in2, in3, pdst, stride
- Details : Store word from 'in0' to (pdst)
- Store word from 'in1' to (pdst + stride)
- Store word from 'in2' to (pdst + 2 * stride)
- Store word from 'in3' to (pdst + 3 * stride)
-*/
-#define SW4(in0, in1, in2, in3, pdst, stride) \
- { \
- SW(in0, (pdst)) \
- SW(in1, (pdst) + stride); \
- SW(in2, (pdst) + 2 * stride); \
- SW(in3, (pdst) + 3 * stride); \
- }
-
-/* Description : Store 4 double words with stride
- Arguments : Inputs - in0, in1, in2, in3, pdst, stride
- Details : Store double word from 'in0' to (pdst)
- Store double word from 'in1' to (pdst + stride)
- Store double word from 'in2' to (pdst + 2 * stride)
- Store double word from 'in3' to (pdst + 3 * stride)
-*/
-#define SD4(in0, in1, in2, in3, pdst, stride) \
- { \
- SD(in0, (pdst)) \
- SD(in1, (pdst) + stride); \
- SD(in2, (pdst) + 2 * stride); \
- SD(in3, (pdst) + 3 * stride); \
- }
-
-/* Description : Load vectors with 16 byte elements with stride
- Arguments : Inputs - psrc, stride
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Load 16 byte elements in 'out0' from (psrc)
- Load 16 byte elements in 'out1' from (psrc + stride)
-*/
-#define LD_B2(RTYPE, psrc, stride, out0, out1) \
- { \
- out0 = LD_B(RTYPE, (psrc)); \
- out1 = LD_B(RTYPE, (psrc) + stride); \
- }
-#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__)
-#define LD_SB2(...) LD_B2(v16i8, __VA_ARGS__)
-
-#define LD_B3(RTYPE, psrc, stride, out0, out1, out2) \
- { \
- LD_B2(RTYPE, (psrc), stride, out0, out1); \
- out2 = LD_B(RTYPE, (psrc) + 2 * stride); \
- }
-#define LD_UB3(...) LD_B3(v16u8, __VA_ARGS__)
-
-#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) \
- { \
- LD_B2(RTYPE, (psrc), stride, out0, out1); \
- LD_B2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \
- }
-#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__)
-#define LD_SB4(...) LD_B4(v16i8, __VA_ARGS__)
-
-#define LD_B5(RTYPE, psrc, stride, out0, out1, out2, out3, out4) \
- { \
- LD_B4(RTYPE, (psrc), stride, out0, out1, out2, out3); \
- out4 = LD_B(RTYPE, (psrc) + 4 * stride); \
- }
-#define LD_UB5(...) LD_B5(v16u8, __VA_ARGS__)
-#define LD_SB5(...) LD_B5(v16i8, __VA_ARGS__)
-
-#define LD_B7(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5, out6) \
- { \
- LD_B5(RTYPE, (psrc), stride, out0, out1, out2, out3, out4); \
- LD_B2(RTYPE, (psrc) + 5 * stride, stride, out5, out6); \
- }
-#define LD_SB7(...) LD_B7(v16i8, __VA_ARGS__)
-
-#define LD_B8(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5, out6, \
- out7) \
- { \
- LD_B4(RTYPE, (psrc), stride, out0, out1, out2, out3); \
- LD_B4(RTYPE, (psrc) + 4 * stride, stride, out4, out5, out6, out7); \
- }
-#define LD_UB8(...) LD_B8(v16u8, __VA_ARGS__)
-#define LD_SB8(...) LD_B8(v16i8, __VA_ARGS__)
-
-/* Description : Load vectors with 8 halfword elements with stride
- Arguments : Inputs - psrc, stride
- Outputs - out0, out1
- Details : Load 8 halfword elements in 'out0' from (psrc)
- Load 8 halfword elements in 'out1' from (psrc + stride)
-*/
-#define LD_H2(RTYPE, psrc, stride, out0, out1) \
- { \
- out0 = LD_H(RTYPE, (psrc)); \
- out1 = LD_H(RTYPE, (psrc) + (stride)); \
- }
-#define LD_SH2(...) LD_H2(v8i16, __VA_ARGS__)
-
-#define LD_H4(RTYPE, psrc, stride, out0, out1, out2, out3) \
- { \
- LD_H2(RTYPE, (psrc), stride, out0, out1); \
- LD_H2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \
- }
-#define LD_SH4(...) LD_H4(v8i16, __VA_ARGS__)
-
-#define LD_H8(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5, out6, \
- out7) \
- { \
- LD_H4(RTYPE, (psrc), stride, out0, out1, out2, out3); \
- LD_H4(RTYPE, (psrc) + 4 * stride, stride, out4, out5, out6, out7); \
- }
-#define LD_SH8(...) LD_H8(v8i16, __VA_ARGS__)
-
-#define LD_H16(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5, out6, \
- out7, out8, out9, out10, out11, out12, out13, out14, out15) \
- { \
- LD_H8(RTYPE, (psrc), stride, out0, out1, out2, out3, out4, out5, out6, \
- out7); \
- LD_H8(RTYPE, (psrc) + 8 * stride, stride, out8, out9, out10, out11, out12, \
- out13, out14, out15); \
- }
-#define LD_SH16(...) LD_H16(v8i16, __VA_ARGS__)
-
-/* Description : Load 4x4 block of signed halfword elements from 1D source
- data into 4 vectors (Each vector with 4 signed halfwords)
- Arguments : Input - psrc
- Outputs - out0, out1, out2, out3
-*/
-#define LD4x4_SH(psrc, out0, out1, out2, out3) \
- { \
- out0 = LD_SH(psrc); \
- out2 = LD_SH(psrc + 8); \
- out1 = (v8i16)__msa_ilvl_d((v2i64)out0, (v2i64)out0); \
- out3 = (v8i16)__msa_ilvl_d((v2i64)out2, (v2i64)out2); \
- }
-
-/* Description : Load 2 vectors of signed word elements with stride
- Arguments : Inputs - psrc, stride
- Outputs - out0, out1
- Return Type - signed word
-*/
-#define LD_SW2(psrc, stride, out0, out1) \
- { \
- out0 = LD_SW((psrc)); \
- out1 = LD_SW((psrc) + stride); \
- }
-
-/* Description : Store vectors of 16 byte elements with stride
- Arguments : Inputs - in0, in1, pdst, stride
- Details : Store 16 byte elements from 'in0' to (pdst)
- Store 16 byte elements from 'in1' to (pdst + stride)
-*/
-#define ST_B2(RTYPE, in0, in1, pdst, stride) \
- { \
- ST_B(RTYPE, in0, (pdst)); \
- ST_B(RTYPE, in1, (pdst) + stride); \
- }
-#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)
-
-#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \
- { \
- ST_B2(RTYPE, in0, in1, (pdst), stride); \
- ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
- }
-#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
-
-#define ST_B8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \
- { \
- ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride); \
- ST_B4(RTYPE, in4, in5, in6, in7, (pdst) + 4 * stride, stride); \
- }
-#define ST_UB8(...) ST_B8(v16u8, __VA_ARGS__)
-
-/* Description : Store vectors of 8 halfword elements with stride
- Arguments : Inputs - in0, in1, pdst, stride
- Details : Store 8 halfword elements from 'in0' to (pdst)
- Store 8 halfword elements from 'in1' to (pdst + stride)
-*/
-#define ST_H2(RTYPE, in0, in1, pdst, stride) \
- { \
- ST_H(RTYPE, in0, (pdst)); \
- ST_H(RTYPE, in1, (pdst) + stride); \
- }
-#define ST_SH2(...) ST_H2(v8i16, __VA_ARGS__)
-
-#define ST_H4(RTYPE, in0, in1, in2, in3, pdst, stride) \
- { \
- ST_H2(RTYPE, in0, in1, (pdst), stride); \
- ST_H2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
- }
-#define ST_SH4(...) ST_H4(v8i16, __VA_ARGS__)
-
-#define ST_H8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \
- { \
- ST_H4(RTYPE, in0, in1, in2, in3, (pdst), stride); \
- ST_H4(RTYPE, in4, in5, in6, in7, (pdst) + 4 * stride, stride); \
- }
-#define ST_SH8(...) ST_H8(v8i16, __VA_ARGS__)
-
-/* Description : Store vectors of word elements with stride
- Arguments : Inputs - in0, in1, pdst, stride
- Details : Store 4 word elements from 'in0' to (pdst)
- Store 4 word elements from 'in1' to (pdst + stride)
-*/
-#define ST_SW2(in0, in1, pdst, stride) \
- { \
- ST_SW(in0, (pdst)); \
- ST_SW(in1, (pdst) + stride); \
- }
-
-/* Description : Store 2x4 byte block to destination memory from input vector
- Arguments : Inputs - in, stidx, pdst, stride
- Details : Index 'stidx' halfword element from 'in' vector is copied to
- the GP register and stored to (pdst)
- Index 'stidx+1' halfword element from 'in' vector is copied to
- the GP register and stored to (pdst + stride)
- Index 'stidx+2' halfword element from 'in' vector is copied to
- the GP register and stored to (pdst + 2 * stride)
- Index 'stidx+3' halfword element from 'in' vector is copied to
- the GP register and stored to (pdst + 3 * stride)
-*/
-#define ST2x4_UB(in, stidx, pdst, stride) \
- { \
- uint16_t out0_m, out1_m, out2_m, out3_m; \
- uint8_t *pblk_2x4_m = (uint8_t *)(pdst); \
- \
- out0_m = __msa_copy_u_h((v8i16)in, (stidx)); \
- out1_m = __msa_copy_u_h((v8i16)in, (stidx + 1)); \
- out2_m = __msa_copy_u_h((v8i16)in, (stidx + 2)); \
- out3_m = __msa_copy_u_h((v8i16)in, (stidx + 3)); \
- \
- SH(out0_m, pblk_2x4_m); \
- SH(out1_m, pblk_2x4_m + stride); \
- SH(out2_m, pblk_2x4_m + 2 * stride); \
- SH(out3_m, pblk_2x4_m + 3 * stride); \
- }
-
-/* Description : Store 4x2 byte block to destination memory from input vector
- Arguments : Inputs - in, pdst, stride
- Details : Index 0 word element from 'in' vector is copied to the GP
- register and stored to (pdst)
- Index 1 word element from 'in' vector is copied to the GP
- register and stored to (pdst + stride)
-*/
-#define ST4x2_UB(in, pdst, stride) \
- { \
- uint32_t out0_m, out1_m; \
- uint8_t *pblk_4x2_m = (uint8_t *)(pdst); \
- \
- out0_m = __msa_copy_u_w((v4i32)in, 0); \
- out1_m = __msa_copy_u_w((v4i32)in, 1); \
- \
- SW(out0_m, pblk_4x2_m); \
- SW(out1_m, pblk_4x2_m + stride); \
- }
-
-/* Description : Store 4x4 byte block to destination memory from input vector
- Arguments : Inputs - in0, in1, pdst, stride
- Details : 'Idx0' word element from input vector 'in0' is copied to the
- GP register and stored to (pdst)
- 'Idx1' word element from input vector 'in0' is copied to the
- GP register and stored to (pdst + stride)
- 'Idx2' word element from input vector 'in0' is copied to the
- GP register and stored to (pdst + 2 * stride)
- 'Idx3' word element from input vector 'in0' is copied to the
- GP register and stored to (pdst + 3 * stride)
-*/
-#define ST4x4_UB(in0, in1, idx0, idx1, idx2, idx3, pdst, stride) \
- { \
- uint32_t out0_m, out1_m, out2_m, out3_m; \
- uint8_t *pblk_4x4_m = (uint8_t *)(pdst); \
- \
- out0_m = __msa_copy_u_w((v4i32)in0, idx0); \
- out1_m = __msa_copy_u_w((v4i32)in0, idx1); \
- out2_m = __msa_copy_u_w((v4i32)in1, idx2); \
- out3_m = __msa_copy_u_w((v4i32)in1, idx3); \
- \
- SW4(out0_m, out1_m, out2_m, out3_m, pblk_4x4_m, stride); \
- }
-#define ST4x8_UB(in0, in1, pdst, stride) \
- { \
- uint8_t *pblk_4x8 = (uint8_t *)(pdst); \
- \
- ST4x4_UB(in0, in0, 0, 1, 2, 3, pblk_4x8, stride); \
- ST4x4_UB(in1, in1, 0, 1, 2, 3, pblk_4x8 + 4 * stride, stride); \
- }
-
-/* Description : Store 8x1 byte block to destination memory from input vector
- Arguments : Inputs - in, pdst
- Details : Index 0 double word element from 'in' vector is copied to the
- GP register and stored to (pdst)
-*/
-#define ST8x1_UB(in, pdst) \
- { \
- uint64_t out0_m; \
- \
- out0_m = __msa_copy_u_d((v2i64)in, 0); \
- SD(out0_m, pdst); \
- }
-
-/* Description : Store 8x2 byte block to destination memory from input vector
- Arguments : Inputs - in, pdst, stride
- Details : Index 0 double word element from 'in' vector is copied to the
- GP register and stored to (pdst)
- Index 1 double word element from 'in' vector is copied to the
- GP register and stored to (pdst + stride)
-*/
-#define ST8x2_UB(in, pdst, stride) \
- { \
- uint64_t out0_m, out1_m; \
- uint8_t *pblk_8x2_m = (uint8_t *)(pdst); \
- \
- out0_m = __msa_copy_u_d((v2i64)in, 0); \
- out1_m = __msa_copy_u_d((v2i64)in, 1); \
- \
- SD(out0_m, pblk_8x2_m); \
- SD(out1_m, pblk_8x2_m + stride); \
- }
-
-/* Description : Store 8x4 byte block to destination memory from input
- vectors
- Arguments : Inputs - in0, in1, pdst, stride
- Details : Index 0 double word element from 'in0' vector is copied to the
- GP register and stored to (pdst)
- Index 1 double word element from 'in0' vector is copied to the
- GP register and stored to (pdst + stride)
- Index 0 double word element from 'in1' vector is copied to the
- GP register and stored to (pdst + 2 * stride)
- Index 1 double word element from 'in1' vector is copied to the
- GP register and stored to (pdst + 3 * stride)
-*/
-#define ST8x4_UB(in0, in1, pdst, stride) \
- { \
- uint64_t out0_m, out1_m, out2_m, out3_m; \
- uint8_t *pblk_8x4_m = (uint8_t *)(pdst); \
- \
- out0_m = __msa_copy_u_d((v2i64)in0, 0); \
- out1_m = __msa_copy_u_d((v2i64)in0, 1); \
- out2_m = __msa_copy_u_d((v2i64)in1, 0); \
- out3_m = __msa_copy_u_d((v2i64)in1, 1); \
- \
- SD4(out0_m, out1_m, out2_m, out3_m, pblk_8x4_m, stride); \
- }
-
-/* Description : average with rounding (in0 + in1 + 1) / 2.
- Arguments : Inputs - in0, in1, in2, in3,
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Each unsigned byte element from 'in0' vector is added with
- each unsigned byte element from 'in1' vector. Then the average
- with rounding is calculated and written to 'out0'
-*/
-#define AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_aver_u_b((v16u8)in0, (v16u8)in1); \
- out1 = (RTYPE)__msa_aver_u_b((v16u8)in2, (v16u8)in3); \
- }
-#define AVER_UB2_UB(...) AVER_UB2(v16u8, __VA_ARGS__)
-
-#define AVER_UB4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) \
- AVER_UB2(RTYPE, in4, in5, in6, in7, out2, out3) \
- }
-#define AVER_UB4_UB(...) AVER_UB4(v16u8, __VA_ARGS__)
-
-/* Description : Immediate number of elements to slide with zero
- Arguments : Inputs - in0, in1, slide_val
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Byte elements from 'zero_m' vector are slid into 'in0' by
- value specified in the 'slide_val'
-*/
-#define SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val) \
- { \
- v16i8 zero_m = { 0 }; \
- out0 = (RTYPE)__msa_sldi_b((v16i8)zero_m, (v16i8)in0, slide_val); \
- out1 = (RTYPE)__msa_sldi_b((v16i8)zero_m, (v16i8)in1, slide_val); \
- }
-#define SLDI_B2_0_SW(...) SLDI_B2_0(v4i32, __VA_ARGS__)
-
-#define SLDI_B4_0(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3, \
- slide_val) \
- { \
- SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val); \
- SLDI_B2_0(RTYPE, in2, in3, out2, out3, slide_val); \
- }
-#define SLDI_B4_0_UB(...) SLDI_B4_0(v16u8, __VA_ARGS__)
-
-/* Description : Immediate number of elements to slide
- Arguments : Inputs - in0_0, in0_1, in1_0, in1_1, slide_val
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Byte elements from 'in0_0' vector are slid into 'in1_0' by
- value specified in the 'slide_val'
-*/
-#define SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val) \
- { \
- out0 = (RTYPE)__msa_sldi_b((v16i8)in0_0, (v16i8)in1_0, slide_val); \
- out1 = (RTYPE)__msa_sldi_b((v16i8)in0_1, (v16i8)in1_1, slide_val); \
- }
-#define SLDI_B2_UB(...) SLDI_B2(v16u8, __VA_ARGS__)
-#define SLDI_B2_SH(...) SLDI_B2(v8i16, __VA_ARGS__)
-
-#define SLDI_B3(RTYPE, in0_0, in0_1, in0_2, in1_0, in1_1, in1_2, out0, out1, \
- out2, slide_val) \
- { \
- SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val) \
- out2 = (RTYPE)__msa_sldi_b((v16i8)in0_2, (v16i8)in1_2, slide_val); \
- }
-#define SLDI_B3_SB(...) SLDI_B3(v16i8, __VA_ARGS__)
-#define SLDI_B3_UH(...) SLDI_B3(v8u16, __VA_ARGS__)
-
-/* Description : Shuffle byte vector elements as per mask vector
- Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Byte elements from 'in0' & 'in1' are copied selectively to
- 'out0' as per control vector 'mask0'
-*/
-#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_vshf_b((v16i8)mask0, (v16i8)in1, (v16i8)in0); \
- out1 = (RTYPE)__msa_vshf_b((v16i8)mask1, (v16i8)in3, (v16i8)in2); \
- }
-#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)
-#define VSHF_B2_SB(...) VSHF_B2(v16i8, __VA_ARGS__)
-#define VSHF_B2_UH(...) VSHF_B2(v8u16, __VA_ARGS__)
-
-#define VSHF_B4(RTYPE, in0, in1, mask0, mask1, mask2, mask3, out0, out1, out2, \
- out3) \
- { \
- VSHF_B2(RTYPE, in0, in1, in0, in1, mask0, mask1, out0, out1); \
- VSHF_B2(RTYPE, in0, in1, in0, in1, mask2, mask3, out2, out3); \
- }
-#define VSHF_B4_SB(...) VSHF_B4(v16i8, __VA_ARGS__)
-#define VSHF_B4_SH(...) VSHF_B4(v8i16, __VA_ARGS__)
-
-/* Description : Dot product of byte vector elements
- Arguments : Inputs - mult0, mult1, cnst0, cnst1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Unsigned byte elements from 'mult0' are multiplied with
- unsigned byte elements from 'cnst0' producing a result
- twice the size of input i.e. unsigned halfword.
- The multiplication result of adjacent odd-even elements
- are added together and written to the 'out0' vector
-*/
-#define DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_dotp_u_h((v16u8)mult0, (v16u8)cnst0); \
- out1 = (RTYPE)__msa_dotp_u_h((v16u8)mult1, (v16u8)cnst1); \
- }
-#define DOTP_UB2_UH(...) DOTP_UB2(v8u16, __VA_ARGS__)
-
-#define DOTP_UB4(RTYPE, mult0, mult1, mult2, mult3, cnst0, cnst1, cnst2, \
- cnst3, out0, out1, out2, out3) \
- { \
- DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
- DOTP_UB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
- }
-#define DOTP_UB4_UH(...) DOTP_UB4(v8u16, __VA_ARGS__)
-
-/* Description : Dot product of byte vector elements
- Arguments : Inputs - mult0, mult1, cnst0, cnst1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Signed byte elements from 'mult0' are multiplied with
- signed byte elements from 'cnst0' producing a result
- twice the size of input i.e. signed halfword.
- The multiplication result of adjacent odd-even elements
- are added together and written to the 'out0' vector
-*/
-#define DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_dotp_s_h((v16i8)mult0, (v16i8)cnst0); \
- out1 = (RTYPE)__msa_dotp_s_h((v16i8)mult1, (v16i8)cnst1); \
- }
-#define DOTP_SB2_SH(...) DOTP_SB2(v8i16, __VA_ARGS__)
-
-#define DOTP_SB4(RTYPE, mult0, mult1, mult2, mult3, cnst0, cnst1, cnst2, \
- cnst3, out0, out1, out2, out3) \
- { \
- DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
- DOTP_SB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
- }
-#define DOTP_SB4_SH(...) DOTP_SB4(v8i16, __VA_ARGS__)
-
-/* Description : Dot product of halfword vector elements
- Arguments : Inputs - mult0, mult1, cnst0, cnst1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Signed halfword elements from 'mult0' are multiplied with
- signed halfword elements from 'cnst0' producing a result
- twice the size of input i.e. signed word.
- The multiplication result of adjacent odd-even elements
- are added together and written to the 'out0' vector
-*/
-#define DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_dotp_s_w((v8i16)mult0, (v8i16)cnst0); \
- out1 = (RTYPE)__msa_dotp_s_w((v8i16)mult1, (v8i16)cnst1); \
- }
-#define DOTP_SH2_SW(...) DOTP_SH2(v4i32, __VA_ARGS__)
-
-#define DOTP_SH4(RTYPE, mult0, mult1, mult2, mult3, cnst0, cnst1, cnst2, \
- cnst3, out0, out1, out2, out3) \
- { \
- DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
- DOTP_SH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
- }
-#define DOTP_SH4_SW(...) DOTP_SH4(v4i32, __VA_ARGS__)
-
-/* Description : Dot product of word vector elements
- Arguments : Inputs - mult0, mult1, cnst0, cnst1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Signed word elements from 'mult0' are multiplied with
- signed word elements from 'cnst0' producing a result
- twice the size of input i.e. signed double word.
- The multiplication result of adjacent odd-even elements
- are added together and written to the 'out0' vector
-*/
-#define DOTP_SW2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_dotp_s_d((v4i32)mult0, (v4i32)cnst0); \
- out1 = (RTYPE)__msa_dotp_s_d((v4i32)mult1, (v4i32)cnst1); \
- }
-#define DOTP_SW2_SD(...) DOTP_SW2(v2i64, __VA_ARGS__)
-
-/* Description : Dot product & addition of byte vector elements
- Arguments : Inputs - mult0, mult1, cnst0, cnst1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Signed byte elements from 'mult0' are multiplied with
- signed byte elements from 'cnst0' producing a result
- twice the size of input i.e. signed halfword.
- The multiplication result of adjacent odd-even elements
- are added to the 'out0' vector
-*/
-#define DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_dpadd_s_h((v8i16)out0, (v16i8)mult0, (v16i8)cnst0); \
- out1 = (RTYPE)__msa_dpadd_s_h((v8i16)out1, (v16i8)mult1, (v16i8)cnst1); \
- }
-#define DPADD_SB2_SH(...) DPADD_SB2(v8i16, __VA_ARGS__)
-
-#define DPADD_SB4(RTYPE, mult0, mult1, mult2, mult3, cnst0, cnst1, cnst2, \
- cnst3, out0, out1, out2, out3) \
- { \
- DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
- DPADD_SB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
- }
-#define DPADD_SB4_SH(...) DPADD_SB4(v8i16, __VA_ARGS__)
-
-/* Description : Dot product & addition of halfword vector elements
- Arguments : Inputs - mult0, mult1, cnst0, cnst1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Signed halfword elements from 'mult0' are multiplied with
- signed halfword elements from 'cnst0' producing a result
- twice the size of input i.e. signed word.
- The multiplication result of adjacent odd-even elements
- are added to the 'out0' vector
-*/
-#define DPADD_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_dpadd_s_w((v4i32)out0, (v8i16)mult0, (v8i16)cnst0); \
- out1 = (RTYPE)__msa_dpadd_s_w((v4i32)out1, (v8i16)mult1, (v8i16)cnst1); \
- }
-#define DPADD_SH2_SW(...) DPADD_SH2(v4i32, __VA_ARGS__)
-
-/* Description : Dot product & addition of double word vector elements
- Arguments : Inputs - mult0, mult1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Each signed word element from 'mult0' is multiplied with itself
- producing an intermediate result twice the size of input
- i.e. signed double word
- The multiplication result of adjacent odd-even elements
- are added to the 'out0' vector
-*/
-#define DPADD_SD2(RTYPE, mult0, mult1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_dpadd_s_d((v2i64)out0, (v4i32)mult0, (v4i32)mult0); \
- out1 = (RTYPE)__msa_dpadd_s_d((v2i64)out1, (v4i32)mult1, (v4i32)mult1); \
- }
-#define DPADD_SD2_SD(...) DPADD_SD2(v2i64, __VA_ARGS__)
-
-/* Description : Minimum values between unsigned elements of
- either vector are copied to the output vector
- Arguments : Inputs - in0, in1, min_vec
- Outputs - in place operation
- Return Type - as per RTYPE
- Details : Minimum of unsigned halfword element values from 'in0' and
- 'min_vec' are written to output vector 'in0'
-*/
-#define MIN_UH2(RTYPE, in0, in1, min_vec) \
- { \
- in0 = (RTYPE)__msa_min_u_h((v8u16)in0, min_vec); \
- in1 = (RTYPE)__msa_min_u_h((v8u16)in1, min_vec); \
- }
-#define MIN_UH2_UH(...) MIN_UH2(v8u16, __VA_ARGS__)
-
-#define MIN_UH4(RTYPE, in0, in1, in2, in3, min_vec) \
- { \
- MIN_UH2(RTYPE, in0, in1, min_vec); \
- MIN_UH2(RTYPE, in2, in3, min_vec); \
- }
-#define MIN_UH4_UH(...) MIN_UH4(v8u16, __VA_ARGS__)
-
-/* Description : Clips all signed halfword elements of input vector
- between 0 & 255
- Arguments : Input - in
- Output - out_m
- Return Type - signed halfword
-*/
-#define CLIP_SH_0_255(in) \
- ({ \
- v8i16 max_m = __msa_ldi_h(255); \
- v8i16 out_m; \
- \
- out_m = __msa_maxi_s_h((v8i16)in, 0); \
- out_m = __msa_min_s_h((v8i16)max_m, (v8i16)out_m); \
- out_m; \
- })
-#define CLIP_SH2_0_255(in0, in1) \
- { \
- in0 = CLIP_SH_0_255(in0); \
- in1 = CLIP_SH_0_255(in1); \
- }
-#define CLIP_SH4_0_255(in0, in1, in2, in3) \
- { \
- CLIP_SH2_0_255(in0, in1); \
- CLIP_SH2_0_255(in2, in3); \
- }
-
-/* Description : Horizontal addition of 4 signed word elements of input vector
- Arguments : Input - in (signed word vector)
- Output - sum_m (i32 sum)
- Return Type - signed word (GP)
- Details : 4 signed word elements of 'in' vector are added together and
- the resulting integer sum is returned
-*/
-#define HADD_SW_S32(in) \
- ({ \
- v2i64 res0_m, res1_m; \
- int32_t sum_m; \
- \
- res0_m = __msa_hadd_s_d((v4i32)in, (v4i32)in); \
- res1_m = __msa_splati_d(res0_m, 1); \
- res0_m = res0_m + res1_m; \
- sum_m = __msa_copy_s_w((v4i32)res0_m, 0); \
- sum_m; \
- })
-
-/* Description : Horizontal addition of 8 unsigned halfword elements
- Arguments : Inputs - in (unsigned halfword vector)
- Outputs - sum_m (u32 sum)
- Return Type - unsigned word
- Details : 8 unsigned halfword elements of input vector are added
- together and the resulting integer sum is returned
-*/
-#define HADD_UH_U32(in) \
- ({ \
- v4u32 res_m; \
- v2u64 res0_m, res1_m; \
- uint32_t sum_m; \
- \
- res_m = __msa_hadd_u_w((v8u16)in, (v8u16)in); \
- res0_m = __msa_hadd_u_d(res_m, res_m); \
- res1_m = (v2u64)__msa_splati_d((v2i64)res0_m, 1); \
- res0_m = res0_m + res1_m; \
- sum_m = __msa_copy_u_w((v4i32)res0_m, 0); \
- sum_m; \
- })
-
-/* Description : Horizontal addition of unsigned byte vector elements
- Arguments : Inputs - in0, in1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Each unsigned odd byte element from 'in0' is added to
- even unsigned byte element from 'in0' (pairwise) and the
- halfword result is written to 'out0'
-*/
-#define HADD_UB2(RTYPE, in0, in1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_hadd_u_h((v16u8)in0, (v16u8)in0); \
- out1 = (RTYPE)__msa_hadd_u_h((v16u8)in1, (v16u8)in1); \
- }
-#define HADD_UB2_UH(...) HADD_UB2(v8u16, __VA_ARGS__)
-
-#define HADD_UB4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \
- { \
- HADD_UB2(RTYPE, in0, in1, out0, out1); \
- HADD_UB2(RTYPE, in2, in3, out2, out3); \
- }
-#define HADD_UB4_UH(...) HADD_UB4(v8u16, __VA_ARGS__)
-
-/* Description : Horizontal subtraction of unsigned byte vector elements
- Arguments : Inputs - in0, in1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Each unsigned odd byte element from 'in0' is subtracted from
- even unsigned byte element from 'in0' (pairwise) and the
- halfword result is written to 'out0'
-*/
-#define HSUB_UB2(RTYPE, in0, in1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_hsub_u_h((v16u8)in0, (v16u8)in0); \
- out1 = (RTYPE)__msa_hsub_u_h((v16u8)in1, (v16u8)in1); \
- }
-#define HSUB_UB2_SH(...) HSUB_UB2(v8i16, __VA_ARGS__)
-
-/* Description : SAD (Sum of Absolute Difference)
- Arguments : Inputs - in0, in1, ref0, ref1
- Outputs - sad_m (halfword vector)
- Return Type - unsigned halfword
- Details : Absolute difference of all the byte elements from 'in0' with
- 'ref0' is calculated and preserved in 'diff0'. Then even-odd
- pairs are added together to generate 8 halfword results.
-*/
-#define SAD_UB2_UH(in0, in1, ref0, ref1) \
- ({ \
- v16u8 diff0_m, diff1_m; \
- v8u16 sad_m = { 0 }; \
- \
- diff0_m = __msa_asub_u_b((v16u8)in0, (v16u8)ref0); \
- diff1_m = __msa_asub_u_b((v16u8)in1, (v16u8)ref1); \
- \
- sad_m += __msa_hadd_u_h((v16u8)diff0_m, (v16u8)diff0_m); \
- sad_m += __msa_hadd_u_h((v16u8)diff1_m, (v16u8)diff1_m); \
- \
- sad_m; \
- })
-
-/* Description : Horizontal subtraction of signed halfword vector elements
- Arguments : Inputs - in0, in1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Each signed odd halfword element from 'in0' is subtracted from
- even signed halfword element from 'in0' (pairwise) and the
- word result is written to 'out0'
-*/
-#define HSUB_UH2(RTYPE, in0, in1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_hsub_s_w((v8i16)in0, (v8i16)in0); \
- out1 = (RTYPE)__msa_hsub_s_w((v8i16)in1, (v8i16)in1); \
- }
-#define HSUB_UH2_SW(...) HSUB_UH2(v4i32, __VA_ARGS__)
-
-/* Description : Set element n input vector to GPR value
- Arguments : Inputs - in0, in1, in2, in3
- Output - out
- Return Type - as per RTYPE
- Details : Set element 0 in vector 'out' to value specified in 'in0'
-*/
-#define INSERT_W2(RTYPE, in0, in1, out) \
- { \
- out = (RTYPE)__msa_insert_w((v4i32)out, 0, in0); \
- out = (RTYPE)__msa_insert_w((v4i32)out, 1, in1); \
- }
-#define INSERT_W2_SB(...) INSERT_W2(v16i8, __VA_ARGS__)
-
-#define INSERT_W4(RTYPE, in0, in1, in2, in3, out) \
- { \
- out = (RTYPE)__msa_insert_w((v4i32)out, 0, in0); \
- out = (RTYPE)__msa_insert_w((v4i32)out, 1, in1); \
- out = (RTYPE)__msa_insert_w((v4i32)out, 2, in2); \
- out = (RTYPE)__msa_insert_w((v4i32)out, 3, in3); \
- }
-#define INSERT_W4_UB(...) INSERT_W4(v16u8, __VA_ARGS__)
-#define INSERT_W4_SB(...) INSERT_W4(v16i8, __VA_ARGS__)
-
-#define INSERT_D2(RTYPE, in0, in1, out) \
- { \
- out = (RTYPE)__msa_insert_d((v2i64)out, 0, in0); \
- out = (RTYPE)__msa_insert_d((v2i64)out, 1, in1); \
- }
-#define INSERT_D2_UB(...) INSERT_D2(v16u8, __VA_ARGS__)
-#define INSERT_D2_SB(...) INSERT_D2(v16i8, __VA_ARGS__)
-
-/* Description : Interleave even byte elements from vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Even byte elements of 'in0' and 'in1' are interleaved
- and written to 'out0'
-*/
-#define ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvev_b((v16i8)in1, (v16i8)in0); \
- out1 = (RTYPE)__msa_ilvev_b((v16i8)in3, (v16i8)in2); \
- }
-#define ILVEV_B2_UB(...) ILVEV_B2(v16u8, __VA_ARGS__)
-#define ILVEV_B2_SH(...) ILVEV_B2(v8i16, __VA_ARGS__)
-
-/* Description : Interleave even halfword elements from vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Even halfword elements of 'in0' and 'in1' are interleaved
- and written to 'out0'
-*/
-#define ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvev_h((v8i16)in1, (v8i16)in0); \
- out1 = (RTYPE)__msa_ilvev_h((v8i16)in3, (v8i16)in2); \
- }
-#define ILVEV_H2_UB(...) ILVEV_H2(v16u8, __VA_ARGS__)
-#define ILVEV_H2_SH(...) ILVEV_H2(v8i16, __VA_ARGS__)
-#define ILVEV_H2_SW(...) ILVEV_H2(v4i32, __VA_ARGS__)
-
-/* Description : Interleave even word elements from vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Even word elements of 'in0' and 'in1' are interleaved
- and written to 'out0'
-*/
-#define ILVEV_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvev_w((v4i32)in1, (v4i32)in0); \
- out1 = (RTYPE)__msa_ilvev_w((v4i32)in3, (v4i32)in2); \
- }
-#define ILVEV_W2_SB(...) ILVEV_W2(v16i8, __VA_ARGS__)
-
-/* Description : Interleave even double word elements from vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Even double word elements of 'in0' and 'in1' are interleaved
- and written to 'out0'
-*/
-#define ILVEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvev_d((v2i64)in1, (v2i64)in0); \
- out1 = (RTYPE)__msa_ilvev_d((v2i64)in3, (v2i64)in2); \
- }
-#define ILVEV_D2_UB(...) ILVEV_D2(v16u8, __VA_ARGS__)
-
-/* Description : Interleave left half of byte elements from vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Left half of byte elements of 'in0' and 'in1' are interleaved
- and written to 'out0'.
-*/
-#define ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \
- out1 = (RTYPE)__msa_ilvl_b((v16i8)in2, (v16i8)in3); \
- }
-#define ILVL_B2_UB(...) ILVL_B2(v16u8, __VA_ARGS__)
-#define ILVL_B2_SB(...) ILVL_B2(v16i8, __VA_ARGS__)
-#define ILVL_B2_UH(...) ILVL_B2(v8u16, __VA_ARGS__)
-#define ILVL_B2_SH(...) ILVL_B2(v8i16, __VA_ARGS__)
-
-#define ILVL_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
- ILVL_B2(RTYPE, in4, in5, in6, in7, out2, out3); \
- }
-#define ILVL_B4_SB(...) ILVL_B4(v16i8, __VA_ARGS__)
-#define ILVL_B4_SH(...) ILVL_B4(v8i16, __VA_ARGS__)
-#define ILVL_B4_UH(...) ILVL_B4(v8u16, __VA_ARGS__)
-
-/* Description : Interleave left half of halfword elements from vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Left half of halfword elements of 'in0' and 'in1' are
- interleaved and written to 'out0'.
-*/
-#define ILVL_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvl_h((v8i16)in0, (v8i16)in1); \
- out1 = (RTYPE)__msa_ilvl_h((v8i16)in2, (v8i16)in3); \
- }
-#define ILVL_H2_SH(...) ILVL_H2(v8i16, __VA_ARGS__)
-#define ILVL_H2_SW(...) ILVL_H2(v4i32, __VA_ARGS__)
-
-/* Description : Interleave left half of word elements from vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Left half of word elements of 'in0' and 'in1' are interleaved
- and written to 'out0'.
-*/
-#define ILVL_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvl_w((v4i32)in0, (v4i32)in1); \
- out1 = (RTYPE)__msa_ilvl_w((v4i32)in2, (v4i32)in3); \
- }
-#define ILVL_W2_UB(...) ILVL_W2(v16u8, __VA_ARGS__)
-#define ILVL_W2_SH(...) ILVL_W2(v8i16, __VA_ARGS__)
-
-/* Description : Interleave right half of byte elements from vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Right half of byte elements of 'in0' and 'in1' are interleaved
- and written to 'out0'.
-*/
-#define ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \
- out1 = (RTYPE)__msa_ilvr_b((v16i8)in2, (v16i8)in3); \
- }
-#define ILVR_B2_UB(...) ILVR_B2(v16u8, __VA_ARGS__)
-#define ILVR_B2_SB(...) ILVR_B2(v16i8, __VA_ARGS__)
-#define ILVR_B2_UH(...) ILVR_B2(v8u16, __VA_ARGS__)
-#define ILVR_B2_SH(...) ILVR_B2(v8i16, __VA_ARGS__)
-
-#define ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
- ILVR_B2(RTYPE, in4, in5, in6, in7, out2, out3); \
- }
-#define ILVR_B4_UB(...) ILVR_B4(v16u8, __VA_ARGS__)
-#define ILVR_B4_SB(...) ILVR_B4(v16i8, __VA_ARGS__)
-#define ILVR_B4_UH(...) ILVR_B4(v8u16, __VA_ARGS__)
-#define ILVR_B4_SH(...) ILVR_B4(v8i16, __VA_ARGS__)
-
-#define ILVR_B8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, \
- in11, in12, in13, in14, in15, out0, out1, out2, out3, out4, \
- out5, out6, out7) \
- { \
- ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \
- out3); \
- ILVR_B4(RTYPE, in8, in9, in10, in11, in12, in13, in14, in15, out4, out5, \
- out6, out7); \
- }
-#define ILVR_B8_UH(...) ILVR_B8(v8u16, __VA_ARGS__)
-
-/* Description : Interleave right half of halfword elements from vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Right half of halfword elements of 'in0' and 'in1' are
- interleaved and written to 'out0'.
-*/
-#define ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvr_h((v8i16)in0, (v8i16)in1); \
- out1 = (RTYPE)__msa_ilvr_h((v8i16)in2, (v8i16)in3); \
- }
-#define ILVR_H2_SH(...) ILVR_H2(v8i16, __VA_ARGS__)
-#define ILVR_H2_SW(...) ILVR_H2(v4i32, __VA_ARGS__)
-
-#define ILVR_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
- ILVR_H2(RTYPE, in4, in5, in6, in7, out2, out3); \
- }
-#define ILVR_H4_SH(...) ILVR_H4(v8i16, __VA_ARGS__)
-
-#define ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvr_w((v4i32)in0, (v4i32)in1); \
- out1 = (RTYPE)__msa_ilvr_w((v4i32)in2, (v4i32)in3); \
- }
-#define ILVR_W2_UB(...) ILVR_W2(v16u8, __VA_ARGS__)
-#define ILVR_W2_SH(...) ILVR_W2(v8i16, __VA_ARGS__)
-
-#define ILVR_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1); \
- ILVR_W2(RTYPE, in4, in5, in6, in7, out2, out3); \
- }
-#define ILVR_W4_UB(...) ILVR_W4(v16u8, __VA_ARGS__)
-
-/* Description : Interleave right half of double word elements from vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Right half of double word elements of 'in0' and 'in1' are
- interleaved and written to 'out0'.
-*/
-#define ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvr_d((v2i64)(in0), (v2i64)(in1)); \
- out1 = (RTYPE)__msa_ilvr_d((v2i64)(in2), (v2i64)(in3)); \
- }
-#define ILVR_D2_UB(...) ILVR_D2(v16u8, __VA_ARGS__)
-#define ILVR_D2_SB(...) ILVR_D2(v16i8, __VA_ARGS__)
-#define ILVR_D2_SH(...) ILVR_D2(v8i16, __VA_ARGS__)
-
-#define ILVR_D3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
- { \
- ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
- out2 = (RTYPE)__msa_ilvr_d((v2i64)(in4), (v2i64)(in5)); \
- }
-#define ILVR_D3_SB(...) ILVR_D3(v16i8, __VA_ARGS__)
-
-#define ILVR_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
- ILVR_D2(RTYPE, in4, in5, in6, in7, out2, out3); \
- }
-#define ILVR_D4_SB(...) ILVR_D4(v16i8, __VA_ARGS__)
-#define ILVR_D4_UB(...) ILVR_D4(v16u8, __VA_ARGS__)
-
-/* Description : Interleave both left and right half of input vectors
- Arguments : Inputs - in0, in1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Right half of byte elements from 'in0' and 'in1' are
- interleaved and written to 'out0'; the left halves are
- interleaved and written to 'out1'
-*/
-#define ILVRL_B2(RTYPE, in0, in1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \
- out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \
- }
-#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
-#define ILVRL_B2_SB(...) ILVRL_B2(v16i8, __VA_ARGS__)
-#define ILVRL_B2_UH(...) ILVRL_B2(v8u16, __VA_ARGS__)
-#define ILVRL_B2_SH(...) ILVRL_B2(v8i16, __VA_ARGS__)
-
-#define ILVRL_H2(RTYPE, in0, in1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvr_h((v8i16)in0, (v8i16)in1); \
- out1 = (RTYPE)__msa_ilvl_h((v8i16)in0, (v8i16)in1); \
- }
-#define ILVRL_H2_SH(...) ILVRL_H2(v8i16, __VA_ARGS__)
-#define ILVRL_H2_SW(...) ILVRL_H2(v4i32, __VA_ARGS__)
-
-#define ILVRL_W2(RTYPE, in0, in1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_ilvr_w((v4i32)in0, (v4i32)in1); \
- out1 = (RTYPE)__msa_ilvl_w((v4i32)in0, (v4i32)in1); \
- }
-#define ILVRL_W2_UB(...) ILVRL_W2(v16u8, __VA_ARGS__)
-#define ILVRL_W2_SH(...) ILVRL_W2(v8i16, __VA_ARGS__)
-#define ILVRL_W2_SW(...) ILVRL_W2(v4i32, __VA_ARGS__)
-
-/* Description : Saturate the halfword element values to the max
- unsigned value of (sat_val + 1) bits
- The element data width remains unchanged
- Arguments : Inputs - in0, in1, sat_val
- Outputs - in place operation
- Return Type - as per RTYPE
- Details : Each unsigned halfword element from 'in0' is saturated to the
- maximum value representable in (sat_val + 1) bits.
- The results are written in place
-*/
-#define SAT_UH2(RTYPE, in0, in1, sat_val) \
- { \
- in0 = (RTYPE)__msa_sat_u_h((v8u16)in0, sat_val); \
- in1 = (RTYPE)__msa_sat_u_h((v8u16)in1, sat_val); \
- }
-#define SAT_UH2_UH(...) SAT_UH2(v8u16, __VA_ARGS__)
-
-#define SAT_UH4(RTYPE, in0, in1, in2, in3, sat_val) \
- { \
- SAT_UH2(RTYPE, in0, in1, sat_val); \
- SAT_UH2(RTYPE, in2, in3, sat_val); \
- }
-#define SAT_UH4_UH(...) SAT_UH4(v8u16, __VA_ARGS__)
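A scalar sketch of the per-lane unsigned saturation these macros apply (sat_u_h_model is an illustrative name, not part of the library):

#include <stdint.h>

/* Saturate an unsigned halfword to the maximum value representable in
   (sat_val + 1) bits, mirroring the per-lane behaviour of __msa_sat_u_h. */
static inline uint16_t sat_u_h_model(uint16_t x, unsigned sat_val) {
  const uint16_t max = (uint16_t)((1u << (sat_val + 1)) - 1);
  return (x > max) ? max : x;
}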
-
-/* Description : Saturate the halfword element values to the max
- signed value of (sat_val + 1) bits
- The element data width remains unchanged
- Arguments : Inputs - in0, in1, sat_val
- Outputs - in place operation
- Return Type - as per RTYPE
- Details : Each signed halfword element from 'in0' is saturated to the
- signed range representable in (sat_val + 1) bits
- The results are written in place
-*/
-#define SAT_SH2(RTYPE, in0, in1, sat_val) \
- { \
- in0 = (RTYPE)__msa_sat_s_h((v8i16)in0, sat_val); \
- in1 = (RTYPE)__msa_sat_s_h((v8i16)in1, sat_val); \
- }
-#define SAT_SH2_SH(...) SAT_SH2(v8i16, __VA_ARGS__)
-
-#define SAT_SH4(RTYPE, in0, in1, in2, in3, sat_val) \
- { \
- SAT_SH2(RTYPE, in0, in1, sat_val); \
- SAT_SH2(RTYPE, in2, in3, sat_val); \
- }
-#define SAT_SH4_SH(...) SAT_SH4(v8i16, __VA_ARGS__)
-
-/* Description : Indexed halfword element values are replicated to all
- elements in output vector
- Arguments : Inputs - in, idx0, idx1
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : 'idx0' element value from 'in' vector is replicated to all
- elements in 'out0' vector
- Valid index range for halfword operation is 0-7
-*/
-#define SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1) \
- { \
- out0 = (RTYPE)__msa_splati_h((v8i16)in, idx0); \
- out1 = (RTYPE)__msa_splati_h((v8i16)in, idx1); \
- }
-#define SPLATI_H2_SH(...) SPLATI_H2(v8i16, __VA_ARGS__)
-
-#define SPLATI_H4(RTYPE, in, idx0, idx1, idx2, idx3, out0, out1, out2, out3) \
- { \
- SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1); \
- SPLATI_H2(RTYPE, in, idx2, idx3, out2, out3); \
- }
-#define SPLATI_H4_SB(...) SPLATI_H4(v16i8, __VA_ARGS__)
-#define SPLATI_H4_SH(...) SPLATI_H4(v8i16, __VA_ARGS__)
-
-/* Description : Pack even byte elements of vector pairs
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Even byte elements of 'in0' are copied to the left half of
- 'out0' & even byte elements of 'in1' are copied to the right
- half of 'out0'.
-*/
-#define PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_pckev_b((v16i8)in0, (v16i8)in1); \
- out1 = (RTYPE)__msa_pckev_b((v16i8)in2, (v16i8)in3); \
- }
-#define PCKEV_B2_SB(...) PCKEV_B2(v16i8, __VA_ARGS__)
-#define PCKEV_B2_UB(...) PCKEV_B2(v16u8, __VA_ARGS__)
-#define PCKEV_B2_SH(...) PCKEV_B2(v8i16, __VA_ARGS__)
-
-#define PCKEV_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
- PCKEV_B2(RTYPE, in4, in5, in6, in7, out2, out3); \
- }
-#define PCKEV_B4_SB(...) PCKEV_B4(v16i8, __VA_ARGS__)
-#define PCKEV_B4_UB(...) PCKEV_B4(v16u8, __VA_ARGS__)
-#define PCKEV_B4_SH(...) PCKEV_B4(v8i16, __VA_ARGS__)
-
-/* Description : Pack even halfword elements of vector pairs
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Even halfword elements of 'in0' are copied to the left half of
- 'out0' & even halfword elements of 'in1' are copied to the
- right half of 'out0'.
-*/
-#define PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_pckev_h((v8i16)in0, (v8i16)in1); \
- out1 = (RTYPE)__msa_pckev_h((v8i16)in2, (v8i16)in3); \
- }
-#define PCKEV_H2_SH(...) PCKEV_H2(v8i16, __VA_ARGS__)
-#define PCKEV_H2_SW(...) PCKEV_H2(v4i32, __VA_ARGS__)
-
-#define PCKEV_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
- PCKEV_H2(RTYPE, in4, in5, in6, in7, out2, out3); \
- }
-#define PCKEV_H4_SH(...) PCKEV_H4(v8i16, __VA_ARGS__)
-
-/* Description : Pack even double word elements of vector pairs
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Even double elements of 'in0' are copied to the left half of
- 'out0' & even double elements of 'in1' are copied to the right
- half of 'out0'.
-*/
-#define PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_pckev_d((v2i64)in0, (v2i64)in1); \
- out1 = (RTYPE)__msa_pckev_d((v2i64)in2, (v2i64)in3); \
- }
-#define PCKEV_D2_UB(...) PCKEV_D2(v16u8, __VA_ARGS__)
-#define PCKEV_D2_SH(...) PCKEV_D2(v8i16, __VA_ARGS__)
-
-#define PCKEV_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
- PCKEV_D2(RTYPE, in4, in5, in6, in7, out2, out3); \
- }
-#define PCKEV_D4_UB(...) PCKEV_D4(v16u8, __VA_ARGS__)
-
-/* Description : Each byte element is logically xor'ed with immediate 128
- Arguments : Inputs - in0, in1
- Outputs - in place operation
- Return Type - as per RTYPE
- Details : Each unsigned byte element from input vector 'in0' is
- logically xor'ed with 128 and the result is stored in-place.
-*/
-#define XORI_B2_128(RTYPE, in0, in1) \
- { \
- in0 = (RTYPE)__msa_xori_b((v16u8)in0, 128); \
- in1 = (RTYPE)__msa_xori_b((v16u8)in1, 128); \
- }
-#define XORI_B2_128_UB(...) XORI_B2_128(v16u8, __VA_ARGS__)
-#define XORI_B2_128_SB(...) XORI_B2_128(v16i8, __VA_ARGS__)
-
-#define XORI_B3_128(RTYPE, in0, in1, in2) \
- { \
- XORI_B2_128(RTYPE, in0, in1); \
- in2 = (RTYPE)__msa_xori_b((v16u8)in2, 128); \
- }
-#define XORI_B3_128_SB(...) XORI_B3_128(v16i8, __VA_ARGS__)
-
-#define XORI_B4_128(RTYPE, in0, in1, in2, in3) \
- { \
- XORI_B2_128(RTYPE, in0, in1); \
- XORI_B2_128(RTYPE, in2, in3); \
- }
-#define XORI_B4_128_UB(...) XORI_B4_128(v16u8, __VA_ARGS__)
-#define XORI_B4_128_SB(...) XORI_B4_128(v16i8, __VA_ARGS__)
-
-#define XORI_B7_128(RTYPE, in0, in1, in2, in3, in4, in5, in6) \
- { \
- XORI_B4_128(RTYPE, in0, in1, in2, in3); \
- XORI_B3_128(RTYPE, in4, in5, in6); \
- }
-#define XORI_B7_128_SB(...) XORI_B7_128(v16i8, __VA_ARGS__)
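The XOR-with-128 trick simply flips the top bit, mapping unsigned pixels in [0, 255] onto signed bytes in [-128, 127] and back. A scalar sketch (helper names are illustrative):

#include <stdint.h>

/* xori 128 == flip the sign bit: 0 <-> -128, 128 <-> 0, 255 <-> 127. */
static inline int8_t ub_to_sb(uint8_t u) { return (int8_t)(u ^ 0x80); }
static inline uint8_t sb_to_ub(int8_t s) { return (uint8_t)((uint8_t)s ^ 0x80); }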
-
-/* Description : Average of signed halfword elements -> (a + b) / 2
- Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
- Outputs - out0, out1, out2, out3
- Return Type - as per RTYPE
- Details : Each signed halfword element from 'in0' is added to the
- corresponding signed halfword element of 'in1' with full
- precision, giving one extra bit in the result. The result is
- then divided by 2 and written to 'out0'
-*/
-#define AVE_SH4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- out0 = (RTYPE)__msa_ave_s_h((v8i16)in0, (v8i16)in1); \
- out1 = (RTYPE)__msa_ave_s_h((v8i16)in2, (v8i16)in3); \
- out2 = (RTYPE)__msa_ave_s_h((v8i16)in4, (v8i16)in5); \
- out3 = (RTYPE)__msa_ave_s_h((v8i16)in6, (v8i16)in7); \
- }
-#define AVE_SH4_SH(...) AVE_SH4(v8i16, __VA_ARGS__)
-
-/* Description : Addition of signed halfword elements and signed saturation
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Return Type - as per RTYPE
- Details : Signed halfword elements from 'in0' are added to signed
- halfword elements of 'in1'. The results are then saturated to
- the signed halfword range
-*/
-#define ADDS_SH2(RTYPE, in0, in1, in2, in3, out0, out1) \
- { \
- out0 = (RTYPE)__msa_adds_s_h((v8i16)in0, (v8i16)in1); \
- out1 = (RTYPE)__msa_adds_s_h((v8i16)in2, (v8i16)in3); \
- }
-#define ADDS_SH2_SH(...) ADDS_SH2(v8i16, __VA_ARGS__)
-
-#define ADDS_SH4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3) \
- { \
- ADDS_SH2(RTYPE, in0, in1, in2, in3, out0, out1); \
- ADDS_SH2(RTYPE, in4, in5, in6, in7, out2, out3); \
- }
-#define ADDS_SH4_SH(...) ADDS_SH4(v8i16, __VA_ARGS__)
-
-/* Description : Shift left all elements of vector (generic for all data types)
- Arguments : Inputs - in0, in1, in2, in3, shift
- Outputs - in place operation
- Return Type - as per input vector RTYPE
- Details : Each element of vector 'in0' is left shifted by 'shift' and
- the result is written in-place.
-*/
-#define SLLI_4V(in0, in1, in2, in3, shift) \
- { \
- in0 = in0 << shift; \
- in1 = in1 << shift; \
- in2 = in2 << shift; \
- in3 = in3 << shift; \
- }
-
-/* Description : Arithmetic shift right all elements of vector
- (generic for all data types)
- Arguments : Inputs - in0, in1, in2, in3, shift
- Outputs - in place operation
- Return Type - as per input vector RTYPE
- Details : Each element of vector 'in0' is right shifted by 'shift' and
- the result is written in-place. 'shift' is a GP variable.
-*/
-#define SRA_4V(in0, in1, in2, in3, shift) \
- { \
- in0 = in0 >> shift; \
- in1 = in1 >> shift; \
- in2 = in2 >> shift; \
- in3 = in3 >> shift; \
- }
-
-/* Description : Shift right arithmetic rounded words
- Arguments : Inputs - in0, in1, shift
- Outputs - in place operation
- Return Type - as per RTYPE
- Details : Each element of vector 'in0' is shifted right arithmetically by
- the number of bits in the corresponding element in the vector
- 'shift'. The last discarded bit is added to the shifted value
- rounding and the result is written in-place.
- 'shift' is a vector.
-*/
-#define SRAR_W2(RTYPE, in0, in1, shift) \
- { \
- in0 = (RTYPE)__msa_srar_w((v4i32)in0, (v4i32)shift); \
- in1 = (RTYPE)__msa_srar_w((v4i32)in1, (v4i32)shift); \
- }
-
-#define SRAR_W4(RTYPE, in0, in1, in2, in3, shift) \
- { \
- SRAR_W2(RTYPE, in0, in1, shift); \
- SRAR_W2(RTYPE, in2, in3, shift); \
- }
-#define SRAR_W4_SW(...) SRAR_W4(v4i32, __VA_ARGS__)
-
-/* Description : Shift right arithmetic rounded (immediate)
- Arguments : Inputs - in0, in1, shift
- Outputs - in place operation
- Return Type - as per RTYPE
- Details : Each element of vector 'in0' is shifted right arithmetically by
- the value in 'shift'. The last discarded bit is added to the
- shifted value for rounding and the result is written in-place.
- 'shift' is an immediate value.
-*/
-#define SRARI_H2(RTYPE, in0, in1, shift) \
- { \
- in0 = (RTYPE)__msa_srari_h((v8i16)in0, shift); \
- in1 = (RTYPE)__msa_srari_h((v8i16)in1, shift); \
- }
-#define SRARI_H2_UH(...) SRARI_H2(v8u16, __VA_ARGS__)
-#define SRARI_H2_SH(...) SRARI_H2(v8i16, __VA_ARGS__)
-
-#define SRARI_H4(RTYPE, in0, in1, in2, in3, shift) \
- { \
- SRARI_H2(RTYPE, in0, in1, shift); \
- SRARI_H2(RTYPE, in2, in3, shift); \
- }
-#define SRARI_H4_UH(...) SRARI_H4(v8u16, __VA_ARGS__)
-#define SRARI_H4_SH(...) SRARI_H4(v8i16, __VA_ARGS__)
-
-#define SRARI_W2(RTYPE, in0, in1, shift) \
- { \
- in0 = (RTYPE)__msa_srari_w((v4i32)in0, shift); \
- in1 = (RTYPE)__msa_srari_w((v4i32)in1, shift); \
- }
-#define SRARI_W2_SW(...) SRARI_W2(v4i32, __VA_ARGS__)
-
-#define SRARI_W4(RTYPE, in0, in1, in2, in3, shift) \
- { \
- SRARI_W2(RTYPE, in0, in1, shift); \
- SRARI_W2(RTYPE, in2, in3, shift); \
- }
-#define SRARI_W4_SW(...) SRARI_W4(v4i32, __VA_ARGS__)
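A scalar sketch of the rounding shift applied per lane: the rounding bit is the last bit shifted out, as described above (srari_w_model is illustrative and assumes arithmetic right shift of negative values, as on MIPS):

#include <stdint.h>

/* Rounding arithmetic right shift: add back the last discarded bit.
   e.g. srari_w_model(7, 1) == 4 and srari_w_model(-7, 1) == -3. */
static inline int32_t srari_w_model(int32_t x, unsigned shift) {
  if (shift == 0) return x;
  return (x >> shift) + ((x >> (shift - 1)) & 1);
}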
-
-/* Description : Logical shift right all elements of vector (immediate)
- Arguments : Inputs - in0, in1, in2, in3, shift
- Outputs - out0, out1, out2, out3
- Return Type - as per RTYPE
- Details : Each element of vector 'in0' is logically right shifted by
- 'shift' and the result is written to 'out0'. 'shift' is an
- immediate value.
-*/
-#define SRLI_H4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3, shift) \
- { \
- out0 = (RTYPE)__msa_srli_h((v8i16)in0, shift); \
- out1 = (RTYPE)__msa_srli_h((v8i16)in1, shift); \
- out2 = (RTYPE)__msa_srli_h((v8i16)in2, shift); \
- out3 = (RTYPE)__msa_srli_h((v8i16)in3, shift); \
- }
-#define SRLI_H4_SH(...) SRLI_H4(v8i16, __VA_ARGS__)
-
-/* Description : Multiplication of pairs of vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Details : Each element from 'in0' is multiplied with the corresponding
- element from 'in1' and the result is written to 'out0'
-*/
-#define MUL2(in0, in1, in2, in3, out0, out1) \
- { \
- out0 = in0 * in1; \
- out1 = in2 * in3; \
- }
-#define MUL4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
- { \
- MUL2(in0, in1, in2, in3, out0, out1); \
- MUL2(in4, in5, in6, in7, out2, out3); \
- }
-
-/* Description : Addition of 2 pairs of vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Details : Each element in 'in0' is added to the corresponding element
- in 'in1' and the result is written to 'out0'.
-*/
-#define ADD2(in0, in1, in2, in3, out0, out1) \
- { \
- out0 = in0 + in1; \
- out1 = in2 + in3; \
- }
-#define ADD4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
- { \
- ADD2(in0, in1, in2, in3, out0, out1); \
- ADD2(in4, in5, in6, in7, out2, out3); \
- }
-
-/* Description : Subtraction of 2 pairs of vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1
- Details : Each element in 'in1' is subtracted from the corresponding
- element in 'in0' and the result is written to 'out0'.
-*/
-#define SUB2(in0, in1, in2, in3, out0, out1) \
- { \
- out0 = in0 - in1; \
- out1 = in2 - in3; \
- }
-#define SUB4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
- { \
- out0 = in0 - in1; \
- out1 = in2 - in3; \
- out2 = in4 - in5; \
- out3 = in6 - in7; \
- }
-
-/* Description : Sign extend halfword elements from right half of the vector
- Arguments : Input - in (halfword vector)
- Output - out (sign extended word vector)
- Return Type - signed word
- Details : Sign bit of halfword elements from input vector 'in' is
- extracted and interleaved with the same vector 'in' to
- generate 4 word elements keeping the sign intact
-*/
-#define UNPCK_R_SH_SW(in, out) \
- { \
- v8i16 sign_m; \
- \
- sign_m = __msa_clti_s_h((v8i16)in, 0); \
- out = (v4i32)__msa_ilvr_h(sign_m, (v8i16)in); \
- }
-
-/* Description : Zero extend unsigned byte elements to halfword elements
- Arguments : Input - in (unsigned byte vector)
- Outputs - out0, out1 (halfword vectors)
- Return Type - signed halfword
- Details : Zero extended right half of vector is returned in 'out0'
- Zero extended left half of vector is returned in 'out1'
-*/
-#define UNPCK_UB_SH(in, out0, out1) \
- { \
- v16i8 zero_m = { 0 }; \
- \
- ILVRL_B2_SH(zero_m, in, out0, out1); \
- }
-
-/* Description : Sign extend halfword elements from input vector and return
- the result in pair of vectors
- Arguments : Input - in (halfword vector)
- Outputs - out0, out1 (sign extended word vectors)
- Return Type - signed word
- Details : Sign bit of halfword elements from input vector 'in' is
- extracted and interleaved right with the same vector 'in' to
- generate 4 signed word elements in 'out0', then interleaved
- left with the same vector 'in' to generate 4 signed word
- elements in 'out1'
-*/
-#define UNPCK_SH_SW(in, out0, out1) \
- { \
- v8i16 tmp_m; \
- \
- tmp_m = __msa_clti_s_h((v8i16)in, 0); \
- ILVRL_H2_SW(tmp_m, in, out0, out1); \
- }
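The trick above works because __msa_clti_s_h produces an all-ones mask (0xFFFF) for negative lanes and zero otherwise, so pairing the mask with the input rebuilds the 32-bit sign extension. A per-lane scalar sketch (unpck_sh_sw_model is illustrative):

#include <stdint.h>

/* Combine a halfword with its sign mask to form the sign-extended word. */
static inline int32_t unpck_sh_sw_model(int16_t h) {
  const uint16_t sign_mask = (h < 0) ? 0xFFFFu : 0u;
  return (int32_t)(((uint32_t)sign_mask << 16) | (uint16_t)h);
}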
-
-/* Description : Butterfly of 4 input vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1, out2, out3
- Details : Butterfly operation
-*/
-#define BUTTERFLY_4(in0, in1, in2, in3, out0, out1, out2, out3) \
- { \
- out0 = in0 + in3; \
- out1 = in1 + in2; \
- \
- out2 = in1 - in2; \
- out3 = in0 - in3; \
- }
-
-/* Description : Butterfly of 8 input vectors
- Arguments : Inputs - in0 ... in7
- Outputs - out0 .. out7
- Details : Butterfly operation
-*/
-#define BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \
- out3, out4, out5, out6, out7) \
- { \
- out0 = in0 + in7; \
- out1 = in1 + in6; \
- out2 = in2 + in5; \
- out3 = in3 + in4; \
- \
- out4 = in3 - in4; \
- out5 = in2 - in5; \
- out6 = in1 - in6; \
- out7 = in0 - in7; \
- }
-
-/* Description : Butterfly of 16 input vectors
- Arguments : Inputs - in0 ... in15
- Outputs - out0 .. out15
- Details : Butterfly operation
-*/
-#define BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, \
- in11, in12, in13, in14, in15, out0, out1, out2, out3, \
- out4, out5, out6, out7, out8, out9, out10, out11, out12, \
- out13, out14, out15) \
- { \
- out0 = in0 + in15; \
- out1 = in1 + in14; \
- out2 = in2 + in13; \
- out3 = in3 + in12; \
- out4 = in4 + in11; \
- out5 = in5 + in10; \
- out6 = in6 + in9; \
- out7 = in7 + in8; \
- \
- out8 = in7 - in8; \
- out9 = in6 - in9; \
- out10 = in5 - in10; \
- out11 = in4 - in11; \
- out12 = in3 - in12; \
- out13 = in2 - in13; \
- out14 = in1 - in14; \
- out15 = in0 - in15; \
- }
-
-/* Description : Transpose input 8x8 byte block
- Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
- Outputs - out0, out1, out2, out3, out4, out5, out6, out7
- Return Type - as per RTYPE
-*/
-#define TRANSPOSE8x8_UB(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, \
- out1, out2, out3, out4, out5, out6, out7) \
- { \
- v16i8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
- v16i8 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \
- \
- ILVR_B4_SB(in2, in0, in3, in1, in6, in4, in7, in5, tmp0_m, tmp1_m, tmp2_m, \
- tmp3_m); \
- ILVRL_B2_SB(tmp1_m, tmp0_m, tmp4_m, tmp5_m); \
- ILVRL_B2_SB(tmp3_m, tmp2_m, tmp6_m, tmp7_m); \
- ILVRL_W2(RTYPE, tmp6_m, tmp4_m, out0, out2); \
- ILVRL_W2(RTYPE, tmp7_m, tmp5_m, out4, out6); \
- SLDI_B2_0(RTYPE, out0, out2, out1, out3, 8); \
- SLDI_B2_0(RTYPE, out4, out6, out5, out7, 8); \
- }
-#define TRANSPOSE8x8_UB_UB(...) TRANSPOSE8x8_UB(v16u8, __VA_ARGS__)
-
-/* Description : Transpose 16x8 block into 8x16 with byte elements in vectors
- Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7,
- in8, in9, in10, in11, in12, in13, in14, in15
- Outputs - out0, out1, out2, out3, out4, out5, out6, out7
- Return Type - unsigned byte
-*/
-#define TRANSPOSE16x8_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, \
- in10, in11, in12, in13, in14, in15, out0, out1, \
- out2, out3, out4, out5, out6, out7) \
- { \
- v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
- v16u8 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \
- \
- ILVEV_D2_UB(in0, in8, in1, in9, out7, out6); \
- ILVEV_D2_UB(in2, in10, in3, in11, out5, out4); \
- ILVEV_D2_UB(in4, in12, in5, in13, out3, out2); \
- ILVEV_D2_UB(in6, in14, in7, in15, out1, out0); \
- \
- tmp0_m = (v16u8)__msa_ilvev_b((v16i8)out6, (v16i8)out7); \
- tmp4_m = (v16u8)__msa_ilvod_b((v16i8)out6, (v16i8)out7); \
- tmp1_m = (v16u8)__msa_ilvev_b((v16i8)out4, (v16i8)out5); \
- tmp5_m = (v16u8)__msa_ilvod_b((v16i8)out4, (v16i8)out5); \
- out5 = (v16u8)__msa_ilvev_b((v16i8)out2, (v16i8)out3); \
- tmp6_m = (v16u8)__msa_ilvod_b((v16i8)out2, (v16i8)out3); \
- out7 = (v16u8)__msa_ilvev_b((v16i8)out0, (v16i8)out1); \
- tmp7_m = (v16u8)__msa_ilvod_b((v16i8)out0, (v16i8)out1); \
- \
- ILVEV_H2_UB(tmp0_m, tmp1_m, out5, out7, tmp2_m, tmp3_m); \
- out0 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
- out4 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
- \
- tmp2_m = (v16u8)__msa_ilvod_h((v8i16)tmp1_m, (v8i16)tmp0_m); \
- tmp3_m = (v16u8)__msa_ilvod_h((v8i16)out7, (v8i16)out5); \
- out2 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
- out6 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
- \
- ILVEV_H2_UB(tmp4_m, tmp5_m, tmp6_m, tmp7_m, tmp2_m, tmp3_m); \
- out1 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
- out5 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
- \
- tmp2_m = (v16u8)__msa_ilvod_h((v8i16)tmp5_m, (v8i16)tmp4_m); \
- tmp3_m = (v16u8)__msa_ilvod_h((v8i16)tmp7_m, (v8i16)tmp6_m); \
- out3 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
- out7 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
- }
-
-/* Description : Transpose 4x4 block with half word elements in vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1, out2, out3
- Return Type - signed halfword
-*/
-#define TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, out0, out1, out2, out3) \
- { \
- v8i16 s0_m, s1_m; \
- \
- ILVR_H2_SH(in1, in0, in3, in2, s0_m, s1_m); \
- ILVRL_W2_SH(s1_m, s0_m, out0, out2); \
- out1 = (v8i16)__msa_ilvl_d((v2i64)out0, (v2i64)out0); \
- out3 = (v8i16)__msa_ilvl_d((v2i64)out0, (v2i64)out2); \
- }
-
-/* Description : Transpose 4x8 block with half word elements in vectors
- Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
- Outputs - out0, out1, out2, out3, out4, out5, out6, out7
- Return Type - signed halfword
-*/
-#define TRANSPOSE4X8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
- out2, out3, out4, out5, out6, out7) \
- { \
- v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
- v8i16 tmp0_n, tmp1_n, tmp2_n, tmp3_n; \
- v8i16 zero_m = { 0 }; \
- \
- ILVR_H4_SH(in1, in0, in3, in2, in5, in4, in7, in6, tmp0_n, tmp1_n, tmp2_n, \
- tmp3_n); \
- ILVRL_W2_SH(tmp1_n, tmp0_n, tmp0_m, tmp2_m); \
- ILVRL_W2_SH(tmp3_n, tmp2_n, tmp1_m, tmp3_m); \
- \
- out0 = (v8i16)__msa_ilvr_d((v2i64)tmp1_m, (v2i64)tmp0_m); \
- out1 = (v8i16)__msa_ilvl_d((v2i64)tmp1_m, (v2i64)tmp0_m); \
- out2 = (v8i16)__msa_ilvr_d((v2i64)tmp3_m, (v2i64)tmp2_m); \
- out3 = (v8i16)__msa_ilvl_d((v2i64)tmp3_m, (v2i64)tmp2_m); \
- \
- out4 = zero_m; \
- out5 = zero_m; \
- out6 = zero_m; \
- out7 = zero_m; \
- }
-
-/* Description : Transpose 8x4 block with half word elements in vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1, out2, out3
- Return Type - signed halfword
-*/
-#define TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, out0, out1, out2, out3) \
- { \
- v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
- \
- ILVR_H2_SH(in1, in0, in3, in2, tmp0_m, tmp1_m); \
- ILVL_H2_SH(in1, in0, in3, in2, tmp2_m, tmp3_m); \
- ILVR_W2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out0, out2); \
- ILVL_W2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out1, out3); \
- }
-
-/* Description : Transpose 8x8 block with half word elements in vectors
- Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
- Outputs - out0, out1, out2, out3, out4, out5, out6, out7
- Return Type - as per RTYPE
-*/
-#define TRANSPOSE8x8_H(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, \
- out1, out2, out3, out4, out5, out6, out7) \
- { \
- v8i16 s0_m, s1_m; \
- v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
- v8i16 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \
- \
- ILVR_H2_SH(in6, in4, in7, in5, s0_m, s1_m); \
- ILVRL_H2_SH(s1_m, s0_m, tmp0_m, tmp1_m); \
- ILVL_H2_SH(in6, in4, in7, in5, s0_m, s1_m); \
- ILVRL_H2_SH(s1_m, s0_m, tmp2_m, tmp3_m); \
- ILVR_H2_SH(in2, in0, in3, in1, s0_m, s1_m); \
- ILVRL_H2_SH(s1_m, s0_m, tmp4_m, tmp5_m); \
- ILVL_H2_SH(in2, in0, in3, in1, s0_m, s1_m); \
- ILVRL_H2_SH(s1_m, s0_m, tmp6_m, tmp7_m); \
- PCKEV_D4(RTYPE, tmp0_m, tmp4_m, tmp1_m, tmp5_m, tmp2_m, tmp6_m, tmp3_m, \
- tmp7_m, out0, out2, out4, out6); \
- out1 = (RTYPE)__msa_pckod_d((v2i64)tmp0_m, (v2i64)tmp4_m); \
- out3 = (RTYPE)__msa_pckod_d((v2i64)tmp1_m, (v2i64)tmp5_m); \
- out5 = (RTYPE)__msa_pckod_d((v2i64)tmp2_m, (v2i64)tmp6_m); \
- out7 = (RTYPE)__msa_pckod_d((v2i64)tmp3_m, (v2i64)tmp7_m); \
- }
-#define TRANSPOSE8x8_SH_SH(...) TRANSPOSE8x8_H(v8i16, __VA_ARGS__)
-
-/* Description : Transpose 4x4 block with word elements in vectors
- Arguments : Inputs - in0, in1, in2, in3
- Outputs - out0, out1, out2, out3
- Return Type - signed word
-*/
-#define TRANSPOSE4x4_SW_SW(in0, in1, in2, in3, out0, out1, out2, out3) \
- { \
- v4i32 s0_m, s1_m, s2_m, s3_m; \
- \
- ILVRL_W2_SW(in1, in0, s0_m, s1_m); \
- ILVRL_W2_SW(in3, in2, s2_m, s3_m); \
- \
- out0 = (v4i32)__msa_ilvr_d((v2i64)s2_m, (v2i64)s0_m); \
- out1 = (v4i32)__msa_ilvl_d((v2i64)s2_m, (v2i64)s0_m); \
- out2 = (v4i32)__msa_ilvr_d((v2i64)s3_m, (v2i64)s1_m); \
- out3 = (v4i32)__msa_ilvl_d((v2i64)s3_m, (v2i64)s1_m); \
- }
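All of the transpose macros implement the same mapping, differing only in element width and the interleave steps used; in scalar terms (illustrative sketch):

#include <stdint.h>

/* 4x4 transpose: output row r holds element r of every input row. */
static void transpose4x4_model(const int32_t in[4][4], int32_t out[4][4]) {
  for (int r = 0; r < 4; ++r)
    for (int c = 0; c < 4; ++c) out[r][c] = in[c][r];
}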
-
-/* Description : Add block 4x4
- Arguments : Inputs - in0, in1, in2, in3, pdst, stride
- Details : Least significant 4 bytes from each input vector are added to
- the destination bytes, clipped to the range 0..255 and stored.
-*/
-#define ADDBLK_ST4x4_UB(in0, in1, in2, in3, pdst, stride) \
- { \
- uint32_t src0_m, src1_m, src2_m, src3_m; \
- v8i16 inp0_m, inp1_m, res0_m, res1_m; \
- v16i8 dst0_m = { 0 }; \
- v16i8 dst1_m = { 0 }; \
- v16i8 zero_m = { 0 }; \
- \
- ILVR_D2_SH(in1, in0, in3, in2, inp0_m, inp1_m) \
- LW4(pdst, stride, src0_m, src1_m, src2_m, src3_m); \
- INSERT_W2_SB(src0_m, src1_m, dst0_m); \
- INSERT_W2_SB(src2_m, src3_m, dst1_m); \
- ILVR_B2_SH(zero_m, dst0_m, zero_m, dst1_m, res0_m, res1_m); \
- ADD2(res0_m, inp0_m, res1_m, inp1_m, res0_m, res1_m); \
- CLIP_SH2_0_255(res0_m, res1_m); \
- PCKEV_B2_SB(res0_m, res0_m, res1_m, res1_m, dst0_m, dst1_m); \
- ST4x4_UB(dst0_m, dst1_m, 0, 1, 0, 1, pdst, stride); \
- }
-
-/* Description : Pack even elements of input vectors & xor with 128
- Arguments : Inputs - in0, in1
- Output - out_m
- Return Type - unsigned byte
- Details : Signed byte even elements from 'in0' and 'in1' are packed
- together in one vector and the resulting vector is xor'ed with
- 128 to shift the range from signed to unsigned byte
-*/
-#define PCKEV_XORI128_UB(in0, in1) \
- ({ \
- v16u8 out_m; \
- \
- out_m = (v16u8)__msa_pckev_b((v16i8)in1, (v16i8)in0); \
- out_m = (v16u8)__msa_xori_b((v16u8)out_m, 128); \
- out_m; \
- })
-
-/* Description : Convert inputs to unsigned bytes, interleave, average and
- store as an 8x4 unsigned byte block
- Arguments : Inputs - in0, in1, in2, in3, dst0, dst1, dst2, dst3,
- pdst, stride
-*/
-#define CONVERT_UB_AVG_ST8x4_UB(in0, in1, in2, in3, dst0, dst1, dst2, dst3, \
- pdst, stride) \
- { \
- v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
- \
- tmp0_m = PCKEV_XORI128_UB(in0, in1); \
- tmp1_m = PCKEV_XORI128_UB(in2, in3); \
- ILVR_D2_UB(dst1, dst0, dst3, dst2, tmp2_m, tmp3_m); \
- AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m); \
- ST8x4_UB(tmp0_m, tmp1_m, pdst, stride); \
- }
-
-/* Description : Pack even byte elements and store byte vector in destination
- memory
- Arguments : Inputs - in0, in1, pdst
-*/
-#define PCKEV_ST_SB(in0, in1, pdst) \
- { \
- v16i8 tmp_m; \
- \
- tmp_m = __msa_pckev_b((v16i8)in1, (v16i8)in0); \
- ST_SB(tmp_m, (pdst)); \
- }
-
-/* Description : Horizontal 2 tap filter kernel code
- Arguments : Inputs - in0, in1, mask, coeff, shift
-*/
-#define HORIZ_2TAP_FILT_UH(in0, in1, mask, coeff, shift) \
- ({ \
- v16i8 tmp0_m; \
- v8u16 tmp1_m; \
- \
- tmp0_m = __msa_vshf_b((v16i8)mask, (v16i8)in1, (v16i8)in0); \
- tmp1_m = __msa_dotp_u_h((v16u8)tmp0_m, (v16u8)coeff); \
- tmp1_m = (v8u16)__msa_srari_h((v8i16)tmp1_m, shift); \
- \
- tmp1_m; \
- })
-#endif // AOM_AOM_DSP_MIPS_MACROS_MSA_H_
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/sad_msa.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/sad_msa.c
deleted file mode 100644
index 01d4a5239c1..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/sad_msa.c
+++ /dev/null
@@ -1,800 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/macros_msa.h"
-
-#define SAD_INSVE_W4(RTYPE, in0, in1, in2, in3, out) \
- { \
- out = (RTYPE)__msa_insve_w((v4i32)out, 0, (v4i32)in0); \
- out = (RTYPE)__msa_insve_w((v4i32)out, 1, (v4i32)in1); \
- out = (RTYPE)__msa_insve_w((v4i32)out, 2, (v4i32)in2); \
- out = (RTYPE)__msa_insve_w((v4i32)out, 3, (v4i32)in3); \
- }
-#define SAD_INSVE_W4_UB(...) SAD_INSVE_W4(v16u8, __VA_ARGS__)
-
-static uint32_t sad_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t height) {
- int32_t ht_cnt;
- uint32_t src0, src1, src2, src3, ref0, ref1, ref2, ref3;
- v16u8 src = { 0 };
- v16u8 ref = { 0 };
- v16u8 diff;
- v8u16 sad = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LW4(src_ptr, src_stride, src0, src1, src2, src3);
- src_ptr += (4 * src_stride);
- LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
- ref_ptr += (4 * ref_stride);
-
- INSERT_W4_UB(src0, src1, src2, src3, src);
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
-
- diff = __msa_asub_u_b(src, ref);
- sad += __msa_hadd_u_h(diff, diff);
- }
-
- return HADD_UH_U32(sad);
-}
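For reference, a plain-C equivalent of the 4-wide SAD loop above (a sketch of the same arithmetic, not the library's own C fallback):

#include <stdint.h>

static uint32_t sad_4width_c_model(const uint8_t *src, int32_t src_stride,
                                   const uint8_t *ref, int32_t ref_stride,
                                   int32_t height) {
  uint32_t sad = 0;
  for (int32_t y = 0; y < height; ++y) {
    for (int32_t x = 0; x < 4; ++x) {
      const int d = src[x] - ref[x];
      sad += (uint32_t)(d < 0 ? -d : d);
    }
    src += src_stride;
    ref += ref_stride;
  }
  return sad;
}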
-
-static uint32_t sad_8width_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride,
- int32_t height) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3, ref0, ref1, ref2, ref3;
- v8u16 sad = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LD_UB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
- LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
- ref += (4 * ref_stride);
-
- PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1,
- ref0, ref1);
- sad += SAD_UB2_UH(src0, src1, ref0, ref1);
- }
-
- return HADD_UH_U32(sad);
-}
-
-static uint32_t sad_16width_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride,
- int32_t height) {
- int32_t ht_cnt;
- v16u8 src0, src1, ref0, ref1;
- v8u16 sad = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LD_UB2(src, src_stride, src0, src1);
- src += (2 * src_stride);
- LD_UB2(ref, ref_stride, ref0, ref1);
- ref += (2 * ref_stride);
- sad += SAD_UB2_UH(src0, src1, ref0, ref1);
-
- LD_UB2(src, src_stride, src0, src1);
- src += (2 * src_stride);
- LD_UB2(ref, ref_stride, ref0, ref1);
- ref += (2 * ref_stride);
- sad += SAD_UB2_UH(src0, src1, ref0, ref1);
- }
-
- return HADD_UH_U32(sad);
-}
-
-static uint32_t sad_32width_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride,
- int32_t height) {
- int32_t ht_cnt;
- v16u8 src0, src1, ref0, ref1;
- v8u16 sad = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LD_UB2(src, 16, src0, src1);
- src += src_stride;
- LD_UB2(ref, 16, ref0, ref1);
- ref += ref_stride;
- sad += SAD_UB2_UH(src0, src1, ref0, ref1);
-
- LD_UB2(src, 16, src0, src1);
- src += src_stride;
- LD_UB2(ref, 16, ref0, ref1);
- ref += ref_stride;
- sad += SAD_UB2_UH(src0, src1, ref0, ref1);
-
- LD_UB2(src, 16, src0, src1);
- src += src_stride;
- LD_UB2(ref, 16, ref0, ref1);
- ref += ref_stride;
- sad += SAD_UB2_UH(src0, src1, ref0, ref1);
-
- LD_UB2(src, 16, src0, src1);
- src += src_stride;
- LD_UB2(ref, 16, ref0, ref1);
- ref += ref_stride;
- sad += SAD_UB2_UH(src0, src1, ref0, ref1);
- }
-
- return HADD_UH_U32(sad);
-}
-
-static uint32_t sad_64width_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride,
- int32_t height) {
- int32_t ht_cnt;
- uint32_t sad = 0;
- v16u8 src0, src1, src2, src3;
- v16u8 ref0, ref1, ref2, ref3;
- v8u16 sad0 = { 0 };
- v8u16 sad1 = { 0 };
-
- for (ht_cnt = (height >> 1); ht_cnt--;) {
- LD_UB4(src, 16, src0, src1, src2, src3);
- src += src_stride;
- LD_UB4(ref, 16, ref0, ref1, ref2, ref3);
- ref += ref_stride;
- sad0 += SAD_UB2_UH(src0, src1, ref0, ref1);
- sad1 += SAD_UB2_UH(src2, src3, ref2, ref3);
-
- LD_UB4(src, 16, src0, src1, src2, src3);
- src += src_stride;
- LD_UB4(ref, 16, ref0, ref1, ref2, ref3);
- ref += ref_stride;
- sad0 += SAD_UB2_UH(src0, src1, ref0, ref1);
- sad1 += SAD_UB2_UH(src2, src3, ref2, ref3);
- }
-
- sad = HADD_UH_U32(sad0);
- sad += HADD_UH_U32(sad1);
-
- return sad;
-}
-
-static void sad_4width_x4d_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *const aref_ptr[4],
- int32_t ref_stride, int32_t height,
- uint32_t sad_array[4]) {
- const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr;
- int32_t ht_cnt;
- uint32_t src0, src1, src2, src3;
- uint32_t ref0, ref1, ref2, ref3;
- v16u8 src = { 0 };
- v16u8 ref = { 0 };
- v16u8 diff;
- v8u16 sad0 = { 0 };
- v8u16 sad1 = { 0 };
- v8u16 sad2 = { 0 };
- v8u16 sad3 = { 0 };
-
- ref0_ptr = aref_ptr[0];
- ref1_ptr = aref_ptr[1];
- ref2_ptr = aref_ptr[2];
- ref3_ptr = aref_ptr[3];
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LW4(src_ptr, src_stride, src0, src1, src2, src3);
- INSERT_W4_UB(src0, src1, src2, src3, src);
- src_ptr += (4 * src_stride);
-
- LW4(ref0_ptr, ref_stride, ref0, ref1, ref2, ref3);
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
- ref0_ptr += (4 * ref_stride);
-
- diff = __msa_asub_u_b(src, ref);
- sad0 += __msa_hadd_u_h(diff, diff);
-
- LW4(ref1_ptr, ref_stride, ref0, ref1, ref2, ref3);
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
- ref1_ptr += (4 * ref_stride);
-
- diff = __msa_asub_u_b(src, ref);
- sad1 += __msa_hadd_u_h(diff, diff);
-
- LW4(ref2_ptr, ref_stride, ref0, ref1, ref2, ref3);
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
- ref2_ptr += (4 * ref_stride);
-
- diff = __msa_asub_u_b(src, ref);
- sad2 += __msa_hadd_u_h(diff, diff);
-
- LW4(ref3_ptr, ref_stride, ref0, ref1, ref2, ref3);
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
- ref3_ptr += (4 * ref_stride);
-
- diff = __msa_asub_u_b(src, ref);
- sad3 += __msa_hadd_u_h(diff, diff);
- }
-
- sad_array[0] = HADD_UH_U32(sad0);
- sad_array[1] = HADD_UH_U32(sad1);
- sad_array[2] = HADD_UH_U32(sad2);
- sad_array[3] = HADD_UH_U32(sad3);
-}
-
-static void sad_8width_x4d_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *const aref_ptr[4],
- int32_t ref_stride, int32_t height,
- uint32_t sad_array[4]) {
- int32_t ht_cnt;
- const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr;
- v16u8 src0, src1, src2, src3;
- v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
- v16u8 ref8, ref9, ref10, ref11, ref12, ref13, ref14, ref15;
- v8u16 sad0 = { 0 };
- v8u16 sad1 = { 0 };
- v8u16 sad2 = { 0 };
- v8u16 sad3 = { 0 };
-
- ref0_ptr = aref_ptr[0];
- ref1_ptr = aref_ptr[1];
- ref2_ptr = aref_ptr[2];
- ref3_ptr = aref_ptr[3];
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
- src_ptr += (4 * src_stride);
- LD_UB4(ref0_ptr, ref_stride, ref0, ref1, ref2, ref3);
- ref0_ptr += (4 * ref_stride);
- LD_UB4(ref1_ptr, ref_stride, ref4, ref5, ref6, ref7);
- ref1_ptr += (4 * ref_stride);
- LD_UB4(ref2_ptr, ref_stride, ref8, ref9, ref10, ref11);
- ref2_ptr += (4 * ref_stride);
- LD_UB4(ref3_ptr, ref_stride, ref12, ref13, ref14, ref15);
- ref3_ptr += (4 * ref_stride);
-
- PCKEV_D2_UB(src1, src0, src3, src2, src0, src1);
- PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1);
- sad0 += SAD_UB2_UH(src0, src1, ref0, ref1);
-
- PCKEV_D2_UB(ref5, ref4, ref7, ref6, ref0, ref1);
- sad1 += SAD_UB2_UH(src0, src1, ref0, ref1);
-
- PCKEV_D2_UB(ref9, ref8, ref11, ref10, ref0, ref1);
- sad2 += SAD_UB2_UH(src0, src1, ref0, ref1);
-
- PCKEV_D2_UB(ref13, ref12, ref15, ref14, ref0, ref1);
- sad3 += SAD_UB2_UH(src0, src1, ref0, ref1);
- }
-
- sad_array[0] = HADD_UH_U32(sad0);
- sad_array[1] = HADD_UH_U32(sad1);
- sad_array[2] = HADD_UH_U32(sad2);
- sad_array[3] = HADD_UH_U32(sad3);
-}
-
-static void sad_16width_x4d_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *const aref_ptr[4],
- int32_t ref_stride, int32_t height,
- uint32_t sad_array[4]) {
- int32_t ht_cnt;
- const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr;
- v16u8 src, ref0, ref1, ref2, ref3, diff;
- v8u16 sad0 = { 0 };
- v8u16 sad1 = { 0 };
- v8u16 sad2 = { 0 };
- v8u16 sad3 = { 0 };
-
- ref0_ptr = aref_ptr[0];
- ref1_ptr = aref_ptr[1];
- ref2_ptr = aref_ptr[2];
- ref3_ptr = aref_ptr[3];
-
- for (ht_cnt = (height >> 1); ht_cnt--;) {
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref0 = LD_UB(ref0_ptr);
- ref0_ptr += ref_stride;
- ref1 = LD_UB(ref1_ptr);
- ref1_ptr += ref_stride;
- ref2 = LD_UB(ref2_ptr);
- ref2_ptr += ref_stride;
- ref3 = LD_UB(ref3_ptr);
- ref3_ptr += ref_stride;
-
- diff = __msa_asub_u_b(src, ref0);
- sad0 += __msa_hadd_u_h(diff, diff);
- diff = __msa_asub_u_b(src, ref1);
- sad1 += __msa_hadd_u_h(diff, diff);
- diff = __msa_asub_u_b(src, ref2);
- sad2 += __msa_hadd_u_h(diff, diff);
- diff = __msa_asub_u_b(src, ref3);
- sad3 += __msa_hadd_u_h(diff, diff);
-
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref0 = LD_UB(ref0_ptr);
- ref0_ptr += ref_stride;
- ref1 = LD_UB(ref1_ptr);
- ref1_ptr += ref_stride;
- ref2 = LD_UB(ref2_ptr);
- ref2_ptr += ref_stride;
- ref3 = LD_UB(ref3_ptr);
- ref3_ptr += ref_stride;
-
- diff = __msa_asub_u_b(src, ref0);
- sad0 += __msa_hadd_u_h(diff, diff);
- diff = __msa_asub_u_b(src, ref1);
- sad1 += __msa_hadd_u_h(diff, diff);
- diff = __msa_asub_u_b(src, ref2);
- sad2 += __msa_hadd_u_h(diff, diff);
- diff = __msa_asub_u_b(src, ref3);
- sad3 += __msa_hadd_u_h(diff, diff);
- }
-
- sad_array[0] = HADD_UH_U32(sad0);
- sad_array[1] = HADD_UH_U32(sad1);
- sad_array[2] = HADD_UH_U32(sad2);
- sad_array[3] = HADD_UH_U32(sad3);
-}
-
-static void sad_32width_x4d_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *const aref_ptr[4],
- int32_t ref_stride, int32_t height,
- uint32_t sad_array[4]) {
- const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr;
- int32_t ht_cnt;
- v16u8 src0, src1, ref0, ref1;
- v8u16 sad0 = { 0 };
- v8u16 sad1 = { 0 };
- v8u16 sad2 = { 0 };
- v8u16 sad3 = { 0 };
-
- ref0_ptr = aref_ptr[0];
- ref1_ptr = aref_ptr[1];
- ref2_ptr = aref_ptr[2];
- ref3_ptr = aref_ptr[3];
-
- for (ht_cnt = height; ht_cnt--;) {
- LD_UB2(src, 16, src0, src1);
- src += src_stride;
-
- LD_UB2(ref0_ptr, 16, ref0, ref1);
- ref0_ptr += ref_stride;
- sad0 += SAD_UB2_UH(src0, src1, ref0, ref1);
-
- LD_UB2(ref1_ptr, 16, ref0, ref1);
- ref1_ptr += ref_stride;
- sad1 += SAD_UB2_UH(src0, src1, ref0, ref1);
-
- LD_UB2(ref2_ptr, 16, ref0, ref1);
- ref2_ptr += ref_stride;
- sad2 += SAD_UB2_UH(src0, src1, ref0, ref1);
-
- LD_UB2(ref3_ptr, 16, ref0, ref1);
- ref3_ptr += ref_stride;
- sad3 += SAD_UB2_UH(src0, src1, ref0, ref1);
- }
-
- sad_array[0] = HADD_UH_U32(sad0);
- sad_array[1] = HADD_UH_U32(sad1);
- sad_array[2] = HADD_UH_U32(sad2);
- sad_array[3] = HADD_UH_U32(sad3);
-}
-
-static void sad_64width_x4d_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *const aref_ptr[4],
- int32_t ref_stride, int32_t height,
- uint32_t sad_array[4]) {
- const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr;
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3;
- v16u8 ref0, ref1, ref2, ref3;
- v8u16 sad0_0 = { 0 };
- v8u16 sad0_1 = { 0 };
- v8u16 sad1_0 = { 0 };
- v8u16 sad1_1 = { 0 };
- v8u16 sad2_0 = { 0 };
- v8u16 sad2_1 = { 0 };
- v8u16 sad3_0 = { 0 };
- v8u16 sad3_1 = { 0 };
-
- ref0_ptr = aref_ptr[0];
- ref1_ptr = aref_ptr[1];
- ref2_ptr = aref_ptr[2];
- ref3_ptr = aref_ptr[3];
-
- for (ht_cnt = height; ht_cnt--;) {
- LD_UB4(src, 16, src0, src1, src2, src3);
- src += src_stride;
-
- LD_UB4(ref0_ptr, 16, ref0, ref1, ref2, ref3);
- ref0_ptr += ref_stride;
- sad0_0 += SAD_UB2_UH(src0, src1, ref0, ref1);
- sad0_1 += SAD_UB2_UH(src2, src3, ref2, ref3);
-
- LD_UB4(ref1_ptr, 16, ref0, ref1, ref2, ref3);
- ref1_ptr += ref_stride;
- sad1_0 += SAD_UB2_UH(src0, src1, ref0, ref1);
- sad1_1 += SAD_UB2_UH(src2, src3, ref2, ref3);
-
- LD_UB4(ref2_ptr, 16, ref0, ref1, ref2, ref3);
- ref2_ptr += ref_stride;
- sad2_0 += SAD_UB2_UH(src0, src1, ref0, ref1);
- sad2_1 += SAD_UB2_UH(src2, src3, ref2, ref3);
-
- LD_UB4(ref3_ptr, 16, ref0, ref1, ref2, ref3);
- ref3_ptr += ref_stride;
- sad3_0 += SAD_UB2_UH(src0, src1, ref0, ref1);
- sad3_1 += SAD_UB2_UH(src2, src3, ref2, ref3);
- }
-
- sad_array[0] = HADD_UH_U32(sad0_0);
- sad_array[0] += HADD_UH_U32(sad0_1);
- sad_array[1] = HADD_UH_U32(sad1_0);
- sad_array[1] += HADD_UH_U32(sad1_1);
- sad_array[2] = HADD_UH_U32(sad2_0);
- sad_array[2] += HADD_UH_U32(sad2_1);
- sad_array[3] = HADD_UH_U32(sad3_0);
- sad_array[3] += HADD_UH_U32(sad3_1);
-}
-
-static uint32_t avgsad_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t height, const uint8_t *sec_pred) {
- int32_t ht_cnt;
- uint32_t src0, src1, src2, src3, ref0, ref1, ref2, ref3;
- v16u8 src = { 0 };
- v16u8 ref = { 0 };
- v16u8 diff, pred, comp;
- v8u16 sad = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LW4(src_ptr, src_stride, src0, src1, src2, src3);
- src_ptr += (4 * src_stride);
- LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
- ref_ptr += (4 * ref_stride);
- pred = LD_UB(sec_pred);
- sec_pred += 16;
-
- INSERT_W4_UB(src0, src1, src2, src3, src);
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
-
- comp = __msa_aver_u_b(pred, ref);
- diff = __msa_asub_u_b(src, comp);
- sad += __msa_hadd_u_h(diff, diff);
- }
-
- return HADD_UH_U32(sad);
-}
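The avgsad variants first form the compound prediction by averaging ref with sec_pred, then take the SAD against src. A scalar sketch, assuming __msa_aver_u_b is the rounding average (a + b + 1) >> 1 (avgsad_4width_c_model is illustrative):

#include <stdint.h>

static uint32_t avgsad_4width_c_model(const uint8_t *src, int32_t src_stride,
                                      const uint8_t *ref, int32_t ref_stride,
                                      int32_t height,
                                      const uint8_t *sec_pred) {
  uint32_t sad = 0;
  for (int32_t y = 0; y < height; ++y) {
    for (int32_t x = 0; x < 4; ++x) {
      const int comp = (ref[x] + sec_pred[x] + 1) >> 1; /* rounding average */
      const int d = src[x] - comp;
      sad += (uint32_t)(d < 0 ? -d : d);
    }
    src += src_stride;
    ref += ref_stride;
    sec_pred += 4; /* second_pred rows are packed contiguously, 4 bytes each */
  }
  return sad;
}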
-
-static uint32_t avgsad_8width_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride,
- int32_t height, const uint8_t *sec_pred) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3, ref0, ref1, ref2, ref3;
- v16u8 diff0, diff1, pred0, pred1;
- v8u16 sad = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LD_UB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
- LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
- ref += (4 * ref_stride);
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1,
- ref0, ref1);
- AVER_UB2_UB(pred0, ref0, pred1, ref1, diff0, diff1);
- sad += SAD_UB2_UH(src0, src1, diff0, diff1);
- }
-
- return HADD_UH_U32(sad);
-}
-
-static uint32_t avgsad_16width_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride,
- int32_t height, const uint8_t *sec_pred) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3, ref0, ref1, ref2, ref3;
- v16u8 pred0, pred1, pred2, pred3, comp0, comp1;
- v8u16 sad = { 0 };
-
- for (ht_cnt = (height >> 3); ht_cnt--;) {
- LD_UB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
- LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
- ref += (4 * ref_stride);
- LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
- sec_pred += (4 * 16);
- AVER_UB2_UB(pred0, ref0, pred1, ref1, comp0, comp1);
- sad += SAD_UB2_UH(src0, src1, comp0, comp1);
- AVER_UB2_UB(pred2, ref2, pred3, ref3, comp0, comp1);
- sad += SAD_UB2_UH(src2, src3, comp0, comp1);
-
- LD_UB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
- LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
- ref += (4 * ref_stride);
- LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
- sec_pred += (4 * 16);
- AVER_UB2_UB(pred0, ref0, pred1, ref1, comp0, comp1);
- sad += SAD_UB2_UH(src0, src1, comp0, comp1);
- AVER_UB2_UB(pred2, ref2, pred3, ref3, comp0, comp1);
- sad += SAD_UB2_UH(src2, src3, comp0, comp1);
- }
-
- return HADD_UH_U32(sad);
-}
-
-static uint32_t avgsad_32width_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride,
- int32_t height, const uint8_t *sec_pred) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
- v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
- v16u8 pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7;
- v16u8 comp0, comp1;
- v8u16 sad = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LD_UB4(src, src_stride, src0, src2, src4, src6);
- LD_UB4(src + 16, src_stride, src1, src3, src5, src7);
- src += (4 * src_stride);
-
- LD_UB4(ref, ref_stride, ref0, ref2, ref4, ref6);
- LD_UB4(ref + 16, ref_stride, ref1, ref3, ref5, ref7);
- ref += (4 * ref_stride);
-
- LD_UB4(sec_pred, 32, pred0, pred2, pred4, pred6);
- LD_UB4(sec_pred + 16, 32, pred1, pred3, pred5, pred7);
- sec_pred += (4 * 32);
-
- AVER_UB2_UB(pred0, ref0, pred1, ref1, comp0, comp1);
- sad += SAD_UB2_UH(src0, src1, comp0, comp1);
- AVER_UB2_UB(pred2, ref2, pred3, ref3, comp0, comp1);
- sad += SAD_UB2_UH(src2, src3, comp0, comp1);
- AVER_UB2_UB(pred4, ref4, pred5, ref5, comp0, comp1);
- sad += SAD_UB2_UH(src4, src5, comp0, comp1);
- AVER_UB2_UB(pred6, ref6, pred7, ref7, comp0, comp1);
- sad += SAD_UB2_UH(src6, src7, comp0, comp1);
- }
-
- return HADD_UH_U32(sad);
-}
-
-static uint32_t avgsad_64width_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride,
- int32_t height, const uint8_t *sec_pred) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 comp0, comp1, comp2, comp3;
- v16u8 pred0, pred1, pred2, pred3;
- v8u16 sad0 = { 0 };
- v8u16 sad1 = { 0 };
- v4u32 sad;
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LD_UB4(src, 16, src0, src1, src2, src3);
- src += src_stride;
- LD_UB4(ref, 16, ref0, ref1, ref2, ref3);
- ref += ref_stride;
- LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
- sec_pred += 64;
- AVER_UB4_UB(pred0, ref0, pred1, ref1, pred2, ref2, pred3, ref3, comp0,
- comp1, comp2, comp3);
- sad0 += SAD_UB2_UH(src0, src1, comp0, comp1);
- sad1 += SAD_UB2_UH(src2, src3, comp2, comp3);
-
- LD_UB4(src, 16, src0, src1, src2, src3);
- src += src_stride;
- LD_UB4(ref, 16, ref0, ref1, ref2, ref3);
- ref += ref_stride;
- LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
- sec_pred += 64;
- AVER_UB4_UB(pred0, ref0, pred1, ref1, pred2, ref2, pred3, ref3, comp0,
- comp1, comp2, comp3);
- sad0 += SAD_UB2_UH(src0, src1, comp0, comp1);
- sad1 += SAD_UB2_UH(src2, src3, comp2, comp3);
-
- LD_UB4(src, 16, src0, src1, src2, src3);
- src += src_stride;
- LD_UB4(ref, 16, ref0, ref1, ref2, ref3);
- ref += ref_stride;
- LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
- sec_pred += 64;
- AVER_UB4_UB(pred0, ref0, pred1, ref1, pred2, ref2, pred3, ref3, comp0,
- comp1, comp2, comp3);
- sad0 += SAD_UB2_UH(src0, src1, comp0, comp1);
- sad1 += SAD_UB2_UH(src2, src3, comp2, comp3);
-
- LD_UB4(src, 16, src0, src1, src2, src3);
- src += src_stride;
- LD_UB4(ref, 16, ref0, ref1, ref2, ref3);
- ref += ref_stride;
- LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
- sec_pred += 64;
- AVER_UB4_UB(pred0, ref0, pred1, ref1, pred2, ref2, pred3, ref3, comp0,
- comp1, comp2, comp3);
- sad0 += SAD_UB2_UH(src0, src1, comp0, comp1);
- sad1 += SAD_UB2_UH(src2, src3, comp2, comp3);
- }
-
- sad = __msa_hadd_u_w(sad0, sad0);
- sad += __msa_hadd_u_w(sad1, sad1);
-
- return HADD_SW_S32(sad);
-}
-
-#define AOM_SAD_4xHEIGHT_MSA(height) \
- uint32_t aom_sad4x##height##_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *ref, int32_t ref_stride) { \
- return sad_4width_msa(src, src_stride, ref, ref_stride, height); \
- }
-
-#define AOM_SAD_8xHEIGHT_MSA(height) \
- uint32_t aom_sad8x##height##_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *ref, int32_t ref_stride) { \
- return sad_8width_msa(src, src_stride, ref, ref_stride, height); \
- }
-
-#define AOM_SAD_16xHEIGHT_MSA(height) \
- uint32_t aom_sad16x##height##_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *ref, int32_t ref_stride) { \
- return sad_16width_msa(src, src_stride, ref, ref_stride, height); \
- }
-
-#define AOM_SAD_32xHEIGHT_MSA(height) \
- uint32_t aom_sad32x##height##_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *ref, int32_t ref_stride) { \
- return sad_32width_msa(src, src_stride, ref, ref_stride, height); \
- }
-
-#define AOM_SAD_64xHEIGHT_MSA(height) \
- uint32_t aom_sad64x##height##_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *ref, int32_t ref_stride) { \
- return sad_64width_msa(src, src_stride, ref, ref_stride, height); \
- }
-
-#define AOM_SAD_4xHEIGHTx4D_MSA(height) \
- void aom_sad4x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *const refs[4], \
- int32_t ref_stride, uint32_t sads[4]) { \
- sad_4width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
- }
-
-#define AOM_SAD_8xHEIGHTx4D_MSA(height) \
- void aom_sad8x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *const refs[4], \
- int32_t ref_stride, uint32_t sads[4]) { \
- sad_8width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
- }
-
-#define AOM_SAD_16xHEIGHTx4D_MSA(height) \
- void aom_sad16x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *const refs[4], \
- int32_t ref_stride, uint32_t sads[4]) { \
- sad_16width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
- }
-
-#define AOM_SAD_32xHEIGHTx4D_MSA(height) \
- void aom_sad32x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *const refs[4], \
- int32_t ref_stride, uint32_t sads[4]) { \
- sad_32width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
- }
-
-#define AOM_SAD_64xHEIGHTx4D_MSA(height) \
- void aom_sad64x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *const refs[4], \
- int32_t ref_stride, uint32_t sads[4]) { \
- sad_64width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
- }
-
-#define AOM_AVGSAD_4xHEIGHT_MSA(height) \
- uint32_t aom_sad4x##height##_avg_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *ref, int32_t ref_stride, \
- const uint8_t *second_pred) { \
- return avgsad_4width_msa(src, src_stride, ref, ref_stride, height, \
- second_pred); \
- }
-
-#define AOM_AVGSAD_8xHEIGHT_MSA(height) \
- uint32_t aom_sad8x##height##_avg_msa(const uint8_t *src, int32_t src_stride, \
- const uint8_t *ref, int32_t ref_stride, \
- const uint8_t *second_pred) { \
- return avgsad_8width_msa(src, src_stride, ref, ref_stride, height, \
- second_pred); \
- }
-
-#define AOM_AVGSAD_16xHEIGHT_MSA(height) \
- uint32_t aom_sad16x##height##_avg_msa( \
- const uint8_t *src, int32_t src_stride, const uint8_t *ref, \
- int32_t ref_stride, const uint8_t *second_pred) { \
- return avgsad_16width_msa(src, src_stride, ref, ref_stride, height, \
- second_pred); \
- }
-
-#define AOM_AVGSAD_32xHEIGHT_MSA(height) \
- uint32_t aom_sad32x##height##_avg_msa( \
- const uint8_t *src, int32_t src_stride, const uint8_t *ref, \
- int32_t ref_stride, const uint8_t *second_pred) { \
- return avgsad_32width_msa(src, src_stride, ref, ref_stride, height, \
- second_pred); \
- }
-
-#define AOM_AVGSAD_64xHEIGHT_MSA(height) \
- uint32_t aom_sad64x##height##_avg_msa( \
- const uint8_t *src, int32_t src_stride, const uint8_t *ref, \
- int32_t ref_stride, const uint8_t *second_pred) { \
- return avgsad_64width_msa(src, src_stride, ref, ref_stride, height, \
- second_pred); \
- }
-
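Each of the AOM_*_MSA macros above stamps out one exported symbol per block size by token-pasting the height into the function name. For example, AOM_SAD_16xHEIGHT_MSA(16) expands (modulo whitespace) to:

    uint32_t aom_sad16x16_msa(const uint8_t *src, int32_t src_stride,
                              const uint8_t *ref, int32_t ref_stride) {
      return sad_16width_msa(src, src_stride, ref, ref_stride, 16);
    }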
-/* clang-format off */
-// 64x64
-AOM_SAD_64xHEIGHT_MSA(64)
-AOM_SAD_64xHEIGHTx4D_MSA(64)
-AOM_AVGSAD_64xHEIGHT_MSA(64)
-
-// 64x32
-AOM_SAD_64xHEIGHT_MSA(32)
-AOM_SAD_64xHEIGHTx4D_MSA(32)
-AOM_AVGSAD_64xHEIGHT_MSA(32)
-
-// 32x64
-AOM_SAD_32xHEIGHT_MSA(64)
-AOM_SAD_32xHEIGHTx4D_MSA(64)
-AOM_AVGSAD_32xHEIGHT_MSA(64)
-
-// 32x32
-AOM_SAD_32xHEIGHT_MSA(32)
-AOM_SAD_32xHEIGHTx4D_MSA(32)
-AOM_AVGSAD_32xHEIGHT_MSA(32)
-
-// 32x16
-AOM_SAD_32xHEIGHT_MSA(16)
-AOM_SAD_32xHEIGHTx4D_MSA(16)
-AOM_AVGSAD_32xHEIGHT_MSA(16)
-
-// 16x32
-AOM_SAD_16xHEIGHT_MSA(32)
-AOM_SAD_16xHEIGHTx4D_MSA(32)
-AOM_AVGSAD_16xHEIGHT_MSA(32)
-
-// 16x16
-AOM_SAD_16xHEIGHT_MSA(16)
-AOM_SAD_16xHEIGHTx4D_MSA(16)
-AOM_AVGSAD_16xHEIGHT_MSA(16)
-
-// 16x8
-AOM_SAD_16xHEIGHT_MSA(8)
-AOM_SAD_16xHEIGHTx4D_MSA(8)
-AOM_AVGSAD_16xHEIGHT_MSA(8)
-
-// 8x16
-AOM_SAD_8xHEIGHT_MSA(16)
-AOM_SAD_8xHEIGHTx4D_MSA(16)
-AOM_AVGSAD_8xHEIGHT_MSA(16)
-
-// 8x8
-AOM_SAD_8xHEIGHT_MSA(8)
-AOM_SAD_8xHEIGHTx4D_MSA(8)
-AOM_AVGSAD_8xHEIGHT_MSA(8)
-
-// 8x4
-AOM_SAD_8xHEIGHT_MSA(4)
-AOM_SAD_8xHEIGHTx4D_MSA(4)
-AOM_AVGSAD_8xHEIGHT_MSA(4)
-
-// 4x8
-AOM_SAD_4xHEIGHT_MSA(8)
-AOM_SAD_4xHEIGHTx4D_MSA(8)
-AOM_AVGSAD_4xHEIGHT_MSA(8)
-
-// 4x4
-AOM_SAD_4xHEIGHT_MSA(4)
-AOM_SAD_4xHEIGHTx4D_MSA(4)
-AOM_AVGSAD_4xHEIGHT_MSA(4)
-/* clang-format on */
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/sub_pixel_variance_msa.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/sub_pixel_variance_msa.c
deleted file mode 100644
index 810b6efaaa8..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/sub_pixel_variance_msa.c
+++ /dev/null
@@ -1,1792 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_ports/mem.h"
-#include "aom_dsp/mips/macros_msa.h"
-#include "aom_dsp/aom_filter.h"
-#include "aom_dsp/variance.h"
-
-#define CALC_MSE_AVG_B(src, ref, var, sub) \
- { \
- v16u8 src_l0_m, src_l1_m; \
- v8i16 res_l0_m, res_l1_m; \
- \
- ILVRL_B2_UB(src, ref, src_l0_m, src_l1_m); \
- HSUB_UB2_SH(src_l0_m, src_l1_m, res_l0_m, res_l1_m); \
- DPADD_SH2_SW(res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var); \
- \
- sub += res_l0_m + res_l1_m; \
- }
-
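CALC_MSE_AVG_B interleaves the two byte vectors, horizontally subtracts them into signed 16-bit differences, dot-product-accumulates the squared differences into var, and adds the raw differences into sub. Per 16-byte vector it is equivalent to this scalar sketch — a hypothetical helper; a 64-bit accumulator is used here for clarity, where the vector code keeps four packed 32-bit lanes:

    static void calc_mse_avg_scalar(const uint8_t *src, const uint8_t *ref,
                                    int64_t *var, int32_t *sub) {
      for (int i = 0; i < 16; ++i) {
        const int d = (int)src[i] - (int)ref[i];
        *var += d * d; /* sum of squared differences (SSE) */
        *sub += d;     /* signed sum of differences */
      }
    }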
-#define VARIANCE_WxH(sse, diff, shift) sse - (((uint32_t)diff * diff) >> shift)
-
-#define VARIANCE_LARGE_WxH(sse, diff, shift) \
- sse - (((int64_t)diff * diff) >> shift)
-
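Both macros evaluate the usual variance identity, variance = SSE - sum^2 / N, with the division by N = width * height folded into a right shift by log2(N). VARIANCE_LARGE_WxH only widens the square to 64 bits: from N = 512 (16x32) upward the sum of differences can reach 255 * 512 = 130560, whose square no longer fits in 32 bits. For a 16x16 block, for instance:

    /* 16x16 block: N = 256, shift = 8 */
    uint32_t variance = sse - (((uint32_t)diff * diff) >> 8);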
-static uint32_t avg_sse_diff_4width_msa(const uint8_t *src_ptr,
- int32_t src_stride,
- const uint8_t *ref_ptr,
- int32_t ref_stride,
- const uint8_t *sec_pred, int32_t height,
- int32_t *diff) {
- int32_t ht_cnt;
- uint32_t src0, src1, src2, src3;
- uint32_t ref0, ref1, ref2, ref3;
- v16u8 pred, src = { 0 };
- v16u8 ref = { 0 };
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- pred = LD_UB(sec_pred);
- sec_pred += 16;
- LW4(src_ptr, src_stride, src0, src1, src2, src3);
- src_ptr += (4 * src_stride);
- LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
- ref_ptr += (4 * ref_stride);
-
- INSERT_W4_UB(src0, src1, src2, src3, src);
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
-
- src = __msa_aver_u_b(src, pred);
- CALC_MSE_AVG_B(src, ref, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t avg_sse_diff_8width_msa(const uint8_t *src_ptr,
- int32_t src_stride,
- const uint8_t *ref_ptr,
- int32_t ref_stride,
- const uint8_t *sec_pred, int32_t height,
- int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 pred0, pred1;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
- src_ptr += (4 * src_stride);
- LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
- ref_ptr += (4 * ref_stride);
-
- PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1,
- ref0, ref1);
- AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t avg_sse_diff_16width_msa(const uint8_t *src_ptr,
- int32_t src_stride,
- const uint8_t *ref_ptr,
- int32_t ref_stride,
- const uint8_t *sec_pred,
- int32_t height, int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src, ref, pred;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- pred = LD_UB(sec_pred);
- sec_pred += 16;
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- src = __msa_aver_u_b(src, pred);
- CALC_MSE_AVG_B(src, ref, var, avg);
-
- pred = LD_UB(sec_pred);
- sec_pred += 16;
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- src = __msa_aver_u_b(src, pred);
- CALC_MSE_AVG_B(src, ref, var, avg);
-
- pred = LD_UB(sec_pred);
- sec_pred += 16;
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- src = __msa_aver_u_b(src, pred);
- CALC_MSE_AVG_B(src, ref, var, avg);
-
- pred = LD_UB(sec_pred);
- sec_pred += 16;
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- src = __msa_aver_u_b(src, pred);
- CALC_MSE_AVG_B(src, ref, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t avg_sse_diff_32width_msa(const uint8_t *src_ptr,
- int32_t src_stride,
- const uint8_t *ref_ptr,
- int32_t ref_stride,
- const uint8_t *sec_pred,
- int32_t height, int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src0, src1, ref0, ref1, pred0, pred1;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
-
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
-
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
-
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t avg_sse_diff_32x64_msa(const uint8_t *src_ptr,
- int32_t src_stride,
- const uint8_t *ref_ptr,
- int32_t ref_stride,
- const uint8_t *sec_pred, int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src0, src1, ref0, ref1, pred0, pred1;
- v8i16 avg0 = { 0 };
- v8i16 avg1 = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = 16; ht_cnt--;) {
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
-
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
-
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
-
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
- }
-
- vec = __msa_hadd_s_w(avg0, avg0);
- vec += __msa_hadd_s_w(avg1, avg1);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t avg_sse_diff_64x32_msa(const uint8_t *src_ptr,
- int32_t src_stride,
- const uint8_t *ref_ptr,
- int32_t ref_stride,
- const uint8_t *sec_pred, int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 pred0, pred1, pred2, pred3;
- v8i16 avg0 = { 0 };
- v8i16 avg1 = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = 16; ht_cnt--;) {
- LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
- sec_pred += 64;
- LD_UB4(src_ptr, 16, src0, src1, src2, src3);
- src_ptr += src_stride;
- LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
- ref_ptr += ref_stride;
- AVER_UB4_UB(src0, pred0, src1, pred1, src2, pred2, src3, pred3, src0, src1,
- src2, src3);
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src2, ref2, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
- CALC_MSE_AVG_B(src3, ref3, var, avg1);
-
- LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
- sec_pred += 64;
- LD_UB4(src_ptr, 16, src0, src1, src2, src3);
- src_ptr += src_stride;
- LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
- ref_ptr += ref_stride;
- AVER_UB4_UB(src0, pred0, src1, pred1, src2, pred2, src3, pred3, src0, src1,
- src2, src3);
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src2, ref2, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
- CALC_MSE_AVG_B(src3, ref3, var, avg1);
- }
-
- vec = __msa_hadd_s_w(avg0, avg0);
- vec += __msa_hadd_s_w(avg1, avg1);
-
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t avg_sse_diff_64x64_msa(const uint8_t *src_ptr,
- int32_t src_stride,
- const uint8_t *ref_ptr,
- int32_t ref_stride,
- const uint8_t *sec_pred, int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 pred0, pred1, pred2, pred3;
- v8i16 avg0 = { 0 };
- v8i16 avg1 = { 0 };
- v8i16 avg2 = { 0 };
- v8i16 avg3 = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = 32; ht_cnt--;) {
- LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
- sec_pred += 64;
- LD_UB4(src_ptr, 16, src0, src1, src2, src3);
- src_ptr += src_stride;
- LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
- ref_ptr += ref_stride;
- AVER_UB4_UB(src0, pred0, src1, pred1, src2, pred2, src3, pred3, src0, src1,
- src2, src3);
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
- CALC_MSE_AVG_B(src2, ref2, var, avg2);
- CALC_MSE_AVG_B(src3, ref3, var, avg3);
-
- LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3);
- sec_pred += 64;
- LD_UB4(src_ptr, 16, src0, src1, src2, src3);
- src_ptr += src_stride;
- LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
- ref_ptr += ref_stride;
- AVER_UB4_UB(src0, pred0, src1, pred1, src2, pred2, src3, pred3, src0, src1,
- src2, src3);
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
- CALC_MSE_AVG_B(src2, ref2, var, avg2);
- CALC_MSE_AVG_B(src3, ref3, var, avg3);
- }
-
- vec = __msa_hadd_s_w(avg0, avg0);
- vec += __msa_hadd_s_w(avg1, avg1);
- vec += __msa_hadd_s_w(avg2, avg2);
- vec += __msa_hadd_s_w(avg3, avg3);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_sse_diff_4width_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- uint32_t ref0, ref1, ref2, ref3;
- v16u8 filt0, ref = { 0 };
- v16i8 src0, src1, src2, src3;
- v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
- v8u16 vec0, vec1, vec2, vec3;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
- LW4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
- vec2, vec3);
- SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
- PCKEV_B4_SB(vec0, vec0, vec1, vec1, vec2, vec2, vec3, vec3, src0, src1,
- src2, src3);
- ILVEV_W2_SB(src0, src1, src2, src3, src0, src2);
- src0 = (v16i8)__msa_ilvev_d((v2i64)src2, (v2i64)src0);
- CALC_MSE_AVG_B(src0, ref, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
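The horizontal path above is a 2-tap bilinear filter: LH() loads the two packed 8-bit taps, __msa_fill_h splats them across a vector, VSHF_B pairs every pixel with its right-hand neighbour, and DOTP_UB4_UH/SRARI_H4_UH form the rounded weighted sum. One output pixel corresponds to this scalar expression — a sketch, assuming the taps sum to 1 << FILTER_BITS, which is how the bilinear_filters_2t tables are defined:

    /* taps f0, f1 with f0 + f1 == (1 << FILTER_BITS) */
    out[i] = (uint8_t)((f0 * src[i] + f1 * src[i + 1] +
                        (1 << (FILTER_BITS - 1))) >> FILTER_BITS);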
-static uint32_t sub_pixel_sse_diff_8width_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16u8 filt0, out, ref0, ref1, ref2, ref3;
- v16i8 src0, src1, src2, src3;
- v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
- v8u16 vec0, vec1, vec2, vec3;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
- LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1);
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
- vec2, vec3);
- SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
- PCKEV_B4_SB(vec0, vec0, vec1, vec1, vec2, vec2, vec3, vec3, src0, src1,
- src2, src3);
- out = (v16u8)__msa_ilvev_d((v2i64)src1, (v2i64)src0);
- CALC_MSE_AVG_B(out, ref0, var, avg);
- out = (v16u8)__msa_ilvev_d((v2i64)src3, (v2i64)src2);
- CALC_MSE_AVG_B(out, ref1, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_sse_diff_16width_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
- v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
- v16u8 dst0, dst1, dst2, dst3, filt0;
- v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
- v8u16 out0, out1, out2, out3, out4, out5, out6, out7;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src, src_stride, src0, src2, src4, src6);
- LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
- src += (4 * src_stride);
- LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
- dst += (4 * dst_stride);
-
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- VSHF_B2_UH(src4, src4, src5, src5, mask, mask, vec4, vec5);
- VSHF_B2_UH(src6, src6, src7, src7, mask, mask, vec6, vec7);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1,
- out2, out3);
- DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5,
- out6, out7);
- SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS);
- SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS);
- PCKEV_B4_SB(out1, out0, out3, out2, out5, out4, out7, out6, src0, src1,
- src2, src3);
- CALC_MSE_AVG_B(src0, dst0, var, avg);
- CALC_MSE_AVG_B(src1, dst1, var, avg);
- CALC_MSE_AVG_B(src2, dst2, var, avg);
- CALC_MSE_AVG_B(src3, dst3, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_sse_diff_32width_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[2];
-
- for (loop_cnt = 0; loop_cnt < 2; ++loop_cnt) {
- sse += sub_pixel_sse_diff_16width_h_msa(src, src_stride, dst, dst_stride,
- filter, height, &diff0[loop_cnt]);
- src += 16;
- dst += 16;
- }
-
- *diff = diff0[0] + diff0[1];
-
- return sse;
-}
-
-static uint32_t sub_pixel_sse_diff_64width_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[4];
-
- for (loop_cnt = 0; loop_cnt < 4; ++loop_cnt) {
- sse += sub_pixel_sse_diff_16width_h_msa(src, src_stride, dst, dst_stride,
- filter, height, &diff0[loop_cnt]);
- src += 16;
- dst += 16;
- }
-
- *diff = diff0[0] + diff0[1] + diff0[2] + diff0[3];
-
- return sse;
-}
-
-static uint32_t sub_pixel_sse_diff_4width_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- uint32_t ref0, ref1, ref2, ref3;
- v16u8 src0, src1, src2, src3, src4, out;
- v16u8 src10_r, src32_r, src21_r, src43_r;
- v16u8 ref = { 0 };
- v16u8 src2110, src4332;
- v16u8 filt0;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
- v8u16 tmp0, tmp1;
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- src0 = LD_UB(src);
- src += src_stride;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
- LW4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
- ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
- src32_r, src43_r);
- ILVR_D2_UB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
- DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
- CALC_MSE_AVG_B(out, ref, var, avg);
- src0 = src4;
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_sse_diff_8width_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16u8 src0, src1, src2, src3, src4;
- v16u8 ref0, ref1, ref2, ref3;
- v8u16 vec0, vec1, vec2, vec3;
- v8u16 tmp0, tmp1, tmp2, tmp3;
- v16u8 filt0;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- src0 = LD_UB(src);
- src += src_stride;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
- LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1);
- ILVR_B4_UH(src1, src0, src2, src1, src3, src2, src4, src3, vec0, vec1, vec2,
- vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1,
- tmp2, tmp3);
- SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
- PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
- src0 = src4;
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_sse_diff_16width_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 src0, src1, src2, src3, src4;
- v16u8 out0, out1, out2, out3;
- v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
- v8u16 tmp0, tmp1, tmp2, tmp3;
- v16u8 filt0;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- src0 = LD_UB(src);
- src += src_stride;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
- LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
- ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
- DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out0 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6);
- ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7);
- DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- out1 = (v16u8)__msa_pckev_b((v16i8)tmp3, (v16i8)tmp2);
-
- DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out2 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
- DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- out3 = (v16u8)__msa_pckev_b((v16i8)tmp3, (v16i8)tmp2);
-
- src0 = src4;
-
- CALC_MSE_AVG_B(out0, ref0, var, avg);
- CALC_MSE_AVG_B(out1, ref1, var, avg);
- CALC_MSE_AVG_B(out2, ref2, var, avg);
- CALC_MSE_AVG_B(out3, ref3, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_sse_diff_32width_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[2];
-
- for (loop_cnt = 0; loop_cnt < 2; ++loop_cnt) {
- sse += sub_pixel_sse_diff_16width_v_msa(src, src_stride, dst, dst_stride,
- filter, height, &diff0[loop_cnt]);
- src += 16;
- dst += 16;
- }
-
- *diff = diff0[0] + diff0[1];
-
- return sse;
-}
-
-static uint32_t sub_pixel_sse_diff_64width_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[4];
-
- for (loop_cnt = 0; loop_cnt < 4; ++loop_cnt) {
- sse += sub_pixel_sse_diff_16width_v_msa(src, src_stride, dst, dst_stride,
- filter, height, &diff0[loop_cnt]);
- src += 16;
- dst += 16;
- }
-
- *diff = diff0[0] + diff0[1] + diff0[2] + diff0[3];
-
- return sse;
-}
-
-static uint32_t sub_pixel_sse_diff_4width_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter_horiz, const uint8_t *filter_vert,
- int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- uint32_t ref0, ref1, ref2, ref3;
- v16u8 src0, src1, src2, src3, src4;
- v16u8 out, ref = { 0 };
- v16u8 filt_vt, filt_hz, vec0, vec1;
- v16u8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
- v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4;
- v8u16 tmp0, tmp1;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter_horiz);
- filt_hz = (v16u8)__msa_fill_h(filtval);
- filtval = LH(filter_vert);
- filt_vt = (v16u8)__msa_fill_h(filtval);
-
- src0 = LD_UB(src);
- src += src_stride;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
- LW4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
- hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, FILTER_BITS);
- hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, FILTER_BITS);
- hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
- hz_out1 = (v8u16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8);
- hz_out3 = (v8u16)__msa_pckod_d((v2i64)hz_out4, (v2i64)hz_out2);
- ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
- CALC_MSE_AVG_B(out, ref, var, avg);
- src0 = src4;
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_sse_diff_8width_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter_horiz, const uint8_t *filter_vert,
- int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 src0, src1, src2, src3, src4;
- v16u8 out0, out1;
- v16u8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
- v8u16 hz_out0, hz_out1;
- v8u16 tmp0, tmp1, tmp2, tmp3;
- v16u8 filt_vt, filt_hz, vec0;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter_horiz);
- filt_hz = (v16u8)__msa_fill_h(filtval);
- filtval = LH(filter_vert);
- filt_vt = (v16u8)__msa_fill_h(filtval);
-
- src0 = LD_UB(src);
- src += src_stride;
- hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
- LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1);
- hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
- vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
- tmp0 = __msa_dotp_u_h(vec0, filt_vt);
- hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS);
- vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
- tmp1 = __msa_dotp_u_h(vec0, filt_vt);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS);
- vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
- tmp2 = __msa_dotp_u_h(vec0, filt_vt);
- hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
- vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
- tmp3 = __msa_dotp_u_h(vec0, filt_vt);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
- CALC_MSE_AVG_B(out0, ref0, var, avg);
- CALC_MSE_AVG_B(out1, ref1, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_sse_diff_16width_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter_horiz, const uint8_t *filter_vert,
- int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 filt_hz, filt_vt, vec0, vec1;
- v16u8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
- v8u16 hz_out0, hz_out1, hz_out2, hz_out3;
- v8u16 tmp0, tmp1;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter_horiz);
- filt_hz = (v16u8)__msa_fill_h(filtval);
- filtval = LH(filter_vert);
- filt_vt = (v16u8)__msa_fill_h(filtval);
-
- LD_UB2(src, 8, src0, src1);
- src += src_stride;
-
- hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
- hz_out2 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src0, src2, src4, src6);
- LD_UB4(src + 8, src_stride, src1, src3, src5, src7);
- src += (4 * src_stride);
- LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
- hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
- ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- src0 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS);
- hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS);
- ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- src1 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
- hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz, FILTER_BITS);
- ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- src2 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz, FILTER_BITS);
- hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz, FILTER_BITS);
- ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- src3 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
- CALC_MSE_AVG_B(src2, ref2, var, avg);
- CALC_MSE_AVG_B(src3, ref3, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_sse_diff_32width_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter_horiz, const uint8_t *filter_vert,
- int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[2];
-
- for (loop_cnt = 0; loop_cnt < 2; ++loop_cnt) {
- sse += sub_pixel_sse_diff_16width_hv_msa(src, src_stride, dst, dst_stride,
- filter_horiz, filter_vert, height,
- &diff0[loop_cnt]);
- src += 16;
- dst += 16;
- }
-
- *diff = diff0[0] + diff0[1];
-
- return sse;
-}
-
-static uint32_t sub_pixel_sse_diff_64width_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *filter_horiz, const uint8_t *filter_vert,
- int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[4];
-
- for (loop_cnt = 0; loop_cnt < 4; ++loop_cnt) {
- sse += sub_pixel_sse_diff_16width_hv_msa(src, src_stride, dst, dst_stride,
- filter_horiz, filter_vert, height,
- &diff0[loop_cnt]);
- src += 16;
- dst += 16;
- }
-
- *diff = diff0[0] + diff0[1] + diff0[2] + diff0[3];
-
- return sse;
-}
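The hv kernels above apply the same 2-tap filter separably: the horizontal pass (HORIZ_2TAP_FILT_UH) produces rounded intermediate rows (hz_out), and the vertical pass dot-products consecutive intermediate rows with filt_vt before rounding again. Per output pixel, a scalar sketch (R denotes the rounding constant 1 << (FILTER_BITS - 1)):

    int32_t h0 = (f0h * s[y][x]     + f1h * s[y][x + 1]     + R) >> FILTER_BITS;
    int32_t h1 = (f0h * s[y + 1][x] + f1h * s[y + 1][x + 1] + R) >> FILTER_BITS;
    uint8_t out = (uint8_t)((f0v * h0 + f1v * h1 + R) >> FILTER_BITS);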
-
-static uint32_t sub_pixel_avg_sse_diff_4width_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- uint32_t ref0, ref1, ref2, ref3;
- v16u8 out, pred, filt0, ref = { 0 };
- v16i8 src0, src1, src2, src3;
- v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
- v8u16 vec0, vec1, vec2, vec3;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
- pred = LD_UB(sec_pred);
- sec_pred += 16;
- LW4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
- vec2, vec3);
- SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
- PCKEV_B4_SB(vec0, vec0, vec1, vec1, vec2, vec2, vec3, vec3, src0, src1,
- src2, src3);
- ILVEV_W2_SB(src0, src1, src2, src3, src0, src2);
- out = (v16u8)__msa_ilvev_d((v2i64)src2, (v2i64)src0);
- out = __msa_aver_u_b(out, pred);
- CALC_MSE_AVG_B(out, ref, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_avg_sse_diff_8width_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16u8 out, pred, filt0;
- v16u8 ref0, ref1, ref2, ref3;
- v16i8 src0, src1, src2, src3;
- v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
- v8u16 vec0, vec1, vec2, vec3;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src, src_stride, src0, src1, src2, src3);
- src += (4 * src_stride);
- LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1);
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
- vec2, vec3);
- SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
- PCKEV_B4_SB(vec0, vec0, vec1, vec1, vec2, vec2, vec3, vec3, src0, src1,
- src2, src3);
- out = (v16u8)__msa_ilvev_d((v2i64)src1, (v2i64)src0);
-
- pred = LD_UB(sec_pred);
- sec_pred += 16;
- out = __msa_aver_u_b(out, pred);
- CALC_MSE_AVG_B(out, ref0, var, avg);
- out = (v16u8)__msa_ilvev_d((v2i64)src3, (v2i64)src2);
- pred = LD_UB(sec_pred);
- sec_pred += 16;
- out = __msa_aver_u_b(out, pred);
- CALC_MSE_AVG_B(out, ref1, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t subpel_avg_ssediff_16w_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff, int32_t width) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
- v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
- v16u8 dst0, dst1, dst2, dst3;
- v16u8 tmp0, tmp1, tmp2, tmp3;
- v16u8 pred0, pred1, pred2, pred3, filt0;
- v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
- v8u16 out0, out1, out2, out3, out4, out5, out6, out7;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_SB4(src, src_stride, src0, src2, src4, src6);
- LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
- src += (4 * src_stride);
- LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
- dst += (4 * dst_stride);
- LD_UB4(sec_pred, width, pred0, pred1, pred2, pred3);
- sec_pred += (4 * width);
-
- VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
- VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
- VSHF_B2_UH(src4, src4, src5, src5, mask, mask, vec4, vec5);
- VSHF_B2_UH(src6, src6, src7, src7, mask, mask, vec6, vec7);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1,
- out2, out3);
- DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5,
- out6, out7);
- SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS);
- SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS);
- PCKEV_B4_UB(out1, out0, out3, out2, out5, out4, out7, out6, tmp0, tmp1,
- tmp2, tmp3);
- AVER_UB4_UB(tmp0, pred0, tmp1, pred1, tmp2, pred2, tmp3, pred3, tmp0, tmp1,
- tmp2, tmp3);
-
- CALC_MSE_AVG_B(tmp0, dst0, var, avg);
- CALC_MSE_AVG_B(tmp1, dst1, var, avg);
- CALC_MSE_AVG_B(tmp2, dst2, var, avg);
- CALC_MSE_AVG_B(tmp3, dst3, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_avg_sse_diff_16width_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff) {
- return subpel_avg_ssediff_16w_h_msa(src, src_stride, dst, dst_stride,
- sec_pred, filter, height, diff, 16);
-}
-
-static uint32_t sub_pixel_avg_sse_diff_32width_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[2];
-
- for (loop_cnt = 0; loop_cnt < 2; ++loop_cnt) {
- sse +=
- subpel_avg_ssediff_16w_h_msa(src, src_stride, dst, dst_stride, sec_pred,
- filter, height, &diff0[loop_cnt], 32);
- src += 16;
- dst += 16;
- sec_pred += 16;
- }
-
- *diff = diff0[0] + diff0[1];
-
- return sse;
-}
-
-static uint32_t sub_pixel_avg_sse_diff_64width_h_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[4];
-
- for (loop_cnt = 0; loop_cnt < 4; ++loop_cnt) {
- sse +=
- subpel_avg_ssediff_16w_h_msa(src, src_stride, dst, dst_stride, sec_pred,
- filter, height, &diff0[loop_cnt], 64);
- src += 16;
- dst += 16;
- sec_pred += 16;
- }
-
- *diff = diff0[0] + diff0[1] + diff0[2] + diff0[3];
-
- return sse;
-}
-
-static uint32_t sub_pixel_avg_sse_diff_4width_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- uint32_t ref0, ref1, ref2, ref3;
- v16u8 src0, src1, src2, src3, src4;
- v16u8 src10_r, src32_r, src21_r, src43_r;
- v16u8 out, pred, ref = { 0 };
- v16u8 src2110, src4332, filt0;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
- v8u16 tmp0, tmp1;
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- src0 = LD_UB(src);
- src += src_stride;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
- pred = LD_UB(sec_pred);
- sec_pred += 16;
- LW4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
- ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
- src32_r, src43_r);
- ILVR_D2_UB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
- DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
-
- out = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
- out = __msa_aver_u_b(out, pred);
- CALC_MSE_AVG_B(out, ref, var, avg);
- src0 = src4;
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_avg_sse_diff_8width_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16u8 src0, src1, src2, src3, src4;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 pred0, pred1, filt0;
- v8u16 vec0, vec1, vec2, vec3;
- v8u16 tmp0, tmp1, tmp2, tmp3;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- src0 = LD_UB(src);
- src += src_stride;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
- PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1);
- ILVR_B4_UH(src1, src0, src2, src1, src3, src2, src4, src3, vec0, vec1, vec2,
- vec3);
- DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1,
- tmp2, tmp3);
- SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
- PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, src0, src1);
- AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
-
- src0 = src4;
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t subpel_avg_ssediff_16w_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff, int32_t width) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 pred0, pred1, pred2, pred3;
- v16u8 src0, src1, src2, src3, src4;
- v16u8 out0, out1, out2, out3, filt0;
- v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
- v8u16 tmp0, tmp1, tmp2, tmp3;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter);
- filt0 = (v16u8)__msa_fill_h(filtval);
-
- src0 = LD_UB(src);
- src += src_stride;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
- LD_UB4(sec_pred, width, pred0, pred1, pred2, pred3);
- sec_pred += (4 * width);
-
- ILVR_B2_UH(src1, src0, src2, src1, vec0, vec2);
- ILVL_B2_UH(src1, src0, src2, src1, vec1, vec3);
- DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out0 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- ILVR_B2_UH(src3, src2, src4, src3, vec4, vec6);
- ILVL_B2_UH(src3, src2, src4, src3, vec5, vec7);
- DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- out1 = (v16u8)__msa_pckev_b((v16i8)tmp3, (v16i8)tmp2);
-
- DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out2 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- out3 = (v16u8)__msa_pckev_b((v16i8)tmp3, (v16i8)tmp2);
-
- src0 = src4;
- LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- AVER_UB4_UB(out0, pred0, out1, pred1, out2, pred2, out3, pred3, out0, out1,
- out2, out3);
-
- CALC_MSE_AVG_B(out0, ref0, var, avg);
- CALC_MSE_AVG_B(out1, ref1, var, avg);
- CALC_MSE_AVG_B(out2, ref2, var, avg);
- CALC_MSE_AVG_B(out3, ref3, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_avg_sse_diff_16width_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff) {
- return subpel_avg_ssediff_16w_v_msa(src, src_stride, dst, dst_stride,
- sec_pred, filter, height, diff, 16);
-}
-
-static uint32_t sub_pixel_avg_sse_diff_32width_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[2];
-
- for (loop_cnt = 0; loop_cnt < 2; ++loop_cnt) {
- sse +=
- subpel_avg_ssediff_16w_v_msa(src, src_stride, dst, dst_stride, sec_pred,
- filter, height, &diff0[loop_cnt], 32);
- src += 16;
- dst += 16;
- sec_pred += 16;
- }
-
- *diff = diff0[0] + diff0[1];
-
- return sse;
-}
-
-static uint32_t sub_pixel_avg_sse_diff_64width_v_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter,
- int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[4];
-
- for (loop_cnt = 0; loop_cnt < 4; ++loop_cnt) {
- sse +=
- subpel_avg_ssediff_16w_v_msa(src, src_stride, dst, dst_stride, sec_pred,
- filter, height, &diff0[loop_cnt], 64);
- src += 16;
- dst += 16;
- sec_pred += 16;
- }
-
- *diff = diff0[0] + diff0[1] + diff0[2] + diff0[3];
-
- return sse;
-}
-
-static uint32_t sub_pixel_avg_sse_diff_4width_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter_horiz,
- const uint8_t *filter_vert, int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- uint32_t ref0, ref1, ref2, ref3;
- v16u8 src0, src1, src2, src3, src4;
- v16u8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 };
- v16u8 filt_hz, filt_vt, vec0, vec1;
- v16u8 out, pred, ref = { 0 };
- v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, tmp0, tmp1;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter_horiz);
- filt_hz = (v16u8)__msa_fill_h(filtval);
- filtval = LH(filter_vert);
- filt_vt = (v16u8)__msa_fill_h(filtval);
-
- src0 = LD_UB(src);
- src += src_stride;
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
- pred = LD_UB(sec_pred);
- sec_pred += 16;
- LW4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
- hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, FILTER_BITS);
- hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, FILTER_BITS);
- hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
- hz_out1 = (v8u16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8);
- hz_out3 = (v8u16)__msa_pckod_d((v2i64)hz_out4, (v2i64)hz_out2);
- ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
- out = __msa_aver_u_b(out, pred);
- CALC_MSE_AVG_B(out, ref, var, avg);
- src0 = src4;
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_avg_sse_diff_8width_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter_horiz,
- const uint8_t *filter_vert, int32_t height, int32_t *diff) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 src0, src1, src2, src3, src4;
- v16u8 pred0, pred1, out0, out1;
- v16u8 filt_hz, filt_vt, vec0;
- v16u8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
- v8u16 hz_out0, hz_out1, tmp0, tmp1, tmp2, tmp3;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter_horiz);
- filt_hz = (v16u8)__msa_fill_h(filtval);
- filtval = LH(filter_vert);
- filt_vt = (v16u8)__msa_fill_h(filtval);
-
- src0 = LD_UB(src);
- src += src_stride;
- hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src1, src2, src3, src4);
- src += (4 * src_stride);
- LD_UB2(sec_pred, 16, pred0, pred1);
- sec_pred += 32;
- LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1);
- hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
-
- vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
- tmp0 = __msa_dotp_u_h(vec0, filt_vt);
- hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS);
-
- vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
- tmp1 = __msa_dotp_u_h(vec0, filt_vt);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS);
-
- vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
- tmp2 = __msa_dotp_u_h(vec0, filt_vt);
- hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
-
- vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
- tmp3 = __msa_dotp_u_h(vec0, filt_vt);
-
- SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
- PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1);
- AVER_UB2_UB(out0, pred0, out1, pred1, out0, out1);
-
- CALC_MSE_AVG_B(out0, ref0, var, avg);
- CALC_MSE_AVG_B(out1, ref1, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t subpel_avg_ssediff_16w_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter_horiz,
- const uint8_t *filter_vert, int32_t height, int32_t *diff, int32_t width) {
- int16_t filtval;
- uint32_t loop_cnt;
- v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
- v16u8 ref0, ref1, ref2, ref3;
- v16u8 pred0, pred1, pred2, pred3;
- v16u8 out0, out1, out2, out3;
- v16u8 filt_hz, filt_vt, vec0, vec1;
- v16u8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
- v8u16 hz_out0, hz_out1, hz_out2, hz_out3, tmp0, tmp1;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- filtval = LH(filter_horiz);
- filt_hz = (v16u8)__msa_fill_h(filtval);
- filtval = LH(filter_vert);
- filt_vt = (v16u8)__msa_fill_h(filtval);
-
- LD_UB2(src, 8, src0, src1);
- src += src_stride;
-
- hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
- hz_out2 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LD_UB4(src, src_stride, src0, src2, src4, src6);
- LD_UB4(src + 8, src_stride, src1, src3, src5, src7);
- src += (4 * src_stride);
- LD_UB4(sec_pred, width, pred0, pred1, pred2, pred3);
- sec_pred += (4 * width);
-
- hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
- hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
- ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out0 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS);
- hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS);
- ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out1 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
- hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz, FILTER_BITS);
- ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out2 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz, FILTER_BITS);
- hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz, FILTER_BITS);
- ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
- DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
- SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
- out3 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
-
- LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3);
- dst += (4 * dst_stride);
-
- AVER_UB4_UB(out0, pred0, out1, pred1, out2, pred2, out3, pred3, out0, out1,
- out2, out3);
-
- CALC_MSE_AVG_B(out0, ref0, var, avg);
- CALC_MSE_AVG_B(out1, ref1, var, avg);
- CALC_MSE_AVG_B(out2, ref2, var, avg);
- CALC_MSE_AVG_B(out3, ref3, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sub_pixel_avg_sse_diff_16width_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter_horiz,
- const uint8_t *filter_vert, int32_t height, int32_t *diff) {
- return subpel_avg_ssediff_16w_hv_msa(src, src_stride, dst, dst_stride,
- sec_pred, filter_horiz, filter_vert,
- height, diff, 16);
-}
-
-static uint32_t sub_pixel_avg_sse_diff_32width_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter_horiz,
- const uint8_t *filter_vert, int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[2];
-
- for (loop_cnt = 0; loop_cnt < 2; ++loop_cnt) {
- sse += subpel_avg_ssediff_16w_hv_msa(src, src_stride, dst, dst_stride,
- sec_pred, filter_horiz, filter_vert,
- height, &diff0[loop_cnt], 32);
- src += 16;
- dst += 16;
- sec_pred += 16;
- }
-
- *diff = diff0[0] + diff0[1];
-
- return sse;
-}
-
-static uint32_t sub_pixel_avg_sse_diff_64width_hv_msa(
- const uint8_t *src, int32_t src_stride, const uint8_t *dst,
- int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter_horiz,
- const uint8_t *filter_vert, int32_t height, int32_t *diff) {
- uint32_t loop_cnt, sse = 0;
- int32_t diff0[4];
-
- for (loop_cnt = 0; loop_cnt < 4; ++loop_cnt) {
- sse += subpel_avg_ssediff_16w_hv_msa(src, src_stride, dst, dst_stride,
- sec_pred, filter_horiz, filter_vert,
- height, &diff0[loop_cnt], 64);
- src += 16;
- dst += 16;
- sec_pred += 16;
- }
-
- *diff = diff0[0] + diff0[1] + diff0[2] + diff0[3];
-
- return sse;
-}
-
-#define VARIANCE_4Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 4);
-#define VARIANCE_4Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 5);
-#define VARIANCE_8Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 5);
-#define VARIANCE_8Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 6);
-#define VARIANCE_8Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 7);
-#define VARIANCE_16Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 7);
-#define VARIANCE_16Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 8);
-
-#define VARIANCE_16Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9);
-#define VARIANCE_32Wx16H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9);
-#define VARIANCE_32Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 10);
-#define VARIANCE_32Wx64H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11);
-#define VARIANCE_64Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11);
-#define VARIANCE_64Wx64H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 12);
-
-#define AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(wd, ht) \
- uint32_t aom_sub_pixel_variance##wd##x##ht##_msa( \
- const uint8_t *src, int32_t src_stride, int32_t xoffset, \
- int32_t yoffset, const uint8_t *ref, int32_t ref_stride, \
- uint32_t *sse) { \
- int32_t diff; \
- uint32_t var; \
- const uint8_t *h_filter = bilinear_filters_2t[xoffset]; \
- const uint8_t *v_filter = bilinear_filters_2t[yoffset]; \
- \
- if (yoffset) { \
- if (xoffset) { \
- *sse = sub_pixel_sse_diff_##wd##width_hv_msa( \
- src, src_stride, ref, ref_stride, h_filter, v_filter, ht, &diff); \
- } else { \
- *sse = sub_pixel_sse_diff_##wd##width_v_msa( \
- src, src_stride, ref, ref_stride, v_filter, ht, &diff); \
- } \
- \
- var = VARIANCE_##wd##Wx##ht##H(*sse, diff); \
- } else { \
- if (xoffset) { \
- *sse = sub_pixel_sse_diff_##wd##width_h_msa( \
- src, src_stride, ref, ref_stride, h_filter, ht, &diff); \
- \
- var = VARIANCE_##wd##Wx##ht##H(*sse, diff); \
- } else { \
- var = aom_variance##wd##x##ht##_msa(src, src_stride, ref, ref_stride, \
- sse); \
- } \
- } \
- \
- return var; \
- }
-
-/* clang-format off */
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(4, 4)
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(4, 8)
-
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(8, 4)
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(8, 8)
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(8, 16)
-
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(16, 8)
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(16, 16)
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(16, 32)
-
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(32, 16)
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(32, 32)
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(32, 64)
-
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(64, 32)
-AOM_SUB_PIXEL_VARIANCE_WDXHT_MSA(64, 64)
-/* clang-format on */
-
-#define AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(wd, ht) \
- uint32_t aom_sub_pixel_avg_variance##wd##x##ht##_msa( \
- const uint8_t *src_ptr, int32_t src_stride, int32_t xoffset, \
- int32_t yoffset, const uint8_t *ref_ptr, int32_t ref_stride, \
- uint32_t *sse, const uint8_t *sec_pred) { \
- int32_t diff; \
- const uint8_t *h_filter = bilinear_filters_2t[xoffset]; \
- const uint8_t *v_filter = bilinear_filters_2t[yoffset]; \
- \
- if (yoffset) { \
- if (xoffset) { \
- *sse = sub_pixel_avg_sse_diff_##wd##width_hv_msa( \
- src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, \
- v_filter, ht, &diff); \
- } else { \
- *sse = sub_pixel_avg_sse_diff_##wd##width_v_msa( \
- src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, v_filter, ht, \
- &diff); \
- } \
- } else { \
- if (xoffset) { \
- *sse = sub_pixel_avg_sse_diff_##wd##width_h_msa( \
- src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, ht, \
- &diff); \
- } else { \
- *sse = avg_sse_diff_##wd##width_msa(src_ptr, src_stride, ref_ptr, \
- ref_stride, sec_pred, ht, &diff); \
- } \
- } \
- \
- return VARIANCE_##wd##Wx##ht##H(*sse, diff); \
- }
-
-/* clang-format off */
-AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(4, 4)
-AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(4, 8)
-
-AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(8, 4)
-AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(8, 8)
-AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(8, 16)
-
-AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(16, 8)
-AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(16, 16)
-AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(16, 32)
-
-AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(32, 16)
-AOM_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(32, 32)
-/* clang-format on */
-
-uint32_t aom_sub_pixel_avg_variance32x64_msa(const uint8_t *src_ptr,
- int32_t src_stride,
- int32_t xoffset, int32_t yoffset,
- const uint8_t *ref_ptr,
- int32_t ref_stride, uint32_t *sse,
- const uint8_t *sec_pred) {
- int32_t diff;
- const uint8_t *h_filter = bilinear_filters_2t[xoffset];
- const uint8_t *v_filter = bilinear_filters_2t[yoffset];
-
- if (yoffset) {
- if (xoffset) {
- *sse = sub_pixel_avg_sse_diff_32width_hv_msa(
- src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter,
- v_filter, 64, &diff);
- } else {
- *sse = sub_pixel_avg_sse_diff_32width_v_msa(src_ptr, src_stride, ref_ptr,
- ref_stride, sec_pred,
- v_filter, 64, &diff);
- }
- } else {
- if (xoffset) {
- *sse = sub_pixel_avg_sse_diff_32width_h_msa(src_ptr, src_stride, ref_ptr,
- ref_stride, sec_pred,
- h_filter, 64, &diff);
- } else {
- *sse = avg_sse_diff_32x64_msa(src_ptr, src_stride, ref_ptr, ref_stride,
- sec_pred, &diff);
- }
- }
-
- return VARIANCE_32Wx64H(*sse, diff);
-}
-
-#define AOM_SUB_PIXEL_AVG_VARIANCE64XHEIGHT_MSA(ht) \
- uint32_t aom_sub_pixel_avg_variance64x##ht##_msa( \
- const uint8_t *src_ptr, int32_t src_stride, int32_t xoffset, \
- int32_t yoffset, const uint8_t *ref_ptr, int32_t ref_stride, \
- uint32_t *sse, const uint8_t *sec_pred) { \
- int32_t diff; \
- const uint8_t *h_filter = bilinear_filters_2t[xoffset]; \
- const uint8_t *v_filter = bilinear_filters_2t[yoffset]; \
- \
- if (yoffset) { \
- if (xoffset) { \
- *sse = sub_pixel_avg_sse_diff_64width_hv_msa( \
- src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, \
- v_filter, ht, &diff); \
- } else { \
- *sse = sub_pixel_avg_sse_diff_64width_v_msa( \
- src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, v_filter, ht, \
- &diff); \
- } \
- } else { \
- if (xoffset) { \
- *sse = sub_pixel_avg_sse_diff_64width_h_msa( \
- src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, ht, \
- &diff); \
- } else { \
- *sse = avg_sse_diff_64x##ht##_msa(src_ptr, src_stride, ref_ptr, \
- ref_stride, sec_pred, &diff); \
- } \
- } \
- \
- return VARIANCE_64Wx##ht##H(*sse, diff); \
- }
-
-/* clang-format off */
-AOM_SUB_PIXEL_AVG_VARIANCE64XHEIGHT_MSA(32)
-AOM_SUB_PIXEL_AVG_VARIANCE64XHEIGHT_MSA(64)
-/* clang-format on */
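The file removed above provided the MSA sub-pixel variance kernels. Each `aom_sub_pixel_variance*` entry point dispatches on `xoffset`/`yoffset`: both zero falls through to plain variance, a single non-zero offset applies a horizontal- or vertical-only 2-tap bilinear filter, and both non-zero runs the separable hv path. A minimal scalar sketch of the interpolation those paths vectorize, assuming (as with libaom's `bilinear_filters_2t`) that the two taps sum to `1 << FILTER_BITS`; the helper name is illustrative, not the removed implementation:

    #define FILTER_BITS 7
    /* One output sample of the 2-tap bilinear filter, with rounding. */
    static unsigned char bilin_2tap(unsigned char a, unsigned char b,
                                    const unsigned char *filt) {
      return (unsigned char)((a * filt[0] + b * filt[1] +
                              (1 << (FILTER_BITS - 1))) >> FILTER_BITS);
    }
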
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/subtract_msa.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/subtract_msa.c
deleted file mode 100644
index bfed773ac89..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/subtract_msa.c
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/macros_msa.h"
-
-static void sub_blk_4x4_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *pred_ptr, int32_t pred_stride,
- int16_t *diff_ptr, int32_t diff_stride) {
- uint32_t src0, src1, src2, src3;
- uint32_t pred0, pred1, pred2, pred3;
- v16i8 src = { 0 };
- v16i8 pred = { 0 };
- v16u8 src_l0, src_l1;
- v8i16 diff0, diff1;
-
- LW4(src_ptr, src_stride, src0, src1, src2, src3);
- LW4(pred_ptr, pred_stride, pred0, pred1, pred2, pred3);
- INSERT_W4_SB(src0, src1, src2, src3, src);
- INSERT_W4_SB(pred0, pred1, pred2, pred3, pred);
- ILVRL_B2_UB(src, pred, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST8x4_UB(diff0, diff1, diff_ptr, (2 * diff_stride));
-}
-
-static void sub_blk_8x8_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *pred_ptr, int32_t pred_stride,
- int16_t *diff_ptr, int32_t diff_stride) {
- uint32_t loop_cnt;
- uint64_t src0, src1, pred0, pred1;
- v16i8 src = { 0 };
- v16i8 pred = { 0 };
- v16u8 src_l0, src_l1;
- v8i16 diff0, diff1;
-
- for (loop_cnt = 4; loop_cnt--;) {
- LD2(src_ptr, src_stride, src0, src1);
- src_ptr += (2 * src_stride);
- LD2(pred_ptr, pred_stride, pred0, pred1);
- pred_ptr += (2 * pred_stride);
-
- INSERT_D2_SB(src0, src1, src);
- INSERT_D2_SB(pred0, pred1, pred);
- ILVRL_B2_UB(src, pred, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff_ptr, diff_stride);
- diff_ptr += (2 * diff_stride);
- }
-}
-
-static void sub_blk_16x16_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *pred, int32_t pred_stride,
- int16_t *diff, int32_t diff_stride) {
- int8_t count;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
- v16i8 pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7;
- v16u8 src_l0, src_l1;
- v8i16 diff0, diff1;
-
- for (count = 2; count--;) {
- LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
- src += (8 * src_stride);
-
- LD_SB8(pred, pred_stride, pred0, pred1, pred2, pred3, pred4, pred5, pred6,
- pred7);
- pred += (8 * pred_stride);
-
- ILVRL_B2_UB(src0, pred0, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src1, pred1, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src2, pred2, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src3, pred3, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src4, pred4, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src5, pred5, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src6, pred6, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src7, pred7, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- diff += diff_stride;
- }
-}
-
-static void sub_blk_32x32_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *pred, int32_t pred_stride,
- int16_t *diff, int32_t diff_stride) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
- v16i8 pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7;
- v16u8 src_l0, src_l1;
- v8i16 diff0, diff1;
-
- for (loop_cnt = 8; loop_cnt--;) {
- LD_SB2(src, 16, src0, src1);
- src += src_stride;
- LD_SB2(src, 16, src2, src3);
- src += src_stride;
- LD_SB2(src, 16, src4, src5);
- src += src_stride;
- LD_SB2(src, 16, src6, src7);
- src += src_stride;
-
- LD_SB2(pred, 16, pred0, pred1);
- pred += pred_stride;
- LD_SB2(pred, 16, pred2, pred3);
- pred += pred_stride;
- LD_SB2(pred, 16, pred4, pred5);
- pred += pred_stride;
- LD_SB2(pred, 16, pred6, pred7);
- pred += pred_stride;
-
- ILVRL_B2_UB(src0, pred0, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- ILVRL_B2_UB(src1, pred1, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff + 16, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src2, pred2, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- ILVRL_B2_UB(src3, pred3, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff + 16, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src4, pred4, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- ILVRL_B2_UB(src5, pred5, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff + 16, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src6, pred6, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- ILVRL_B2_UB(src7, pred7, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff + 16, 8);
- diff += diff_stride;
- }
-}
-
-static void sub_blk_64x64_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *pred, int32_t pred_stride,
- int16_t *diff, int32_t diff_stride) {
- uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
- v16i8 pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7;
- v16u8 src_l0, src_l1;
- v8i16 diff0, diff1;
-
- for (loop_cnt = 32; loop_cnt--;) {
- LD_SB4(src, 16, src0, src1, src2, src3);
- src += src_stride;
- LD_SB4(src, 16, src4, src5, src6, src7);
- src += src_stride;
-
- LD_SB4(pred, 16, pred0, pred1, pred2, pred3);
- pred += pred_stride;
- LD_SB4(pred, 16, pred4, pred5, pred6, pred7);
- pred += pred_stride;
-
- ILVRL_B2_UB(src0, pred0, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- ILVRL_B2_UB(src1, pred1, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff + 16, 8);
- ILVRL_B2_UB(src2, pred2, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff + 32, 8);
- ILVRL_B2_UB(src3, pred3, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff + 48, 8);
- diff += diff_stride;
-
- ILVRL_B2_UB(src4, pred4, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff, 8);
- ILVRL_B2_UB(src5, pred5, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff + 16, 8);
- ILVRL_B2_UB(src6, pred6, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff + 32, 8);
- ILVRL_B2_UB(src7, pred7, src_l0, src_l1);
- HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
- ST_SH2(diff0, diff1, diff + 48, 8);
- diff += diff_stride;
- }
-}
-
-void aom_subtract_block_msa(int32_t rows, int32_t cols, int16_t *diff_ptr,
- ptrdiff_t diff_stride, const uint8_t *src_ptr,
- ptrdiff_t src_stride, const uint8_t *pred_ptr,
- ptrdiff_t pred_stride) {
- if (rows == cols) {
- switch (rows) {
- case 4:
- sub_blk_4x4_msa(src_ptr, src_stride, pred_ptr, pred_stride, diff_ptr,
- diff_stride);
- break;
- case 8:
- sub_blk_8x8_msa(src_ptr, src_stride, pred_ptr, pred_stride, diff_ptr,
- diff_stride);
- break;
- case 16:
- sub_blk_16x16_msa(src_ptr, src_stride, pred_ptr, pred_stride, diff_ptr,
- diff_stride);
- break;
- case 32:
- sub_blk_32x32_msa(src_ptr, src_stride, pred_ptr, pred_stride, diff_ptr,
- diff_stride);
- break;
- case 64:
- sub_blk_64x64_msa(src_ptr, src_stride, pred_ptr, pred_stride, diff_ptr,
- diff_stride);
- break;
- default:
- aom_subtract_block_c(rows, cols, diff_ptr, diff_stride, src_ptr,
- src_stride, pred_ptr, pred_stride);
- break;
- }
- } else {
- aom_subtract_block_c(rows, cols, diff_ptr, diff_stride, src_ptr, src_stride,
- pred_ptr, pred_stride);
- }
-}
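The MSA subtract kernels deleted above specialize the square block sizes and fall back to `aom_subtract_block_c` for everything else, as the dispatcher at the end of the file shows. A scalar sketch of the common behaviour (residual = source minus prediction, widened to 16 bits per sample):

    #include <stddef.h>
    #include <stdint.h>

    /* Equivalent scalar residual computation, one row at a time. */
    static void subtract_block_scalar(int rows, int cols, int16_t *diff,
                                      ptrdiff_t diff_stride, const uint8_t *src,
                                      ptrdiff_t src_stride, const uint8_t *pred,
                                      ptrdiff_t pred_stride) {
      for (int r = 0; r < rows; ++r) {
        for (int c = 0; c < cols; ++c) diff[c] = (int16_t)(src[c] - pred[c]);
        diff += diff_stride;
        src += src_stride;
        pred += pred_stride;
      }
    }
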
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/variance_msa.c b/chromium/third_party/libaom/source/libaom/aom_dsp/mips/variance_msa.c
deleted file mode 100644
index 065c09ac55a..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/mips/variance_msa.c
+++ /dev/null
@@ -1,633 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/aom_dsp_rtcd.h"
-
-#include "aom_dsp/mips/macros_msa.h"
-
-#define CALC_MSE_B(src, ref, var) \
- { \
- v16u8 src_l0_m, src_l1_m; \
- v8i16 res_l0_m, res_l1_m; \
- \
- ILVRL_B2_UB(src, ref, src_l0_m, src_l1_m); \
- HSUB_UB2_SH(src_l0_m, src_l1_m, res_l0_m, res_l1_m); \
- DPADD_SH2_SW(res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var); \
- }
-
-#define CALC_MSE_AVG_B(src, ref, var, sub) \
- { \
- v16u8 src_l0_m, src_l1_m; \
- v8i16 res_l0_m, res_l1_m; \
- \
- ILVRL_B2_UB(src, ref, src_l0_m, src_l1_m); \
- HSUB_UB2_SH(src_l0_m, src_l1_m, res_l0_m, res_l1_m); \
- DPADD_SH2_SW(res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var); \
- \
- sub += res_l0_m + res_l1_m; \
- }
-
-#define VARIANCE_WxH(sse, diff, shift) sse - (((uint32_t)diff * diff) >> shift)
-
-#define VARIANCE_LARGE_WxH(sse, diff, shift) \
- sse - (((int64_t)diff * diff) >> shift)
-
-static uint32_t sse_diff_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t height, int32_t *diff) {
- uint32_t src0, src1, src2, src3;
- uint32_t ref0, ref1, ref2, ref3;
- int32_t ht_cnt;
- v16u8 src = { 0 };
- v16u8 ref = { 0 };
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LW4(src_ptr, src_stride, src0, src1, src2, src3);
- src_ptr += (4 * src_stride);
- LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
- ref_ptr += (4 * ref_stride);
-
- INSERT_W4_UB(src0, src1, src2, src3, src);
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
- CALC_MSE_AVG_B(src, ref, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sse_diff_8width_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t height, int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3;
- v16u8 ref0, ref1, ref2, ref3;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
- src_ptr += (4 * src_stride);
- LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
- ref_ptr += (4 * ref_stride);
-
- PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1,
- ref0, ref1);
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sse_diff_16width_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t height, int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src, ref;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src, ref, var, avg);
-
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src, ref, var, avg);
-
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src, ref, var, avg);
-
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src, ref, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sse_diff_32width_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t height, int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src0, src1, ref0, ref1;
- v8i16 avg = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
-
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
-
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
-
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg);
- CALC_MSE_AVG_B(src1, ref1, var, avg);
- }
-
- vec = __msa_hadd_s_w(avg, avg);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sse_diff_32x64_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src0, src1, ref0, ref1;
- v8i16 avg0 = { 0 };
- v8i16 avg1 = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = 16; ht_cnt--;) {
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
-
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
-
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
-
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
- }
-
- vec = __msa_hadd_s_w(avg0, avg0);
- vec += __msa_hadd_s_w(avg1, avg1);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sse_diff_64x32_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3;
- v16u8 ref0, ref1, ref2, ref3;
- v8i16 avg0 = { 0 };
- v8i16 avg1 = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = 16; ht_cnt--;) {
- LD_UB4(src_ptr, 16, src0, src1, src2, src3);
- src_ptr += src_stride;
- LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src2, ref2, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
- CALC_MSE_AVG_B(src3, ref3, var, avg1);
-
- LD_UB4(src_ptr, 16, src0, src1, src2, src3);
- src_ptr += src_stride;
- LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src2, ref2, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
- CALC_MSE_AVG_B(src3, ref3, var, avg1);
- }
-
- vec = __msa_hadd_s_w(avg0, avg0);
- vec += __msa_hadd_s_w(avg1, avg1);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sse_diff_64x64_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t *diff) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3;
- v16u8 ref0, ref1, ref2, ref3;
- v8i16 avg0 = { 0 };
- v8i16 avg1 = { 0 };
- v8i16 avg2 = { 0 };
- v8i16 avg3 = { 0 };
- v4i32 vec, var = { 0 };
-
- for (ht_cnt = 32; ht_cnt--;) {
- LD_UB4(src_ptr, 16, src0, src1, src2, src3);
- src_ptr += src_stride;
- LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
- ref_ptr += ref_stride;
-
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
- CALC_MSE_AVG_B(src2, ref2, var, avg2);
- CALC_MSE_AVG_B(src3, ref3, var, avg3);
- LD_UB4(src_ptr, 16, src0, src1, src2, src3);
- src_ptr += src_stride;
- LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
- ref_ptr += ref_stride;
- CALC_MSE_AVG_B(src0, ref0, var, avg0);
- CALC_MSE_AVG_B(src1, ref1, var, avg1);
- CALC_MSE_AVG_B(src2, ref2, var, avg2);
- CALC_MSE_AVG_B(src3, ref3, var, avg3);
- }
-
- vec = __msa_hadd_s_w(avg0, avg0);
- vec += __msa_hadd_s_w(avg1, avg1);
- vec += __msa_hadd_s_w(avg2, avg2);
- vec += __msa_hadd_s_w(avg3, avg3);
- *diff = HADD_SW_S32(vec);
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t get_mb_ss_msa(const int16_t *src) {
- uint32_t sum, cnt;
- v8i16 src0, src1, src2, src3;
- v4i32 src0_l, src1_l, src2_l, src3_l;
- v4i32 src0_r, src1_r, src2_r, src3_r;
- v2i64 sq_src_l = { 0 };
- v2i64 sq_src_r = { 0 };
-
- for (cnt = 8; cnt--;) {
- LD_SH4(src, 8, src0, src1, src2, src3);
- src += 4 * 8;
-
- UNPCK_SH_SW(src0, src0_l, src0_r);
- UNPCK_SH_SW(src1, src1_l, src1_r);
- UNPCK_SH_SW(src2, src2_l, src2_r);
- UNPCK_SH_SW(src3, src3_l, src3_r);
-
- DPADD_SD2_SD(src0_l, src0_r, sq_src_l, sq_src_r);
- DPADD_SD2_SD(src1_l, src1_r, sq_src_l, sq_src_r);
- DPADD_SD2_SD(src2_l, src2_r, sq_src_l, sq_src_r);
- DPADD_SD2_SD(src3_l, src3_r, sq_src_l, sq_src_r);
- }
-
- sq_src_l += __msa_splati_d(sq_src_l, 1);
- sq_src_r += __msa_splati_d(sq_src_r, 1);
-
- sum = __msa_copy_s_d(sq_src_l, 0);
- sum += __msa_copy_s_d(sq_src_r, 0);
-
- return sum;
-}
-
-static uint32_t sse_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t height) {
- int32_t ht_cnt;
- uint32_t src0, src1, src2, src3;
- uint32_t ref0, ref1, ref2, ref3;
- v16u8 src = { 0 };
- v16u8 ref = { 0 };
- v4i32 var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LW4(src_ptr, src_stride, src0, src1, src2, src3);
- src_ptr += (4 * src_stride);
- LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
- ref_ptr += (4 * ref_stride);
-
- INSERT_W4_UB(src0, src1, src2, src3, src);
- INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
- CALC_MSE_B(src, ref, var);
- }
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sse_8width_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t height) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3;
- v16u8 ref0, ref1, ref2, ref3;
- v4i32 var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
- src_ptr += (4 * src_stride);
- LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
- ref_ptr += (4 * ref_stride);
-
- PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1,
- ref0, ref1);
- CALC_MSE_B(src0, ref0, var);
- CALC_MSE_B(src1, ref1, var);
- }
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sse_16width_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t height) {
- int32_t ht_cnt;
- v16u8 src, ref;
- v4i32 var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- CALC_MSE_B(src, ref, var);
-
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- CALC_MSE_B(src, ref, var);
-
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- CALC_MSE_B(src, ref, var);
-
- src = LD_UB(src_ptr);
- src_ptr += src_stride;
- ref = LD_UB(ref_ptr);
- ref_ptr += ref_stride;
- CALC_MSE_B(src, ref, var);
- }
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sse_32width_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t height) {
- int32_t ht_cnt;
- v16u8 src0, src1, ref0, ref1;
- v4i32 var = { 0 };
-
- for (ht_cnt = (height >> 2); ht_cnt--;) {
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_B(src0, ref0, var);
- CALC_MSE_B(src1, ref1, var);
-
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_B(src0, ref0, var);
- CALC_MSE_B(src1, ref1, var);
-
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_B(src0, ref0, var);
- CALC_MSE_B(src1, ref1, var);
-
- LD_UB2(src_ptr, 16, src0, src1);
- src_ptr += src_stride;
- LD_UB2(ref_ptr, 16, ref0, ref1);
- ref_ptr += ref_stride;
- CALC_MSE_B(src0, ref0, var);
- CALC_MSE_B(src1, ref1, var);
- }
-
- return HADD_SW_S32(var);
-}
-
-static uint32_t sse_64width_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride,
- int32_t height) {
- int32_t ht_cnt;
- v16u8 src0, src1, src2, src3;
- v16u8 ref0, ref1, ref2, ref3;
- v4i32 var = { 0 };
-
- for (ht_cnt = height >> 1; ht_cnt--;) {
- LD_UB4(src_ptr, 16, src0, src1, src2, src3);
- src_ptr += src_stride;
- LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
- ref_ptr += ref_stride;
- CALC_MSE_B(src0, ref0, var);
- CALC_MSE_B(src2, ref2, var);
- CALC_MSE_B(src1, ref1, var);
- CALC_MSE_B(src3, ref3, var);
-
- LD_UB4(src_ptr, 16, src0, src1, src2, src3);
- src_ptr += src_stride;
- LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
- ref_ptr += ref_stride;
- CALC_MSE_B(src0, ref0, var);
- CALC_MSE_B(src2, ref2, var);
- CALC_MSE_B(src1, ref1, var);
- CALC_MSE_B(src3, ref3, var);
- }
-
- return HADD_SW_S32(var);
-}
-
-uint32_t aom_get4x4sse_cs_msa(const uint8_t *src_ptr, int32_t src_stride,
- const uint8_t *ref_ptr, int32_t ref_stride) {
- uint32_t err = 0;
- uint32_t src0, src1, src2, src3;
- uint32_t ref0, ref1, ref2, ref3;
- v16i8 src = { 0 };
- v16i8 ref = { 0 };
- v16u8 src_vec0, src_vec1;
- v8i16 diff0, diff1;
- v4i32 err0 = { 0 };
- v4i32 err1 = { 0 };
-
- LW4(src_ptr, src_stride, src0, src1, src2, src3);
- LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
- INSERT_W4_SB(src0, src1, src2, src3, src);
- INSERT_W4_SB(ref0, ref1, ref2, ref3, ref);
- ILVRL_B2_UB(src, ref, src_vec0, src_vec1);
- HSUB_UB2_SH(src_vec0, src_vec1, diff0, diff1);
- DPADD_SH2_SW(diff0, diff1, diff0, diff1, err0, err1);
- err = HADD_SW_S32(err0);
- err += HADD_SW_S32(err1);
-
- return err;
-}
-
-#define VARIANCE_4Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 4);
-#define VARIANCE_4Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 5);
-#define VARIANCE_8Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 5);
-#define VARIANCE_8Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 6);
-#define VARIANCE_8Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 7);
-#define VARIANCE_16Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 7);
-#define VARIANCE_16Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 8);
-
-#define VARIANCE_16Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9);
-#define VARIANCE_32Wx16H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9);
-#define VARIANCE_32Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 10);
-#define VARIANCE_32Wx64H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11);
-#define VARIANCE_64Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11);
-#define VARIANCE_64Wx64H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 12);
-
-#define AOM_VARIANCE_WDXHT_MSA(wd, ht) \
- uint32_t aom_variance##wd##x##ht##_msa( \
- const uint8_t *src, int32_t src_stride, const uint8_t *ref, \
- int32_t ref_stride, uint32_t *sse) { \
- int32_t diff; \
- \
- *sse = \
- sse_diff_##wd##width_msa(src, src_stride, ref, ref_stride, ht, &diff); \
- \
- return VARIANCE_##wd##Wx##ht##H(*sse, diff); \
- }
-
-/* clang-format off */
-AOM_VARIANCE_WDXHT_MSA(4, 4)
-AOM_VARIANCE_WDXHT_MSA(4, 8)
-
-AOM_VARIANCE_WDXHT_MSA(8, 4)
-AOM_VARIANCE_WDXHT_MSA(8, 8)
-AOM_VARIANCE_WDXHT_MSA(8, 16)
-
-AOM_VARIANCE_WDXHT_MSA(16, 8)
-AOM_VARIANCE_WDXHT_MSA(16, 16)
-AOM_VARIANCE_WDXHT_MSA(16, 32)
-
-AOM_VARIANCE_WDXHT_MSA(32, 16)
-AOM_VARIANCE_WDXHT_MSA(32, 32)
-/* clang-format on */
-
-uint32_t aom_variance32x64_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride,
- uint32_t *sse) {
- int32_t diff;
-
- *sse = sse_diff_32x64_msa(src, src_stride, ref, ref_stride, &diff);
-
- return VARIANCE_32Wx64H(*sse, diff);
-}
-
-uint32_t aom_variance64x32_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride,
- uint32_t *sse) {
- int32_t diff;
-
- *sse = sse_diff_64x32_msa(src, src_stride, ref, ref_stride, &diff);
-
- return VARIANCE_64Wx32H(*sse, diff);
-}
-
-uint32_t aom_variance64x64_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride,
- uint32_t *sse) {
- int32_t diff;
-
- *sse = sse_diff_64x64_msa(src, src_stride, ref, ref_stride, &diff);
-
- return VARIANCE_64Wx64H(*sse, diff);
-}
-
-uint32_t aom_mse8x8_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride, uint32_t *sse) {
- *sse = sse_8width_msa(src, src_stride, ref, ref_stride, 8);
-
- return *sse;
-}
-
-uint32_t aom_mse8x16_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride,
- uint32_t *sse) {
- *sse = sse_8width_msa(src, src_stride, ref, ref_stride, 16);
-
- return *sse;
-}
-
-uint32_t aom_mse16x8_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride,
- uint32_t *sse) {
- *sse = sse_16width_msa(src, src_stride, ref, ref_stride, 8);
-
- return *sse;
-}
-
-uint32_t aom_mse16x16_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride,
- uint32_t *sse) {
- *sse = sse_16width_msa(src, src_stride, ref, ref_stride, 16);
-
- return *sse;
-}
-
-void aom_get8x8var_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride, uint32_t *sse,
- int32_t *sum) {
- *sse = sse_diff_8width_msa(src, src_stride, ref, ref_stride, 8, sum);
-}
-
-void aom_get16x16var_msa(const uint8_t *src, int32_t src_stride,
- const uint8_t *ref, int32_t ref_stride, uint32_t *sse,
- int32_t *sum) {
- *sse = sse_diff_16width_msa(src, src_stride, ref, ref_stride, 16, sum);
-}
-
-uint32_t aom_get_mb_ss_msa(const int16_t *src) { return get_mb_ss_msa(src); }
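The `VARIANCE_WxH`/`VARIANCE_LARGE_WxH` macros in the file above encode the identity var = sse - sum*sum / N, with the division by N = width*height folded into a shift because every block size is a power of two (16x16 gives N = 256, so shift 8). A worked instance of the small-block form; the large-block form is identical except the square is taken in 64 bits, since sum*sum can overflow 32 bits once N exceeds 256:

    #include <stdint.h>

    /* VARIANCE_16Wx16H expanded: var = sse - sum^2 / 256. */
    static uint32_t variance_16x16_scalar(uint32_t sse, int32_t sum) {
      return sse - (((uint32_t)sum * (uint32_t)sum) >> 8);
    }
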
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/noise_util.c b/chromium/third_party/libaom/source/libaom/aom_dsp/noise_util.c
index 7e7e380c681..3ded8cb099f 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/noise_util.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/noise_util.c
@@ -160,15 +160,17 @@ int aom_noise_data_validate(const double *data, int w, int h) {
// Check that noise variance is not increasing in x or y
// and that the data is zero mean.
- mean_x = (double *)aom_malloc(sizeof(*mean_x) * w);
- var_x = (double *)aom_malloc(sizeof(*var_x) * w);
- mean_y = (double *)aom_malloc(sizeof(*mean_x) * h);
- var_y = (double *)aom_malloc(sizeof(*var_y) * h);
-
- memset(mean_x, 0, sizeof(*mean_x) * w);
- memset(var_x, 0, sizeof(*var_x) * w);
- memset(mean_y, 0, sizeof(*mean_y) * h);
- memset(var_y, 0, sizeof(*var_y) * h);
+ mean_x = (double *)aom_calloc(w, sizeof(*mean_x));
+ var_x = (double *)aom_calloc(w, sizeof(*var_x));
+ mean_y = (double *)aom_calloc(h, sizeof(*mean_x));
+ var_y = (double *)aom_calloc(h, sizeof(*var_y));
+ if (!(mean_x && var_x && mean_y && var_y)) {
+ aom_free(mean_x);
+ aom_free(mean_y);
+ aom_free(var_x);
+ aom_free(var_y);
+ return 0;
+ }
for (y = 0; y < h; ++y) {
for (x = 0; x < w; ++x) {
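The noise_util.c hunk above is one of the hardening changes in this update: the unchecked `aom_malloc` + `memset` pairs become `aom_calloc` with a combined null check, so an allocation failure now releases whatever was obtained and reports failure instead of writing through a null pointer. A sketch of the pattern using the standard allocator, which `aom_calloc`/`aom_free` wrap:

    #include <stdlib.h>

    /* Allocate zeroed arrays; on any failure free them all and signal failure.
     * free(NULL) is a no-op, so partial allocations need no special casing. */
    static int alloc_zeroed_pair(double **a, double **b, int n) {
      *a = calloc(n, sizeof(**a));
      *b = calloc(n, sizeof(**b));
      if (!(*a && *b)) {
        free(*a);
        free(*b);
        return 0;
      }
      return 1;
    }
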
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/prob.h b/chromium/third_party/libaom/source/libaom/aom_dsp/prob.h
index ea5e4cb34ed..5e25b9cdfbc 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/prob.h
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/prob.h
@@ -641,26 +641,33 @@ static INLINE uint8_t get_prob(unsigned int num, unsigned int den) {
}
static INLINE void update_cdf(aom_cdf_prob *cdf, int8_t val, int nsymbs) {
- int rate;
- int i, tmp;
-
- static const int nsymbs2speed[17] = { 0, 0, 1, 1, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2 };
assert(nsymbs < 17);
- rate = 3 + (cdf[nsymbs] > 15) + (cdf[nsymbs] > 31) +
- nsymbs2speed[nsymbs]; // + get_msb(nsymbs);
- tmp = AOM_ICDF(0);
+ const int count = cdf[nsymbs];
+
+ // rate is computed in the spec as:
+ // 3 + ( cdf[N] > 15 ) + ( cdf[N] > 31 ) + Min(FloorLog2(N), 2)
+ // In this case cdf[N] is |count|.
+ // Min(FloorLog2(N), 2) is 1 for nsymbs == {2, 3} and 2 for all
+ // nsymbs > 3. So the equation becomes:
+ // 4 + (count > 15) + (count > 31) + (nsymbs > 3).
+ // Note that the largest value for count is 32 (it is not incremented beyond
+ // 32). So using that information:
+ // count >> 4 is 0 for count from 0 to 15.
+ // count >> 4 is 1 for count from 16 to 31.
+ // count >> 4 is 2 for count == 32.
+ // Now, the equation becomes:
+ // 4 + (count >> 4) + (nsymbs > 3).
+ const int rate = 4 + (count >> 4) + (nsymbs > 3);
- // Single loop (faster)
- for (i = 0; i < nsymbs - 1; ++i) {
- tmp = (i == val) ? 0 : tmp;
- if (tmp < cdf[i]) {
- cdf[i] -= ((cdf[i] - tmp) >> rate);
+ int i = 0;
+ do {
+ if (i < val) {
+ cdf[i] += (CDF_PROB_TOP - cdf[i]) >> rate;
} else {
- cdf[i] += ((tmp - cdf[i]) >> rate);
+ cdf[i] -= cdf[i] >> rate;
}
- }
- cdf[nsymbs] += (cdf[nsymbs] < 32);
+ } while (++i < nsymbs - 1);
+ cdf[nsymbs] += (count < 32);
}
#ifdef __cplusplus
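The rewritten `update_cdf` replaces the table-driven rate with `4 + (count >> 4) + (nsymbs > 3)`; the comment derives this from the spec formula, and since count never exceeds 32 and nsymbs is at most 16, the equivalence can be checked exhaustively. A standalone verification sketch, separate from the patch itself:

    #include <assert.h>

    static int floor_log2(int n) {
      int b = 0;
      while (n >>= 1) ++b;
      return b;
    }

    int main(void) {
      for (int nsymbs = 2; nsymbs <= 16; ++nsymbs) {
        for (int count = 0; count <= 32; ++count) {
          const int min_log = floor_log2(nsymbs) < 2 ? floor_log2(nsymbs) : 2;
          const int spec_rate = 3 + (count > 15) + (count > 31) + min_log;
          assert(spec_rate == 4 + (count >> 4) + (nsymbs > 3));
        }
      }
      return 0;
    }
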
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/quantize.c b/chromium/third_party/libaom/source/libaom/aom_dsp/quantize.c
index 36ca58f6b23..8dd5b0b0f7a 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/quantize.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/quantize.c
@@ -12,6 +12,7 @@
#include "aom_dsp/quantize.h"
#include "aom_mem/aom_mem.h"
+#if !CONFIG_REALTIME_ONLY
void aom_quantize_b_adaptive_helper_c(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
@@ -102,6 +103,7 @@ void aom_quantize_b_adaptive_helper_c(
#endif // SKIP_EOB_FACTOR_ADJUST
*eob_ptr = eob + 1;
}
+#endif // !CONFIG_REALTIME_ONLY
void aom_quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *zbin_ptr, const int16_t *round_ptr,
@@ -167,6 +169,7 @@ void aom_quantize_b_helper_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
}
#if CONFIG_AV1_HIGHBITDEPTH
+#if !CONFIG_REALTIME_ONLY
void aom_highbd_quantize_b_adaptive_helper_c(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
@@ -253,6 +256,7 @@ void aom_highbd_quantize_b_adaptive_helper_c(
#endif // SKIP_EOB_FACTOR_ADJUST
*eob_ptr = eob + 1;
}
+#endif // !CONFIG_REALTIME_ONLY
void aom_highbd_quantize_b_helper_c(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
@@ -312,6 +316,7 @@ void aom_highbd_quantize_b_helper_c(
}
#endif // CONFIG_AV1_HIGHBITDEPTH
+#if !CONFIG_REALTIME_ONLY
/* These functions should only be called when quantisation matrices
are not used. */
void aom_quantize_b_adaptive_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -389,6 +394,7 @@ void aom_highbd_quantize_b_64x64_adaptive_c(
eob_ptr, scan, iscan, NULL, NULL, 2);
}
#endif // CONFIG_AV1_HIGHBITDEPTH
+#endif // !CONFIG_REALTIME_ONLY
void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *zbin_ptr, const int16_t *round_ptr,
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v128_intrinsics_c.h b/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v128_intrinsics_c.h
index 466a41e1078..f5ca817fb69 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v128_intrinsics_c.h
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v128_intrinsics_c.h
@@ -14,6 +14,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include "config/aom_config.h"
@@ -63,10 +64,7 @@ SIMD_INLINE c_v128 c_v128_from_32(uint32_t a, uint32_t b, uint32_t c,
SIMD_INLINE c_v128 c_v128_load_unaligned(const void *p) {
c_v128 t;
- uint8_t *pp = (uint8_t *)p;
- uint8_t *q = (uint8_t *)&t;
- int c;
- for (c = 0; c < 16; c++) q[c] = pp[c];
+ memcpy(&t, p, 16);
return t;
}
@@ -79,10 +77,7 @@ SIMD_INLINE c_v128 c_v128_load_aligned(const void *p) {
}
SIMD_INLINE void c_v128_store_unaligned(void *p, c_v128 a) {
- uint8_t *pp = (uint8_t *)p;
- uint8_t *q = (uint8_t *)&a;
- int c;
- for (c = 0; c < 16; c++) pp[c] = q[c];
+ memcpy(p, &a, 16);
}
SIMD_INLINE void c_v128_store_aligned(void *p, c_v128 a) {
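Replacing the byte-by-byte copy loops with `memcpy` above is the idiomatic way to express unaligned loads and stores in C: it avoids both alignment and strict-aliasing undefined behaviour, and compilers lower a fixed-size `memcpy` to a single unaligned move. The same idiom in isolation:

    #include <stdint.h>
    #include <string.h>

    /* Read 32 bits from a possibly unaligned address without UB;
     * typically compiles to one unaligned load instruction. */
    static uint32_t load_u32_unaligned(const void *p) {
      uint32_t v;
      memcpy(&v, p, sizeof(v));
      return v;
    }
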
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v128_intrinsics_x86.h b/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v128_intrinsics_x86.h
index c404015ef1e..32b51c956d7 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v128_intrinsics_x86.h
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v128_intrinsics_x86.h
@@ -36,7 +36,7 @@ SIMD_INLINE v128 v128_from_64(uint64_t a, uint64_t b) {
}
SIMD_INLINE v128 v128_from_32(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
- return _mm_set_epi32(a, b, c, d);
+ return _mm_set_epi32((int)a, (int)b, (int)c, (int)d);
}
SIMD_INLINE v128 v128_load_aligned(const void *p) {
@@ -81,16 +81,16 @@ SIMD_INLINE v128 v128_align(v128 a, v128 b, const unsigned int c) {
SIMD_INLINE v128 v128_zero() { return _mm_setzero_si128(); }
-SIMD_INLINE v128 v128_dup_8(uint8_t x) { return _mm_set1_epi8(x); }
+SIMD_INLINE v128 v128_dup_8(uint8_t x) { return _mm_set1_epi8((char)x); }
-SIMD_INLINE v128 v128_dup_16(uint16_t x) { return _mm_set1_epi16(x); }
+SIMD_INLINE v128 v128_dup_16(uint16_t x) { return _mm_set1_epi16((short)x); }
-SIMD_INLINE v128 v128_dup_32(uint32_t x) { return _mm_set1_epi32(x); }
+SIMD_INLINE v128 v128_dup_32(uint32_t x) { return _mm_set1_epi32((int)x); }
SIMD_INLINE v128 v128_dup_64(uint64_t x) {
// _mm_set_pi64x and _mm_cvtsi64x_si64 missing in some compilers
- return _mm_set_epi32((uint32_t)(x >> 32), (uint32_t)x, (uint32_t)(x >> 32),
- (uint32_t)x);
+ return _mm_set_epi32((int32_t)(x >> 32), (int32_t)x, (int32_t)(x >> 32),
+ (int32_t)x);
}
SIMD_INLINE v128 v128_add_8(v128 a, v128 b) { return _mm_add_epi8(a, b); }
@@ -304,7 +304,7 @@ SIMD_INLINE v128 v128_shuffle_8(v128 x, v128 pattern) {
v128 output;
unsigned char *input = (unsigned char *)&x;
unsigned char *index = (unsigned char *)&pattern;
- char *selected = (char *)&output;
+ unsigned char *selected = (unsigned char *)&output;
int counter;
for (counter = 0; counter < 16; counter++) {
@@ -534,58 +534,58 @@ SIMD_INLINE v128 v128_cmplt_s32(v128 a, v128 b) {
SIMD_INLINE v128 v128_cmpeq_16(v128 a, v128 b) { return _mm_cmpeq_epi16(a, b); }
SIMD_INLINE v128 v128_shl_8(v128 a, unsigned int c) {
- return _mm_and_si128(_mm_set1_epi8((uint8_t)(0xff << c)),
- _mm_sll_epi16(a, _mm_cvtsi32_si128(c)));
+ return _mm_and_si128(_mm_set1_epi8((char)(0xff << c)),
+ _mm_sll_epi16(a, _mm_cvtsi32_si128((int)c)));
}
SIMD_INLINE v128 v128_shr_u8(v128 a, unsigned int c) {
return _mm_and_si128(_mm_set1_epi8((char)(0xff >> c)),
- _mm_srl_epi16(a, _mm_cvtsi32_si128(c)));
+ _mm_srl_epi16(a, _mm_cvtsi32_si128((int)c)));
}
SIMD_INLINE v128 v128_shr_s8(v128 a, unsigned int c) {
- __m128i x = _mm_cvtsi32_si128(c + 8);
+ __m128i x = _mm_cvtsi32_si128((int)(c + 8));
return _mm_packs_epi16(_mm_sra_epi16(_mm_unpacklo_epi8(a, a), x),
_mm_sra_epi16(_mm_unpackhi_epi8(a, a), x));
}
SIMD_INLINE v128 v128_shl_16(v128 a, unsigned int c) {
- return _mm_sll_epi16(a, _mm_cvtsi32_si128(c));
+ return _mm_sll_epi16(a, _mm_cvtsi32_si128((int)c));
}
SIMD_INLINE v128 v128_shr_u16(v128 a, unsigned int c) {
- return _mm_srl_epi16(a, _mm_cvtsi32_si128(c));
+ return _mm_srl_epi16(a, _mm_cvtsi32_si128((int)c));
}
SIMD_INLINE v128 v128_shr_s16(v128 a, unsigned int c) {
- return _mm_sra_epi16(a, _mm_cvtsi32_si128(c));
+ return _mm_sra_epi16(a, _mm_cvtsi32_si128((int)c));
}
SIMD_INLINE v128 v128_shl_32(v128 a, unsigned int c) {
- return _mm_sll_epi32(a, _mm_cvtsi32_si128(c));
+ return _mm_sll_epi32(a, _mm_cvtsi32_si128((int)c));
}
SIMD_INLINE v128 v128_shr_u32(v128 a, unsigned int c) {
- return _mm_srl_epi32(a, _mm_cvtsi32_si128(c));
+ return _mm_srl_epi32(a, _mm_cvtsi32_si128((int)c));
}
SIMD_INLINE v128 v128_shr_s32(v128 a, unsigned int c) {
- return _mm_sra_epi32(a, _mm_cvtsi32_si128(c));
+ return _mm_sra_epi32(a, _mm_cvtsi32_si128((int)c));
}
SIMD_INLINE v128 v128_shl_64(v128 a, unsigned int c) {
- return _mm_sll_epi64(a, _mm_cvtsi32_si128(c));
+ return _mm_sll_epi64(a, _mm_cvtsi32_si128((int)c));
}
SIMD_INLINE v128 v128_shr_u64(v128 a, unsigned int c) {
- return _mm_srl_epi64(a, _mm_cvtsi32_si128(c));
+ return _mm_srl_epi64(a, _mm_cvtsi32_si128((int)c));
}
SIMD_INLINE v128 v128_shr_s64(v128 a, unsigned int c) {
// _mm_sra_epi64 is missing in gcc?
- return v128_from_64((int64_t)v64_u64(v128_high_v64(a)) >> c,
- (int64_t)v64_u64(v128_low_v64(a)) >> c);
- // return _mm_sra_epi64(a, _mm_cvtsi32_si128(c));
+ return v128_from_64((uint64_t)((int64_t)v64_u64(v128_high_v64(a)) >> c),
+ (uint64_t)((int64_t)v64_u64(v128_low_v64(a)) >> c));
+ // return _mm_sra_epi64(a, _mm_cvtsi32_si128((int)c));
}
/* These intrinsics require immediate values, so we must use #defines
@@ -593,9 +593,9 @@ SIMD_INLINE v128 v128_shr_s64(v128 a, unsigned int c) {
#define v128_shl_n_byte(a, c) _mm_slli_si128(a, (c)&127)
#define v128_shr_n_byte(a, c) _mm_srli_si128(a, (c)&127)
#define v128_shl_n_8(a, c) \
- _mm_and_si128(_mm_set1_epi8((uint8_t)(0xff << (c))), _mm_slli_epi16(a, c))
+ _mm_and_si128(_mm_set1_epi8((char)(0xff << (c))), _mm_slli_epi16(a, c))
#define v128_shr_n_u8(a, c) \
- _mm_and_si128(_mm_set1_epi8(0xff >> (c)), _mm_srli_epi16(a, c))
+ _mm_and_si128(_mm_set1_epi8((char)(0xff >> (c))), _mm_srli_epi16(a, c))
#define v128_shr_n_s8(a, c) \
_mm_packs_epi16(_mm_srai_epi16(_mm_unpacklo_epi8(a, a), (c) + 8), \
_mm_srai_epi16(_mm_unpackhi_epi8(a, a), (c) + 8))
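The x86 hunks above add explicit casts because the SSE intrinsics are declared with signed parameters (`_mm_set1_epi8` takes `char`, `_mm_cvtsi32_si128` takes `int`), so passing `uint8_t` or `unsigned int` values draws implicit-conversion warnings; the casts change no generated code. The pattern reduced to one line:

    #include <emmintrin.h>
    #include <stdint.h>

    /* Explicit narrowing keeps -Wconversion-style builds quiet. */
    static __m128i dup_byte(uint8_t x) { return _mm_set1_epi8((char)x); }
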
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v256_intrinsics_c.h b/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v256_intrinsics_c.h
index 8127ee35669..66cfda31e1e 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v256_intrinsics_c.h
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v256_intrinsics_c.h
@@ -14,6 +14,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include "config/aom_config.h"
@@ -70,10 +71,7 @@ SIMD_INLINE c_v256 c_v256_from_v64(c_v64 a, c_v64 b, c_v64 c, c_v64 d) {
SIMD_INLINE c_v256 c_v256_load_unaligned(const void *p) {
c_v256 t;
- uint8_t *pp = (uint8_t *)p;
- uint8_t *q = (uint8_t *)&t;
- int c;
- for (c = 0; c < 32; c++) q[c] = pp[c];
+ memcpy(&t, p, 32);
return t;
}
@@ -86,10 +84,7 @@ SIMD_INLINE c_v256 c_v256_load_aligned(const void *p) {
}
SIMD_INLINE void c_v256_store_unaligned(void *p, c_v256 a) {
- uint8_t *pp = (uint8_t *)p;
- uint8_t *q = (uint8_t *)&a;
- int c;
- for (c = 0; c < 32; c++) pp[c] = q[c];
+ memcpy(p, &a, 32);
}
SIMD_INLINE void c_v256_store_aligned(void *p, c_v256 a) {
@@ -385,7 +380,7 @@ SIMD_INLINE c_v256 c_v256_min_s8(c_v256 a, c_v256 b) {
}
SIMD_INLINE uint32_t c_v256_movemask_8(c_v256 a) {
- return ((a.s8[31] < 0) << 31) | ((a.s8[30] < 0) << 30) |
+ return ((uint32_t)(a.s8[31] < 0) << 31) | ((a.s8[30] < 0) << 30) |
((a.s8[29] < 0) << 29) | ((a.s8[28] < 0) << 28) |
((a.s8[27] < 0) << 27) | ((a.s8[26] < 0) << 26) |
((a.s8[25] < 0) << 25) | ((a.s8[24] < 0) << 24) |
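The `c_v256_movemask_8` fix above casts the first comparison result to `uint32_t` before shifting: `(a.s8[31] < 0)` has type `int`, and left-shifting a 1 into bit 31 of a signed `int` is undefined behaviour. The corrected pattern in essence:

    #include <stdint.h>

    /* Shift in unsigned arithmetic so setting bit 31 stays well defined. */
    static uint32_t sign_bit31(int8_t lane) {
      return (uint32_t)(lane < 0) << 31;
    }
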
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v256_intrinsics_x86.h b/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v256_intrinsics_x86.h
index eb5eaf0632b..894ddee1670 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v256_intrinsics_x86.h
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v256_intrinsics_x86.h
@@ -57,7 +57,7 @@ SIMD_INLINE v256 v256_from_v64(v64 a, v64 b, v64 c, v64 d) {
}
SIMD_INLINE v256 v256_from_64(uint64_t a, uint64_t b, uint64_t c, uint64_t d) {
- return _mm256_set_epi64x(a, b, c, d);
+ return _mm256_set_epi64x((int64_t)a, (int64_t)b, (int64_t)c, (int64_t)d);
}
SIMD_INLINE v256 v256_load_aligned(const void *p) {
@@ -78,13 +78,15 @@ SIMD_INLINE void v256_store_unaligned(void *p, v256 a) {
SIMD_INLINE v256 v256_zero(void) { return _mm256_setzero_si256(); }
-SIMD_INLINE v256 v256_dup_8(uint8_t x) { return _mm256_set1_epi8(x); }
+SIMD_INLINE v256 v256_dup_8(uint8_t x) { return _mm256_set1_epi8((char)x); }
-SIMD_INLINE v256 v256_dup_16(uint16_t x) { return _mm256_set1_epi16(x); }
+SIMD_INLINE v256 v256_dup_16(uint16_t x) { return _mm256_set1_epi16((short)x); }
-SIMD_INLINE v256 v256_dup_32(uint32_t x) { return _mm256_set1_epi32(x); }
+SIMD_INLINE v256 v256_dup_32(uint32_t x) { return _mm256_set1_epi32((int)x); }
-SIMD_INLINE v256 v256_dup_64(uint64_t x) { return _mm256_set1_epi64x(x); }
+SIMD_INLINE v256 v256_dup_64(uint64_t x) {
+ return _mm256_set1_epi64x((int64_t)x);
+}
SIMD_INLINE v256 v256_add_8(v256 a, v256 b) { return _mm256_add_epi8(a, b); }
@@ -543,7 +545,9 @@ SIMD_INLINE v256 v256_max_u8(v256 a, v256 b) { return _mm256_max_epu8(a, b); }
SIMD_INLINE v256 v256_min_s8(v256 a, v256 b) { return _mm256_min_epi8(a, b); }
-SIMD_INLINE uint32_t v256_movemask_8(v256 a) { return _mm256_movemask_epi8(a); }
+SIMD_INLINE uint32_t v256_movemask_8(v256 a) {
+ return (uint32_t)_mm256_movemask_epi8(a);
+}
SIMD_INLINE v256 v256_blend_8(v256 a, v256 b, v256 c) {
return _mm256_blendv_epi8(a, b, c);
@@ -596,56 +600,56 @@ SIMD_INLINE v256 v256_cmpeq_32(v256 a, v256 b) {
}
SIMD_INLINE v256 v256_shl_8(v256 a, unsigned int c) {
- return _mm256_and_si256(_mm256_set1_epi8((uint8_t)(0xff << c)),
- _mm256_sll_epi16(a, _mm_cvtsi32_si128(c)));
+ return _mm256_and_si256(_mm256_set1_epi8((char)(0xff << c)),
+ _mm256_sll_epi16(a, _mm_cvtsi32_si128((int)c)));
}
SIMD_INLINE v256 v256_shr_u8(v256 a, unsigned int c) {
return _mm256_and_si256(_mm256_set1_epi8((char)(0xff >> c)),
- _mm256_srl_epi16(a, _mm_cvtsi32_si128(c)));
+ _mm256_srl_epi16(a, _mm_cvtsi32_si128((int)c)));
}
SIMD_INLINE v256 v256_shr_s8(v256 a, unsigned int c) {
- __m128i x = _mm_cvtsi32_si128(c + 8);
+ __m128i x = _mm_cvtsi32_si128((int)(c + 8));
return _mm256_packs_epi16(_mm256_sra_epi16(_mm256_unpacklo_epi8(a, a), x),
_mm256_sra_epi16(_mm256_unpackhi_epi8(a, a), x));
}
SIMD_INLINE v256 v256_shl_16(v256 a, unsigned int c) {
- return _mm256_sll_epi16(a, _mm_cvtsi32_si128(c));
+ return _mm256_sll_epi16(a, _mm_cvtsi32_si128((int)c));
}
SIMD_INLINE v256 v256_shr_u16(v256 a, unsigned int c) {
- return _mm256_srl_epi16(a, _mm_cvtsi32_si128(c));
+ return _mm256_srl_epi16(a, _mm_cvtsi32_si128((int)c));
}
SIMD_INLINE v256 v256_shr_s16(v256 a, unsigned int c) {
- return _mm256_sra_epi16(a, _mm_cvtsi32_si128(c));
+ return _mm256_sra_epi16(a, _mm_cvtsi32_si128((int)c));
}
SIMD_INLINE v256 v256_shl_32(v256 a, unsigned int c) {
- return _mm256_sll_epi32(a, _mm_cvtsi32_si128(c));
+ return _mm256_sll_epi32(a, _mm_cvtsi32_si128((int)c));
}
SIMD_INLINE v256 v256_shr_u32(v256 a, unsigned int c) {
- return _mm256_srl_epi32(a, _mm_cvtsi32_si128(c));
+ return _mm256_srl_epi32(a, _mm_cvtsi32_si128((int)c));
}
SIMD_INLINE v256 v256_shr_s32(v256 a, unsigned int c) {
- return _mm256_sra_epi32(a, _mm_cvtsi32_si128(c));
+ return _mm256_sra_epi32(a, _mm_cvtsi32_si128((int)c));
}
SIMD_INLINE v256 v256_shl_64(v256 a, unsigned int c) {
- return _mm256_sll_epi64(a, _mm_cvtsi32_si128(c));
+ return _mm256_sll_epi64(a, _mm_cvtsi32_si128((int)c));
}
SIMD_INLINE v256 v256_shr_u64(v256 a, unsigned int c) {
- return _mm256_srl_epi64(a, _mm_cvtsi32_si128(c));
+ return _mm256_srl_epi64(a, _mm_cvtsi32_si128((int)c));
}
SIMD_INLINE v256 v256_shr_s64(v256 a, unsigned int c) {
#if defined(__AVX512VL__)
- return _mm256_sra_epi64(a, _mm_cvtsi32_si128(c));
+ return _mm256_sra_epi64(a, _mm_cvtsi32_si128((int)c));
#else
return v256_from_v128(v128_shr_s64(v256_high_v128(a), c),
v128_shr_s64(v256_low_v128(a), c));
@@ -677,11 +681,12 @@ SIMD_INLINE v256 v256_shr_s64(v256 a, unsigned int c) {
#define v256_align(a, b, c) \
((c) ? v256_or(v256_shr_n_byte(b, c), v256_shl_n_byte(a, 32 - (c))) : b)
-#define v256_shl_n_8(a, c) \
- _mm256_and_si256(_mm256_set1_epi8((uint8_t)(0xff << (c))), \
+#define v256_shl_n_8(a, c) \
+ _mm256_and_si256(_mm256_set1_epi8((char)(0xff << (c))), \
_mm256_slli_epi16(a, c))
-#define v256_shr_n_u8(a, c) \
- _mm256_and_si256(_mm256_set1_epi8(0xff >> (c)), _mm256_srli_epi16(a, c))
+#define v256_shr_n_u8(a, c) \
+ _mm256_and_si256(_mm256_set1_epi8((char)(0xff >> (c))), \
+ _mm256_srli_epi16(a, c))
#define v256_shr_n_s8(a, c) \
_mm256_packs_epi16(_mm256_srai_epi16(_mm256_unpacklo_epi8(a, a), (c) + 8), \
_mm256_srai_epi16(_mm256_unpackhi_epi8(a, a), (c) + 8))
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v64_intrinsics_c.h b/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v64_intrinsics_c.h
index b84f243c453..bfd6fe07102 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v64_intrinsics_c.h
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v64_intrinsics_c.h
@@ -186,11 +186,7 @@ SIMD_INLINE c_v64 c_v64_sadd_u8(c_v64 a, c_v64 b) {
c_v64 t;
int c;
for (c = 0; c < 8; c++)
- t.u8[c] = (int16_t)a.u8[c] + (int16_t)b.u8[c] > 255
- ? 255
- : (int16_t)a.u8[c] + (int16_t)b.u8[c] < 0
- ? 0
- : (int16_t)a.u8[c] + (int16_t)b.u8[c];
+ t.u8[c] = SIMD_CLAMP((int16_t)a.u8[c] + (int16_t)b.u8[c], 0, 255);
return t;
}
@@ -198,11 +194,7 @@ SIMD_INLINE c_v64 c_v64_sadd_s8(c_v64 a, c_v64 b) {
c_v64 t;
int c;
for (c = 0; c < 8; c++)
- t.s8[c] = (int16_t)a.s8[c] + (int16_t)b.s8[c] > 127
- ? 127
- : (int16_t)a.s8[c] + (int16_t)b.s8[c] < -128
- ? -128
- : (int16_t)a.s8[c] + (int16_t)b.s8[c];
+ t.s8[c] = SIMD_CLAMP((int16_t)a.s8[c] + (int16_t)b.s8[c], -128, 127);
return t;
}
@@ -210,11 +202,7 @@ SIMD_INLINE c_v64 c_v64_sadd_s16(c_v64 a, c_v64 b) {
c_v64 t;
int c;
for (c = 0; c < 4; c++)
- t.s16[c] = (int32_t)a.s16[c] + (int32_t)b.s16[c] > 32767
- ? 32767
- : (int32_t)a.s16[c] + (int32_t)b.s16[c] < -32768
- ? -32768
- : (int32_t)a.s16[c] + (int32_t)b.s16[c];
+ t.s16[c] = SIMD_CLAMP((int32_t)a.s16[c] + (int32_t)b.s16[c], -32768, 32767);
return t;
}
@@ -244,7 +232,7 @@ SIMD_INLINE c_v64 c_v64_ssub_s8(c_v64 a, c_v64 b) {
int c;
for (c = 0; c < 8; c++) {
int16_t d = (int16_t)a.s8[c] - (int16_t)b.s8[c];
- t.s8[c] = d > 127 ? 127 : (d < -128 ? -128 : d);
+ t.s8[c] = SIMD_CLAMP(d, -128, 127);
}
return t;
}
@@ -260,11 +248,7 @@ SIMD_INLINE c_v64 c_v64_ssub_s16(c_v64 a, c_v64 b) {
c_v64 t;
int c;
for (c = 0; c < 4; c++)
- t.s16[c] = (int32_t)a.s16[c] - (int32_t)b.s16[c] < -32768
- ? -32768
- : (int32_t)a.s16[c] - (int32_t)b.s16[c] > 32767
- ? 32767
- : (int32_t)a.s16[c] - (int32_t)b.s16[c];
+ t.s16[c] = SIMD_CLAMP((int32_t)a.s16[c] - (int32_t)b.s16[c], -32768, 32767);
return t;
}
@@ -481,10 +465,10 @@ SIMD_INLINE c_v64 c_v64_pack_s32_s16(c_v64 a, c_v64 b) {
a = b;
b = u;
}
- t.s16[3] = a.s32[1] > 32767 ? 32767 : a.s32[1] < -32768 ? -32768 : a.s32[1];
- t.s16[2] = a.s32[0] > 32767 ? 32767 : a.s32[0] < -32768 ? -32768 : a.s32[0];
- t.s16[1] = b.s32[1] > 32767 ? 32767 : b.s32[1] < -32768 ? -32768 : b.s32[1];
- t.s16[0] = b.s32[0] > 32767 ? 32767 : b.s32[0] < -32768 ? -32768 : b.s32[0];
+ t.s16[3] = SIMD_CLAMP(a.s32[1], -32768, 32767);
+ t.s16[2] = SIMD_CLAMP(a.s32[0], -32768, 32767);
+ t.s16[1] = SIMD_CLAMP(b.s32[1], -32768, 32767);
+ t.s16[0] = SIMD_CLAMP(b.s32[0], -32768, 32767);
return t;
}
@@ -495,10 +479,10 @@ SIMD_INLINE c_v64 c_v64_pack_s32_u16(c_v64 a, c_v64 b) {
a = b;
b = u;
}
- t.u16[3] = a.s32[1] > 65535 ? 65535 : a.s32[1] < 0 ? 0 : a.s32[1];
- t.u16[2] = a.s32[0] > 65535 ? 65535 : a.s32[0] < 0 ? 0 : a.s32[0];
- t.u16[1] = b.s32[1] > 65535 ? 65535 : b.s32[1] < 0 ? 0 : b.s32[1];
- t.u16[0] = b.s32[0] > 65535 ? 65535 : b.s32[0] < 0 ? 0 : b.s32[0];
+ t.u16[3] = SIMD_CLAMP(a.s32[1], 0, 65535);
+ t.u16[2] = SIMD_CLAMP(a.s32[0], 0, 65535);
+ t.u16[1] = SIMD_CLAMP(b.s32[1], 0, 65535);
+ t.u16[0] = SIMD_CLAMP(b.s32[0], 0, 65535);
return t;
}
@@ -509,14 +493,14 @@ SIMD_INLINE c_v64 c_v64_pack_s16_u8(c_v64 a, c_v64 b) {
a = b;
b = u;
}
- t.u8[7] = a.s16[3] > 255 ? 255 : a.s16[3] < 0 ? 0 : a.s16[3];
- t.u8[6] = a.s16[2] > 255 ? 255 : a.s16[2] < 0 ? 0 : a.s16[2];
- t.u8[5] = a.s16[1] > 255 ? 255 : a.s16[1] < 0 ? 0 : a.s16[1];
- t.u8[4] = a.s16[0] > 255 ? 255 : a.s16[0] < 0 ? 0 : a.s16[0];
- t.u8[3] = b.s16[3] > 255 ? 255 : b.s16[3] < 0 ? 0 : b.s16[3];
- t.u8[2] = b.s16[2] > 255 ? 255 : b.s16[2] < 0 ? 0 : b.s16[2];
- t.u8[1] = b.s16[1] > 255 ? 255 : b.s16[1] < 0 ? 0 : b.s16[1];
- t.u8[0] = b.s16[0] > 255 ? 255 : b.s16[0] < 0 ? 0 : b.s16[0];
+ t.u8[7] = SIMD_CLAMP(a.s16[3], 0, 255);
+ t.u8[6] = SIMD_CLAMP(a.s16[2], 0, 255);
+ t.u8[5] = SIMD_CLAMP(a.s16[1], 0, 255);
+ t.u8[4] = SIMD_CLAMP(a.s16[0], 0, 255);
+ t.u8[3] = SIMD_CLAMP(b.s16[3], 0, 255);
+ t.u8[2] = SIMD_CLAMP(b.s16[2], 0, 255);
+ t.u8[1] = SIMD_CLAMP(b.s16[1], 0, 255);
+ t.u8[0] = SIMD_CLAMP(b.s16[0], 0, 255);
return t;
}
@@ -527,14 +511,14 @@ SIMD_INLINE c_v64 c_v64_pack_s16_s8(c_v64 a, c_v64 b) {
a = b;
b = u;
}
- t.u8[7] = (uint8_t)(a.s16[3] > 127 ? 127 : a.s16[3] < -128 ? 128 : a.s16[3]);
- t.u8[6] = (uint8_t)(a.s16[2] > 127 ? 127 : a.s16[2] < -128 ? 128 : a.s16[2]);
- t.u8[5] = (uint8_t)(a.s16[1] > 127 ? 127 : a.s16[1] < -128 ? 128 : a.s16[1]);
- t.u8[4] = (uint8_t)(a.s16[0] > 127 ? 127 : a.s16[0] < -128 ? 128 : a.s16[0]);
- t.u8[3] = (uint8_t)(b.s16[3] > 127 ? 127 : b.s16[3] < -128 ? 128 : b.s16[3]);
- t.u8[2] = (uint8_t)(b.s16[2] > 127 ? 127 : b.s16[2] < -128 ? 128 : b.s16[2]);
- t.u8[1] = (uint8_t)(b.s16[1] > 127 ? 127 : b.s16[1] < -128 ? 128 : b.s16[1]);
- t.u8[0] = (uint8_t)(b.s16[0] > 127 ? 127 : b.s16[0] < -128 ? 128 : b.s16[0]);
+ t.s8[7] = SIMD_CLAMP(a.s16[3], -128, 127);
+ t.s8[6] = SIMD_CLAMP(a.s16[2], -128, 127);
+ t.s8[5] = SIMD_CLAMP(a.s16[1], -128, 127);
+ t.s8[4] = SIMD_CLAMP(a.s16[0], -128, 127);
+ t.s8[3] = SIMD_CLAMP(b.s16[3], -128, 127);
+ t.s8[2] = SIMD_CLAMP(b.s16[2], -128, 127);
+ t.s8[1] = SIMD_CLAMP(b.s16[1], -128, 127);
+ t.s8[0] = SIMD_CLAMP(b.s16[0], -128, 127);
return t;
}
@@ -702,13 +686,13 @@ SIMD_INLINE c_v64 c_v64_madd_us8(c_v64 a, c_v64 b) {
c_v64 t;
int32_t u;
u = a.u8[0] * b.s8[0] + a.u8[1] * b.s8[1];
- t.s16[0] = u > 32767 ? 32767 : u < -32768 ? -32768 : u;
+ t.s16[0] = SIMD_CLAMP(u, -32768, 32767);
u = a.u8[2] * b.s8[2] + a.u8[3] * b.s8[3];
- t.s16[1] = u > 32767 ? 32767 : u < -32768 ? -32768 : u;
+ t.s16[1] = SIMD_CLAMP(u, -32768, 32767);
u = a.u8[4] * b.s8[4] + a.u8[5] * b.s8[5];
- t.s16[2] = u > 32767 ? 32767 : u < -32768 ? -32768 : u;
+ t.s16[2] = SIMD_CLAMP(u, -32768, 32767);
u = a.u8[6] * b.s8[6] + a.u8[7] * b.s8[7];
- t.s16[3] = u > 32767 ? 32767 : u < -32768 ? -32768 : u;
+ t.s16[3] = SIMD_CLAMP(u, -32768, 32767);
return t;
}
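
For reference while reading the hunks above: SIMD_CLAMP is introduced earlier in this file's changes (not shown in this excerpt). A macro of the following shape reproduces the ternary clamps it replaces; this is a sketch under that assumption, not the verbatim upstream definition.

/* Sketch of the clamp helper the replacements above rely on. */
#define SIMD_CLAMP(value, min, max) \
  ((value) > (max) ? (max) : (value) < (min) ? (min) : (value))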
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v64_intrinsics_x86.h b/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v64_intrinsics_x86.h
index 1f273fe9646..ec27a6bf42b 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v64_intrinsics_x86.h
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/simd/v64_intrinsics_x86.h
@@ -43,14 +43,14 @@ SIMD_INLINE v64 v64_from_16(uint16_t a, uint16_t b, uint16_t c, uint16_t d) {
}
SIMD_INLINE v64 v64_from_32(uint32_t x, uint32_t y) {
- return _mm_set_epi32(0, 0, x, y);
+ return _mm_set_epi32(0, 0, (int32_t)x, (int32_t)y);
}
SIMD_INLINE v64 v64_from_64(uint64_t x) {
#ifdef __x86_64__
- return _mm_cvtsi64_si128(x);
+ return _mm_cvtsi64_si128((int64_t)x);
#else
- return _mm_set_epi32(0, 0, x >> 32, (uint32_t)x);
+ return _mm_set_epi32(0, 0, (int32_t)(x >> 32), (int32_t)x);
#endif
}
@@ -101,11 +101,11 @@ SIMD_INLINE void v64_store_unaligned(void *p, v64 a) {
SIMD_INLINE v64 v64_zero(void) { return _mm_setzero_si128(); }
-SIMD_INLINE v64 v64_dup_8(uint8_t x) { return _mm_set1_epi8(x); }
+SIMD_INLINE v64 v64_dup_8(uint8_t x) { return _mm_set1_epi8((char)x); }
-SIMD_INLINE v64 v64_dup_16(uint16_t x) { return _mm_set1_epi16(x); }
+SIMD_INLINE v64 v64_dup_16(uint16_t x) { return _mm_set1_epi16((short)x); }
-SIMD_INLINE v64 v64_dup_32(uint32_t x) { return _mm_set1_epi32(x); }
+SIMD_INLINE v64 v64_dup_32(uint32_t x) { return _mm_set1_epi32((int)x); }
SIMD_INLINE v64 v64_add_8(v64 a, v64 b) { return _mm_add_epi8(a, b); }
@@ -178,14 +178,11 @@ SIMD_INLINE v64 v64_pack_s32_u16(v64 a, v64 b) {
__m128i t = _mm_unpacklo_epi64(b, a);
return _mm_packus_epi32(t, t);
#else
- int32_t ah = v64_high_u32(a);
- int32_t al = v64_low_u32(a);
- int32_t bh = v64_high_u32(b);
- int32_t bl = v64_low_u32(b);
- return v64_from_16(ah > 65535 ? 65535 : ah < 0 ? 0 : ah,
- al > 65535 ? 65535 : al < 0 ? 0 : al,
- bh > 65535 ? 65535 : bh < 0 ? 0 : bh,
- bl > 65535 ? 65535 : bl < 0 ? 0 : bl);
+ const int32_t ah = SIMD_CLAMP(v64_high_s32(a), 0, 65535);
+ const int32_t al = SIMD_CLAMP(v64_low_s32(a), 0, 65535);
+ const int32_t bh = SIMD_CLAMP(v64_high_s32(b), 0, 65535);
+ const int32_t bl = SIMD_CLAMP(v64_low_s32(b), 0, 65535);
+ return v64_from_16(ah, al, bh, bl);
#endif
}
@@ -279,7 +276,7 @@ SIMD_INLINE v64 v64_shuffle_8(v64 x, v64 pattern) {
v64 output;
unsigned char *input = (unsigned char *)&x;
unsigned char *index = (unsigned char *)&pattern;
- char *selected = (char *)&output;
+ unsigned char *selected = (unsigned char *)&output;
int counter;
for (counter = 0; counter < 8; counter++) {
@@ -433,42 +430,43 @@ SIMD_INLINE v64 v64_cmplt_s16(v64 a, v64 b) { return _mm_cmplt_epi16(a, b); }
SIMD_INLINE v64 v64_cmpeq_16(v64 a, v64 b) { return _mm_cmpeq_epi16(a, b); }
SIMD_INLINE v64 v64_shl_8(v64 a, unsigned int c) {
- return _mm_and_si128(_mm_set1_epi8((uint8_t)(0xff << c)),
- _mm_sll_epi16(a, _mm_cvtsi32_si128(c)));
+ return _mm_and_si128(_mm_set1_epi8((char)(0xff << c)),
+ _mm_sll_epi16(a, _mm_cvtsi32_si128((int)c)));
}
SIMD_INLINE v64 v64_shr_u8(v64 a, unsigned int c) {
return _mm_and_si128(_mm_set1_epi8((char)(0xff >> c)),
- _mm_srl_epi16(a, _mm_cvtsi32_si128(c)));
+ _mm_srl_epi16(a, _mm_cvtsi32_si128((int)c)));
}
SIMD_INLINE v64 v64_shr_s8(v64 a, unsigned int c) {
return _mm_packs_epi16(
- _mm_sra_epi16(_mm_unpacklo_epi8(a, a), _mm_cvtsi32_si128(c + 8)), a);
+ _mm_sra_epi16(_mm_unpacklo_epi8(a, a), _mm_cvtsi32_si128((int)(c + 8))),
+ a);
}
SIMD_INLINE v64 v64_shl_16(v64 a, unsigned int c) {
- return _mm_sll_epi16(a, _mm_cvtsi32_si128(c));
+ return _mm_sll_epi16(a, _mm_cvtsi32_si128((int)c));
}
SIMD_INLINE v64 v64_shr_u16(v64 a, unsigned int c) {
- return _mm_srl_epi16(a, _mm_cvtsi32_si128(c));
+ return _mm_srl_epi16(a, _mm_cvtsi32_si128((int)c));
}
SIMD_INLINE v64 v64_shr_s16(v64 a, unsigned int c) {
- return _mm_sra_epi16(a, _mm_cvtsi32_si128(c));
+ return _mm_sra_epi16(a, _mm_cvtsi32_si128((int)c));
}
SIMD_INLINE v64 v64_shl_32(v64 a, unsigned int c) {
- return _mm_sll_epi32(a, _mm_cvtsi32_si128(c));
+ return _mm_sll_epi32(a, _mm_cvtsi32_si128((int)c));
}
SIMD_INLINE v64 v64_shr_u32(v64 a, unsigned int c) {
- return _mm_srl_epi32(a, _mm_cvtsi32_si128(c));
+ return _mm_srl_epi32(a, _mm_cvtsi32_si128((int)c));
}
SIMD_INLINE v64 v64_shr_s32(v64 a, unsigned int c) {
- return _mm_sra_epi32(a, _mm_cvtsi32_si128(c));
+ return _mm_sra_epi32(a, _mm_cvtsi32_si128((int)c));
}
/* These intrinsics require immediate values, so we must use #defines
@@ -476,9 +474,9 @@ SIMD_INLINE v64 v64_shr_s32(v64 a, unsigned int c) {
#define v64_shl_n_byte(a, c) _mm_slli_si128(a, c)
#define v64_shr_n_byte(a, c) _mm_srli_si128(_mm_unpacklo_epi64(a, a), c + 8)
#define v64_shl_n_8(a, c) \
- _mm_and_si128(_mm_set1_epi8((uint8_t)(0xff << (c))), _mm_slli_epi16(a, c))
+ _mm_and_si128(_mm_set1_epi8((char)(0xff << (c))), _mm_slli_epi16(a, c))
#define v64_shr_n_u8(a, c) \
- _mm_and_si128(_mm_set1_epi8(0xff >> (c)), _mm_srli_epi16(a, c))
+ _mm_and_si128(_mm_set1_epi8((char)(0xff >> (c))), _mm_srli_epi16(a, c))
#define v64_shr_n_s8(a, c) \
_mm_packs_epi16(_mm_srai_epi16(_mm_unpacklo_epi8(a, a), (c) + 8), a)
#define v64_shl_n_16(a, c) _mm_slli_epi16(a, c)
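
The casts in this file follow one pattern: the SSE intrinsics take signed parameters (char for _mm_set1_epi8, short for _mm_set1_epi16, int for _mm_cvtsi32_si128), so unsigned expressions are narrowed explicitly rather than implicitly. A minimal sketch of the fixed spelling, assuming a compiler with conversion warnings enabled:

#include <emmintrin.h>

static __m128i byte_mask(unsigned int c) {
  /* 0xff << c has type int; passing it straight to _mm_set1_epi8 narrows
   * int -> char implicitly and trips conversion warnings. The explicit
   * (char) cast keeps the same bits and makes the narrowing intentional. */
  return _mm_set1_epi8((char)(0xff << c));
}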
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/variance.c b/chromium/third_party/libaom/source/libaom/aom_dsp/variance.c
index d7641607f47..a37f732deaf 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/variance.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/variance.c
@@ -1240,6 +1240,20 @@ uint64_t aom_mse_wxh_16bit_c(uint8_t *dst, int dstride, uint16_t *src,
return sum;
}
+uint64_t aom_mse_16xh_16bit_c(uint8_t *dst, int dstride, uint16_t *src, int w,
+ int h) {
+ uint16_t *src_temp = src;
+ uint8_t *dst_temp = dst;
+ const int num_blks = 16 / w;
+ int64_t sum = 0;
+ for (int i = 0; i < num_blks; i++) {
+ sum += aom_mse_wxh_16bit_c(dst_temp, dstride, src_temp, w, w, h);
+ dst_temp += w;
+ src_temp += (w * h);
+ }
+ return sum;
+}
+
uint64_t aom_mse_wxh_16bit_highbd_c(uint16_t *dst, int dstride, uint16_t *src,
int sstride, int w, int h) {
uint64_t sum = 0;
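
The new 16xh helper above covers a 16-pixel-wide destination with 16/w side-by-side blocks of width w, while the 16-bit source is packed block after block (w*h samples per block, hence the src_temp += w * h step). A usage sketch under those assumptions, with hypothetical zeroed buffers:

#include <stdint.h>

uint64_t aom_mse_16xh_16bit_c(uint8_t *dst, int dstride, uint16_t *src, int w,
                              int h);

static uint64_t mse_16x8_example(void) {
  /* Two 8x8 source blocks packed back to back; dst is 16 wide, stride 32. */
  static uint16_t src[2 * 8 * 8];
  static uint8_t dst[8 * 32];
  return aom_mse_16xh_16bit_c(dst, 32, src, /*w=*/8, /*h=*/8);
}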
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/aom_convolve_copy_sse2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/aom_convolve_copy_sse2.c
index f7b468a2292..e78845e97ce 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/aom_convolve_copy_sse2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/aom_convolve_copy_sse2.c
@@ -207,11 +207,11 @@ void aom_highbd_convolve_copy_sse2(const uint16_t *src, ptrdiff_t src_stride,
if (w == 2) {
do {
__m128i s = _mm_loadl_epi64((__m128i *)src);
- *(uint32_t *)dst = _mm_cvtsi128_si32(s);
+ *(int *)dst = _mm_cvtsi128_si32(s);
src += src_stride;
dst += dst_stride;
s = _mm_loadl_epi64((__m128i *)src);
- *(uint32_t *)dst = _mm_cvtsi128_si32(s);
+ *(int *)dst = _mm_cvtsi128_si32(s);
src += src_stride;
dst += dst_stride;
h -= 2;
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c
index d8d353c0663..22f2e696d39 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c
@@ -43,8 +43,8 @@
static INLINE void xx_storeu2_epi32(const uint8_t *output_ptr,
const ptrdiff_t stride, const __m256i *a) {
- *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(_mm256_castsi256_si128(*a));
- *((uint32_t *)(output_ptr + stride)) =
+ *((int *)(output_ptr)) = _mm_cvtsi128_si32(_mm256_castsi256_si128(*a));
+ *((int *)(output_ptr + stride)) =
_mm_cvtsi128_si32(_mm256_extracti128_si256(*a, 1));
}
@@ -151,7 +151,7 @@ static void aom_filter_block1d4_h4_avx2(
srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, _mm_setzero_si128());
// save 4 bytes
- *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt1_1);
+ *((int *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt1_1);
}
}
@@ -256,7 +256,7 @@ static void aom_filter_block1d4_h8_avx2(
srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, _mm_setzero_si128());
// save 4 bytes
- *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt1_1);
+ *((int *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt1_1);
}
}
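
_mm_cvtsi128_si32 returns int, so the 4-byte stores above now go through an int * instead of a uint32_t *, which removes an implicit sign conversion without changing the bytes written. Where strict aliasing is a concern, the same store can be spelled with memcpy; a sketch:

#include <emmintrin.h>
#include <string.h>

static void store_lo32(void *dst, __m128i v) {
  const int lo = _mm_cvtsi128_si32(v); /* low 4 bytes of the vector */
  memcpy(dst, &lo, sizeof(lo));        /* compiles to one 32-bit store */
}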
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/aom_subpixel_8t_intrin_sse2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/aom_subpixel_8t_intrin_sse2.c
index cff7f43eee0..5c36b687277 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/aom_subpixel_8t_intrin_sse2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/aom_subpixel_8t_intrin_sse2.c
@@ -477,7 +477,7 @@ void aom_filter_block1d4_h4_sse2(const uint8_t *src_ptr,
src_ptr += src_pixels_per_line;
- *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt32b1_1);
+ *((int *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt32b1_1);
output_ptr += output_pitch;
}
@@ -555,8 +555,8 @@ void aom_filter_block1d4_v4_sse2(const uint8_t *src_ptr, ptrdiff_t src_pitch,
src_ptr += src_stride;
- *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(resReg23_34_45_56);
- *((uint32_t *)(output_ptr + out_pitch)) =
+ *((int *)(output_ptr)) = _mm_cvtsi128_si32(resReg23_34_45_56);
+ *((int *)(output_ptr + out_pitch)) =
_mm_cvtsi128_si32(_mm_srli_si128(resReg23_34_45_56, 4));
output_ptr += dst_stride;
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/aom_subpixel_8t_intrin_ssse3.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/aom_subpixel_8t_intrin_ssse3.c
index 8a182790700..582305957c3 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/aom_subpixel_8t_intrin_ssse3.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/aom_subpixel_8t_intrin_ssse3.c
@@ -108,7 +108,7 @@ static void aom_filter_block1d4_h4_ssse3(
src_ptr += src_pixels_per_line;
- *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt32b1_1);
+ *((int *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt32b1_1);
output_ptr += output_pitch;
}
}
@@ -185,8 +185,8 @@ static void aom_filter_block1d4_v4_ssse3(
src_ptr += src_stride;
- *((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(resReglo);
- *((uint32_t *)(output_ptr + out_pitch)) = _mm_cvtsi128_si32(resReghi);
+ *((int *)(output_ptr)) = _mm_cvtsi128_si32(resReglo);
+ *((int *)(output_ptr + out_pitch)) = _mm_cvtsi128_si32(resReghi);
output_ptr += dst_stride;
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/avg_intrin_avx2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/avg_intrin_avx2.c
index 40397592636..e4edb1202db 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/avg_intrin_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/avg_intrin_avx2.c
@@ -92,8 +92,8 @@ static void hadamard_col8x2_avx2(__m256i *in, int iter) {
}
}
-void aom_hadamard_8x8_dual_avx2(const int16_t *src_diff, ptrdiff_t src_stride,
- int16_t *coeff) {
+void aom_hadamard_lp_8x8_dual_avx2(const int16_t *src_diff,
+ ptrdiff_t src_stride, int16_t *coeff) {
__m256i src[8];
src[0] = _mm256_loadu_si256((const __m256i *)src_diff);
src[1] = _mm256_loadu_si256((const __m256i *)(src_diff += src_stride));
@@ -102,7 +102,7 @@ void aom_hadamard_8x8_dual_avx2(const int16_t *src_diff, ptrdiff_t src_stride,
src[4] = _mm256_loadu_si256((const __m256i *)(src_diff += src_stride));
src[5] = _mm256_loadu_si256((const __m256i *)(src_diff += src_stride));
src[6] = _mm256_loadu_si256((const __m256i *)(src_diff += src_stride));
- src[7] = _mm256_loadu_si256((const __m256i *)(src_diff += src_stride));
+ src[7] = _mm256_loadu_si256((const __m256i *)(src_diff + src_stride));
hadamard_col8x2_avx2(src, 0);
hadamard_col8x2_avx2(src, 1);
@@ -141,7 +141,8 @@ static INLINE void hadamard_16x16_avx2(const int16_t *src_diff,
int idx;
for (idx = 0; idx < 2; ++idx) {
const int16_t *src_ptr = src_diff + idx * 8 * src_stride;
- aom_hadamard_8x8_dual_avx2(src_ptr, src_stride, t_coeff + (idx * 64 * 2));
+ aom_hadamard_lp_8x8_dual_avx2(src_ptr, src_stride,
+ t_coeff + (idx * 64 * 2));
}
for (idx = 0; idx < 64; idx += 16) {
@@ -186,7 +187,8 @@ void aom_hadamard_lp_16x16_avx2(const int16_t *src_diff, ptrdiff_t src_stride,
int16_t *t_coeff = coeff;
for (int idx = 0; idx < 2; ++idx) {
const int16_t *src_ptr = src_diff + idx * 8 * src_stride;
- aom_hadamard_8x8_dual_avx2(src_ptr, src_stride, t_coeff + (idx * 64 * 2));
+ aom_hadamard_lp_8x8_dual_avx2(src_ptr, src_stride,
+ t_coeff + (idx * 64 * 2));
}
for (int idx = 0; idx < 64; idx += 16) {
@@ -345,7 +347,7 @@ void aom_highbd_hadamard_8x8_avx2(const int16_t *src_diff, ptrdiff_t src_stride,
src16[4] = _mm_loadu_si128((const __m128i *)(src_diff += src_stride));
src16[5] = _mm_loadu_si128((const __m128i *)(src_diff += src_stride));
src16[6] = _mm_loadu_si128((const __m128i *)(src_diff += src_stride));
- src16[7] = _mm_loadu_si128((const __m128i *)(src_diff += src_stride));
+ src16[7] = _mm_loadu_si128((const __m128i *)(src_diff + src_stride));
src32[0] = _mm256_cvtepi16_epi32(src16[0]);
src32[1] = _mm256_cvtepi16_epi32(src16[1]);
@@ -550,3 +552,211 @@ void aom_avg_8x8_quad_avx2(const uint8_t *s, int p, int x16_idx, int y16_idx,
avg[2] = _mm_extract_epi16(_mm256_castsi256_si128(result_0), 4);
avg[3] = _mm_extract_epi16(_mm256_extracti128_si256(result_0, 1), 4);
}
+
+void aom_int_pro_row_avx2(int16_t *hbuf, const uint8_t *ref,
+ const int ref_stride, const int width,
+ const int height, int norm_factor) {
+  // SIMD implementation assumes width and height to be multiples of 16 and 2,
+  // respectively. For other widths or heights, SIMD support needs to be added.
+ assert(width % 16 == 0 && height % 2 == 0);
+
+ if (width % 32 == 0) {
+ const __m256i zero = _mm256_setzero_si256();
+ for (int wd = 0; wd < width; wd += 32) {
+ const uint8_t *ref_tmp = ref + wd;
+ int16_t *hbuf_tmp = hbuf + wd;
+ __m256i s0 = zero;
+ __m256i s1 = zero;
+ int idx = 0;
+ do {
+ __m256i src_line = _mm256_loadu_si256((const __m256i *)ref_tmp);
+ __m256i t0 = _mm256_unpacklo_epi8(src_line, zero);
+ __m256i t1 = _mm256_unpackhi_epi8(src_line, zero);
+ s0 = _mm256_adds_epu16(s0, t0);
+ s1 = _mm256_adds_epu16(s1, t1);
+ ref_tmp += ref_stride;
+
+ src_line = _mm256_loadu_si256((const __m256i *)ref_tmp);
+ t0 = _mm256_unpacklo_epi8(src_line, zero);
+ t1 = _mm256_unpackhi_epi8(src_line, zero);
+ s0 = _mm256_adds_epu16(s0, t0);
+ s1 = _mm256_adds_epu16(s1, t1);
+ ref_tmp += ref_stride;
+ idx += 2;
+ } while (idx < height);
+ s0 = _mm256_srai_epi16(s0, norm_factor);
+ s1 = _mm256_srai_epi16(s1, norm_factor);
+ _mm_storeu_si128((__m128i *)(hbuf_tmp), _mm256_castsi256_si128(s0));
+ _mm_storeu_si128((__m128i *)(hbuf_tmp + 8), _mm256_castsi256_si128(s1));
+ _mm_storeu_si128((__m128i *)(hbuf_tmp + 16),
+ _mm256_extractf128_si256(s0, 1));
+ _mm_storeu_si128((__m128i *)(hbuf_tmp + 24),
+ _mm256_extractf128_si256(s1, 1));
+ }
+ } else if (width % 16 == 0) {
+ aom_int_pro_row_sse2(hbuf, ref, ref_stride, width, height, norm_factor);
+ }
+}
+
+void aom_int_pro_col_avx2(int16_t *vbuf, const uint8_t *ref,
+ const int ref_stride, const int width,
+ const int height, int norm_factor) {
+  // SIMD implementation assumes width to be a multiple of 16. For other
+  // widths, SIMD support needs to be added.
+ assert(width % 16 == 0);
+
+ if (width == 128) {
+ const __m256i zero = _mm256_setzero_si256();
+ for (int ht = 0; ht < height; ++ht) {
+ const __m256i src_line0 = _mm256_loadu_si256((const __m256i *)ref);
+ const __m256i src_line1 = _mm256_loadu_si256((const __m256i *)(ref + 32));
+ const __m256i src_line2 = _mm256_loadu_si256((const __m256i *)(ref + 64));
+ const __m256i src_line3 = _mm256_loadu_si256((const __m256i *)(ref + 96));
+ const __m256i s0 = _mm256_sad_epu8(src_line0, zero);
+ const __m256i s1 = _mm256_sad_epu8(src_line1, zero);
+ const __m256i s2 = _mm256_sad_epu8(src_line2, zero);
+ const __m256i s3 = _mm256_sad_epu8(src_line3, zero);
+ const __m256i result0_256bit = _mm256_adds_epu16(s0, s1);
+ const __m256i result1_256bit = _mm256_adds_epu16(s2, s3);
+ const __m256i result_256bit =
+ _mm256_adds_epu16(result0_256bit, result1_256bit);
+
+ const __m128i result =
+ _mm_adds_epu16(_mm256_castsi256_si128(result_256bit),
+ _mm256_extractf128_si256(result_256bit, 1));
+ __m128i result1 = _mm_adds_epu16(result, _mm_srli_si128(result, 8));
+ vbuf[ht] = _mm_extract_epi16(result1, 0) >> norm_factor;
+ ref += ref_stride;
+ }
+ } else if (width == 64) {
+ const __m256i zero = _mm256_setzero_si256();
+ for (int ht = 0; ht < height; ++ht) {
+ const __m256i src_line0 = _mm256_loadu_si256((const __m256i *)ref);
+ const __m256i src_line1 = _mm256_loadu_si256((const __m256i *)(ref + 32));
+ const __m256i s1 = _mm256_sad_epu8(src_line0, zero);
+ const __m256i s2 = _mm256_sad_epu8(src_line1, zero);
+ const __m256i result_256bit = _mm256_adds_epu16(s1, s2);
+
+ const __m128i result =
+ _mm_adds_epu16(_mm256_castsi256_si128(result_256bit),
+ _mm256_extractf128_si256(result_256bit, 1));
+ __m128i result1 = _mm_adds_epu16(result, _mm_srli_si128(result, 8));
+ vbuf[ht] = _mm_extract_epi16(result1, 0) >> norm_factor;
+ ref += ref_stride;
+ }
+ } else if (width == 32) {
+ assert(height % 2 == 0);
+ const __m256i zero = _mm256_setzero_si256();
+ for (int ht = 0; ht < height; ht += 2) {
+ const __m256i src_line0 = _mm256_loadu_si256((const __m256i *)ref);
+ const __m256i src_line1 =
+ _mm256_loadu_si256((const __m256i *)(ref + ref_stride));
+ const __m256i s0 = _mm256_sad_epu8(src_line0, zero);
+ const __m256i s1 = _mm256_sad_epu8(src_line1, zero);
+
+ __m128i result0 = _mm_adds_epu16(_mm256_castsi256_si128(s0),
+ _mm256_extractf128_si256(s0, 1));
+ __m128i result1 = _mm_adds_epu16(_mm256_castsi256_si128(s1),
+ _mm256_extractf128_si256(s1, 1));
+ __m128i result2 = _mm_adds_epu16(result0, _mm_srli_si128(result0, 8));
+ __m128i result3 = _mm_adds_epu16(result1, _mm_srli_si128(result1, 8));
+
+ vbuf[ht] = _mm_extract_epi16(result2, 0) >> norm_factor;
+ vbuf[ht + 1] = _mm_extract_epi16(result3, 0) >> norm_factor;
+ ref += (2 * ref_stride);
+ }
+ } else if (width == 16) {
+ aom_int_pro_col_sse2(vbuf, ref, ref_stride, width, height, norm_factor);
+ }
+}
+
+static inline void calc_vector_mean_sse_64wd(const int16_t *ref,
+ const int16_t *src, __m256i *mean,
+ __m256i *sse) {
+ const __m256i src_line0 = _mm256_loadu_si256((const __m256i *)src);
+ const __m256i src_line1 = _mm256_loadu_si256((const __m256i *)(src + 16));
+ const __m256i src_line2 = _mm256_loadu_si256((const __m256i *)(src + 32));
+ const __m256i src_line3 = _mm256_loadu_si256((const __m256i *)(src + 48));
+ const __m256i ref_line0 = _mm256_loadu_si256((const __m256i *)ref);
+ const __m256i ref_line1 = _mm256_loadu_si256((const __m256i *)(ref + 16));
+ const __m256i ref_line2 = _mm256_loadu_si256((const __m256i *)(ref + 32));
+ const __m256i ref_line3 = _mm256_loadu_si256((const __m256i *)(ref + 48));
+
+ const __m256i diff0 = _mm256_sub_epi16(ref_line0, src_line0);
+ const __m256i diff1 = _mm256_sub_epi16(ref_line1, src_line1);
+ const __m256i diff2 = _mm256_sub_epi16(ref_line2, src_line2);
+ const __m256i diff3 = _mm256_sub_epi16(ref_line3, src_line3);
+ const __m256i diff_sqr0 = _mm256_madd_epi16(diff0, diff0);
+ const __m256i diff_sqr1 = _mm256_madd_epi16(diff1, diff1);
+ const __m256i diff_sqr2 = _mm256_madd_epi16(diff2, diff2);
+ const __m256i diff_sqr3 = _mm256_madd_epi16(diff3, diff3);
+
+ *mean = _mm256_add_epi16(*mean, _mm256_add_epi16(diff0, diff1));
+ *mean = _mm256_add_epi16(*mean, diff2);
+ *mean = _mm256_add_epi16(*mean, diff3);
+ *sse = _mm256_add_epi32(*sse, _mm256_add_epi32(diff_sqr0, diff_sqr1));
+ *sse = _mm256_add_epi32(*sse, diff_sqr2);
+ *sse = _mm256_add_epi32(*sse, diff_sqr3);
+}
+
+#define CALC_VAR_FROM_MEAN_SSE(mean, sse) \
+ { \
+ mean = _mm256_madd_epi16(mean, _mm256_set1_epi16(1)); \
+ mean = _mm256_hadd_epi32(mean, sse); \
+ mean = _mm256_add_epi32(mean, _mm256_bsrli_epi128(mean, 4)); \
+ const __m128i result = _mm_add_epi32(_mm256_castsi256_si128(mean), \
+ _mm256_extractf128_si256(mean, 1)); \
+ /*(mean * mean): dynamic range 31 bits.*/ \
+ const int mean_int = _mm_extract_epi32(result, 0); \
+ const int sse_int = _mm_extract_epi32(result, 2); \
+ const unsigned int mean_abs = abs(mean_int); \
+ var = sse_int - ((mean_abs * mean_abs) >> (bwl + 2)); \
+ }
+
+// ref: [0 - 510]
+// src: [0 - 510]
+// bwl: {2, 3, 4, 5}
+int aom_vector_var_avx2(const int16_t *ref, const int16_t *src, int bwl) {
+ const int width = 4 << bwl;
+ assert(width % 16 == 0 && width <= 128);
+ int var = 0;
+
+  // Instead of looping over the width in steps of 16, the code is unrolled
+  // to avoid some addition operations.
+ if (width == 128) {
+ __m256i mean = _mm256_setzero_si256();
+ __m256i sse = _mm256_setzero_si256();
+
+ calc_vector_mean_sse_64wd(src, ref, &mean, &sse);
+ calc_vector_mean_sse_64wd(src + 64, ref + 64, &mean, &sse);
+ CALC_VAR_FROM_MEAN_SSE(mean, sse)
+ } else if (width == 64) {
+ __m256i mean = _mm256_setzero_si256();
+ __m256i sse = _mm256_setzero_si256();
+
+ calc_vector_mean_sse_64wd(src, ref, &mean, &sse);
+ CALC_VAR_FROM_MEAN_SSE(mean, sse)
+ } else if (width == 32) {
+ const __m256i src_line0 = _mm256_loadu_si256((const __m256i *)src);
+ const __m256i ref_line0 = _mm256_loadu_si256((const __m256i *)ref);
+ const __m256i src_line1 = _mm256_loadu_si256((const __m256i *)(src + 16));
+ const __m256i ref_line1 = _mm256_loadu_si256((const __m256i *)(ref + 16));
+
+ const __m256i diff0 = _mm256_sub_epi16(ref_line0, src_line0);
+ const __m256i diff1 = _mm256_sub_epi16(ref_line1, src_line1);
+ const __m256i diff_sqr0 = _mm256_madd_epi16(diff0, diff0);
+ const __m256i diff_sqr1 = _mm256_madd_epi16(diff1, diff1);
+ const __m256i sse = _mm256_add_epi32(diff_sqr0, diff_sqr1);
+ __m256i mean = _mm256_add_epi16(diff0, diff1);
+
+ CALC_VAR_FROM_MEAN_SSE(mean, sse)
+ } else if (width == 16) {
+ const __m256i src_line = _mm256_loadu_si256((const __m256i *)src);
+ const __m256i ref_line = _mm256_loadu_si256((const __m256i *)ref);
+ __m256i mean = _mm256_sub_epi16(ref_line, src_line);
+ const __m256i sse = _mm256_madd_epi16(mean, mean);
+
+ CALC_VAR_FROM_MEAN_SSE(mean, sse)
+ }
+ return var;
+}
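
For reference, the AVX2 kernel above (and the SSE4.1 variant added later in this patch) computes the same quantity as the scalar form below: the variance of the difference between two projection vectors, with the mean-square term realized as a shift by bwl + 2 since width = 4 << bwl. A scalar sketch under those assumptions:

#include <stdint.h>
#include <stdlib.h>

static int vector_var_scalar(const int16_t *ref, const int16_t *src, int bwl) {
  const int width = 4 << bwl; /* 16, 32, 64 or 128 */
  int mean = 0, sse = 0;
  for (int i = 0; i < width; ++i) {
    const int diff = ref[i] - src[i];
    mean += diff;
    sse += diff * diff;
  }
  /* (mean * mean): dynamic range 31 bits, as noted in the kernels above. */
  const unsigned int mean_abs = (unsigned int)abs(mean);
  return sse - (int)((mean_abs * mean_abs) >> (bwl + 2));
}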
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/avg_intrin_sse2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/avg_intrin_sse2.c
index da3cfedb3ce..8e8955557d8 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/avg_intrin_sse2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/avg_intrin_sse2.c
@@ -123,40 +123,11 @@ unsigned int aom_avg_8x8_sse2(const uint8_t *s, int p) {
void aom_avg_8x8_quad_sse2(const uint8_t *s, int p, int x16_idx, int y16_idx,
int *avg) {
- const __m128i u0 = _mm_setzero_si128();
for (int k = 0; k < 4; k++) {
- __m128i s0, s1;
- unsigned int avg_temp = 0;
const int x8_idx = x16_idx + ((k & 1) << 3);
const int y8_idx = y16_idx + ((k >> 1) << 3);
const uint8_t *s_tmp = s + y8_idx * p + x8_idx;
- s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s_tmp)), u0);
- s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s_tmp + p)), u0);
- s0 = _mm_adds_epu16(s0, s1);
- s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s_tmp + 2 * p)),
- u0);
- s0 = _mm_adds_epu16(s0, s1);
- s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s_tmp + 3 * p)),
- u0);
- s0 = _mm_adds_epu16(s0, s1);
- s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s_tmp + 4 * p)),
- u0);
- s0 = _mm_adds_epu16(s0, s1);
- s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s_tmp + 5 * p)),
- u0);
- s0 = _mm_adds_epu16(s0, s1);
- s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s_tmp + 6 * p)),
- u0);
- s0 = _mm_adds_epu16(s0, s1);
- s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s_tmp + 7 * p)),
- u0);
- s0 = _mm_adds_epu16(s0, s1);
-
- s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 8));
- s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 32));
- s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 16));
- avg_temp = _mm_extract_epi16(s0, 0);
- avg[k] = (avg_temp + 32) >> 6;
+ avg[k] = aom_avg_8x8_sse2(s_tmp, p);
}
}
@@ -210,7 +181,7 @@ void aom_hadamard_4x4_sse2(const int16_t *src_diff, ptrdiff_t src_stride,
src[0] = _mm_loadl_epi64((const __m128i *)src_diff);
src[1] = _mm_loadl_epi64((const __m128i *)(src_diff += src_stride));
src[2] = _mm_loadl_epi64((const __m128i *)(src_diff += src_stride));
- src[3] = _mm_loadl_epi64((const __m128i *)(src_diff += src_stride));
+ src[3] = _mm_loadl_epi64((const __m128i *)(src_diff + src_stride));
hadamard_col4_sse2(src, 0);
hadamard_col4_sse2(src, 1);
@@ -307,7 +278,7 @@ static INLINE void hadamard_8x8_sse2(const int16_t *src_diff,
src[4] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
src[5] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
src[6] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
- src[7] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
+ src[7] = _mm_load_si128((const __m128i *)(src_diff + src_stride));
hadamard_col8_sse2(src, 0);
hadamard_col8_sse2(src, 1);
@@ -371,7 +342,7 @@ void aom_pixel_scale_sse2(const int16_t *src_diff, ptrdiff_t src_stride,
src[4] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
src[5] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
src[6] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
- src[7] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
+ src[7] = _mm_load_si128((const __m128i *)(src_diff + src_stride));
src[0] = _mm_slli_epi16(src[0], log_scale);
src[1] = _mm_slli_epi16(src[1], log_scale);
@@ -413,7 +384,7 @@ static INLINE void hadamard_lp_8x8_sse2(const int16_t *src_diff,
src[4] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
src[5] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
src[6] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
- src[7] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
+ src[7] = _mm_load_si128((const __m128i *)(src_diff + src_stride));
hadamard_col8_sse2(src, 0);
hadamard_col8_sse2(src, 1);
@@ -440,8 +411,8 @@ void aom_hadamard_lp_8x8_sse2(const int16_t *src_diff, ptrdiff_t src_stride,
hadamard_lp_8x8_sse2(src_diff, src_stride, coeff);
}
-void aom_hadamard_8x8_dual_sse2(const int16_t *src_diff, ptrdiff_t src_stride,
- int16_t *coeff) {
+void aom_hadamard_lp_8x8_dual_sse2(const int16_t *src_diff,
+ ptrdiff_t src_stride, int16_t *coeff) {
for (int i = 0; i < 2; i++) {
hadamard_lp_8x8_sse2(src_diff + (i * 8), src_stride, coeff + (i * 64));
}
@@ -654,74 +625,64 @@ int aom_satd_lp_sse2(const int16_t *coeff, int length) {
return _mm_cvtsi128_si32(accum);
}
-void aom_int_pro_row_sse2(int16_t hbuf[16], const uint8_t *ref,
- const int ref_stride, const int height) {
- int idx = 1;
+void aom_int_pro_row_sse2(int16_t *hbuf, const uint8_t *ref,
+ const int ref_stride, const int width,
+ const int height, int norm_factor) {
+  // SIMD implementation assumes width and height to be multiples of 16 and 2,
+  // respectively. For other widths or heights, SIMD support needs to be added.
+ assert(width % 16 == 0 && height % 2 == 0);
__m128i zero = _mm_setzero_si128();
- __m128i src_line = _mm_loadu_si128((const __m128i *)ref);
- __m128i s0 = _mm_unpacklo_epi8(src_line, zero);
- __m128i s1 = _mm_unpackhi_epi8(src_line, zero);
- __m128i t0, t1;
- int height_1 = height - 1;
- ref += ref_stride;
- do {
- src_line = _mm_loadu_si128((const __m128i *)ref);
- t0 = _mm_unpacklo_epi8(src_line, zero);
- t1 = _mm_unpackhi_epi8(src_line, zero);
- s0 = _mm_adds_epu16(s0, t0);
- s1 = _mm_adds_epu16(s1, t1);
- ref += ref_stride;
-
- src_line = _mm_loadu_si128((const __m128i *)ref);
- t0 = _mm_unpacklo_epi8(src_line, zero);
- t1 = _mm_unpackhi_epi8(src_line, zero);
- s0 = _mm_adds_epu16(s0, t0);
- s1 = _mm_adds_epu16(s1, t1);
- ref += ref_stride;
- idx += 2;
- } while (idx < height_1);
-
- src_line = _mm_loadu_si128((const __m128i *)ref);
- t0 = _mm_unpacklo_epi8(src_line, zero);
- t1 = _mm_unpackhi_epi8(src_line, zero);
- s0 = _mm_adds_epu16(s0, t0);
- s1 = _mm_adds_epu16(s1, t1);
- if (height == 128) {
- s0 = _mm_srai_epi16(s0, 6);
- s1 = _mm_srai_epi16(s1, 6);
- } else if (height == 64) {
- s0 = _mm_srai_epi16(s0, 5);
- s1 = _mm_srai_epi16(s1, 5);
- } else if (height == 32) {
- s0 = _mm_srai_epi16(s0, 4);
- s1 = _mm_srai_epi16(s1, 4);
- } else {
- assert(height == 16);
- s0 = _mm_srai_epi16(s0, 3);
- s1 = _mm_srai_epi16(s1, 3);
- }
- _mm_storeu_si128((__m128i *)hbuf, s0);
- hbuf += 8;
- _mm_storeu_si128((__m128i *)hbuf, s1);
+ for (int wd = 0; wd < width; wd += 16) {
+ const uint8_t *ref_tmp = ref + wd;
+ int16_t *hbuf_tmp = hbuf + wd;
+ __m128i s0 = zero;
+ __m128i s1 = zero;
+ int idx = 0;
+ do {
+ __m128i src_line = _mm_loadu_si128((const __m128i *)ref_tmp);
+ __m128i t0 = _mm_unpacklo_epi8(src_line, zero);
+ __m128i t1 = _mm_unpackhi_epi8(src_line, zero);
+ s0 = _mm_adds_epu16(s0, t0);
+ s1 = _mm_adds_epu16(s1, t1);
+ ref_tmp += ref_stride;
+
+ src_line = _mm_loadu_si128((const __m128i *)ref_tmp);
+ t0 = _mm_unpacklo_epi8(src_line, zero);
+ t1 = _mm_unpackhi_epi8(src_line, zero);
+ s0 = _mm_adds_epu16(s0, t0);
+ s1 = _mm_adds_epu16(s1, t1);
+ ref_tmp += ref_stride;
+ idx += 2;
+ } while (idx < height);
+
+ s0 = _mm_srai_epi16(s0, norm_factor);
+ s1 = _mm_srai_epi16(s1, norm_factor);
+ _mm_storeu_si128((__m128i *)(hbuf_tmp), s0);
+ _mm_storeu_si128((__m128i *)(hbuf_tmp + 8), s1);
+ }
}
-int16_t aom_int_pro_col_sse2(const uint8_t *ref, const int width) {
- __m128i zero = _mm_setzero_si128();
- __m128i src_line = _mm_loadu_si128((const __m128i *)ref);
- __m128i s0 = _mm_sad_epu8(src_line, zero);
- __m128i s1;
- int i;
+void aom_int_pro_col_sse2(int16_t *vbuf, const uint8_t *ref,
+ const int ref_stride, const int width,
+ const int height, int norm_factor) {
+  // SIMD implementation assumes width to be a multiple of 16.
+ assert(width % 16 == 0);
+
+ for (int ht = 0; ht < height; ht++) {
+ const uint8_t *ref_tmp = ref + (ht * ref_stride);
+ __m128i zero = _mm_setzero_si128();
+ __m128i s0 = zero;
+ __m128i s1, src_line;
+ for (int i = 0; i < width; i += 16) {
+ src_line = _mm_loadu_si128((const __m128i *)ref_tmp);
+ s1 = _mm_sad_epu8(src_line, zero);
+ s0 = _mm_adds_epu16(s0, s1);
+ ref_tmp += 16;
+ }
- for (i = 16; i < width; i += 16) {
- ref += 16;
- src_line = _mm_loadu_si128((const __m128i *)ref);
- s1 = _mm_sad_epu8(src_line, zero);
+ s1 = _mm_srli_si128(s0, 8);
s0 = _mm_adds_epu16(s0, s1);
+ vbuf[ht] = _mm_extract_epi16(s0, 0) >> norm_factor;
}
-
- s1 = _mm_srli_si128(s0, 8);
- s0 = _mm_adds_epu16(s0, s1);
-
- return _mm_extract_epi16(s0, 0);
}
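
Both the SSE2 versions above and the AVX2 versions earlier in this patch compute integral projections: hbuf holds per-column sums over height rows, vbuf per-row sums over width columns, each normalized by a right shift (the SIMD paths additionally saturate their 16-bit accumulators). A scalar sketch of that contract:

#include <stdint.h>

static void int_pro_scalar(int16_t *hbuf, int16_t *vbuf, const uint8_t *ref,
                           int ref_stride, int width, int height,
                           int norm_factor) {
  for (int x = 0; x < width; ++x) { /* column sums -> hbuf */
    int sum = 0;
    for (int y = 0; y < height; ++y) sum += ref[y * ref_stride + x];
    hbuf[x] = (int16_t)(sum >> norm_factor);
  }
  for (int y = 0; y < height; ++y) { /* row sums -> vbuf */
    int sum = 0;
    for (int x = 0; x < width; ++x) sum += ref[y * ref_stride + x];
    vbuf[y] = (int16_t)(sum >> norm_factor);
  }
}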
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/avg_intrin_sse4.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/avg_intrin_sse4.c
new file mode 100644
index 00000000000..b83b43122a4
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/avg_intrin_sse4.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <smmintrin.h>
+
+#include "config/aom_dsp_rtcd.h"
+
+// ref: [0 - 510]
+// src: [0 - 510]
+// bwl: {2, 3, 4, 5}
+int aom_vector_var_sse4_1(const int16_t *ref, const int16_t *src, int bwl) {
+ const int width = 4 << bwl;
+ assert(width % 16 == 0);
+
+ const __m128i k_one_epi16 = _mm_set1_epi16((int16_t)1);
+ __m128i mean = _mm_setzero_si128();
+ __m128i sse = _mm_setzero_si128();
+
+ for (int i = 0; i < width; i += 16) {
+ const __m128i src_line = _mm_loadu_si128((const __m128i *)src);
+ const __m128i ref_line = _mm_loadu_si128((const __m128i *)ref);
+ const __m128i src_line2 = _mm_loadu_si128((const __m128i *)(src + 8));
+ const __m128i ref_line2 = _mm_loadu_si128((const __m128i *)(ref + 8));
+ __m128i diff = _mm_sub_epi16(ref_line, src_line);
+ const __m128i diff2 = _mm_sub_epi16(ref_line2, src_line2);
+ __m128i diff_sqr = _mm_madd_epi16(diff, diff);
+ const __m128i diff_sqr2 = _mm_madd_epi16(diff2, diff2);
+
+ diff = _mm_add_epi16(diff, diff2);
+ diff_sqr = _mm_add_epi32(diff_sqr, diff_sqr2);
+ sse = _mm_add_epi32(sse, diff_sqr);
+ mean = _mm_add_epi16(mean, diff);
+
+ src += 16;
+ ref += 16;
+ }
+
+ // m0 m1 m2 m3
+ mean = _mm_madd_epi16(mean, k_one_epi16);
+ // m0+m1 m2+m3 s0+s1 s2+s3
+ __m128i result = _mm_hadd_epi32(mean, sse);
+ // m0+m1+m2+m3 s0+s1+s2+s3 x x
+ result = _mm_add_epi32(result, _mm_bsrli_si128(result, 4));
+
+ // (mean * mean): dynamic range 31 bits.
+ const int mean_int = _mm_extract_epi32(result, 0);
+ const int sse_int = _mm_extract_epi32(result, 2);
+ const unsigned int mean_abs = abs(mean_int);
+ const int var = sse_int - ((mean_abs * mean_abs) >> (bwl + 2));
+ return var;
+}
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/blend_a64_mask_avx2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/blend_a64_mask_avx2.c
index 95383d2fd15..dfbab324d03 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/blend_a64_mask_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/blend_a64_mask_avx2.c
@@ -910,14 +910,14 @@ static INLINE void highbd_blend_a64_d16_mask_w4_avx2(
const __m256i *round_offset, int shift, const __m256i *clip_low,
const __m256i *clip_high, const __m256i *mask_max) {
// Load 4x u16 pixels from each of 4 rows from each source
- const __m256i s0 = _mm256_set_epi64x(*(uint64_t *)(src0 + 3 * src0_stride),
- *(uint64_t *)(src0 + 2 * src0_stride),
- *(uint64_t *)(src0 + 1 * src0_stride),
- *(uint64_t *)(src0 + 0 * src0_stride));
- const __m256i s1 = _mm256_set_epi64x(*(uint64_t *)(src1 + 3 * src1_stride),
- *(uint64_t *)(src1 + 2 * src1_stride),
- *(uint64_t *)(src1 + 1 * src1_stride),
- *(uint64_t *)(src1 + 0 * src1_stride));
+ const __m256i s0 = _mm256_set_epi64x(*(int64_t *)(src0 + 3 * src0_stride),
+ *(int64_t *)(src0 + 2 * src0_stride),
+ *(int64_t *)(src0 + 1 * src0_stride),
+ *(int64_t *)(src0 + 0 * src0_stride));
+ const __m256i s1 = _mm256_set_epi64x(*(int64_t *)(src1 + 3 * src1_stride),
+ *(int64_t *)(src1 + 2 * src1_stride),
+ *(int64_t *)(src1 + 1 * src1_stride),
+ *(int64_t *)(src1 + 0 * src1_stride));
// Generate the inverse mask
const __m256i mask1 = _mm256_sub_epi16(*mask_max, *mask0);
@@ -964,10 +964,10 @@ static INLINE void highbd_blend_a64_d16_mask_subw0_subh0_w4_avx2(
const __m256i *clip_high, const __m256i *mask_max) {
do {
// Load 8x u8 pixels from each of 4 rows of the mask, pad each to u16
- const __m128i mask08 = _mm_set_epi32(*(uint32_t *)(mask + 3 * mask_stride),
- *(uint32_t *)(mask + 2 * mask_stride),
- *(uint32_t *)(mask + 1 * mask_stride),
- *(uint32_t *)(mask + 0 * mask_stride));
+ const __m128i mask08 = _mm_set_epi32(*(int32_t *)(mask + 3 * mask_stride),
+ *(int32_t *)(mask + 2 * mask_stride),
+ *(int32_t *)(mask + 1 * mask_stride),
+ *(int32_t *)(mask + 0 * mask_stride));
const __m256i mask0 = _mm256_cvtepu8_epi16(mask08);
highbd_blend_a64_d16_mask_w4_avx2(dst, dst_stride, src0, src0_stride, src1,
@@ -994,15 +994,15 @@ static INLINE void highbd_blend_a64_d16_mask_subw1_subh1_w4_avx2(
// (saturating) add together rows then use madd to add adjacent pixels
// Finally, divide each value by 4 (with rounding)
const __m256i m0246 =
- _mm256_set_epi64x(*(uint64_t *)(mask + 6 * mask_stride),
- *(uint64_t *)(mask + 4 * mask_stride),
- *(uint64_t *)(mask + 2 * mask_stride),
- *(uint64_t *)(mask + 0 * mask_stride));
+ _mm256_set_epi64x(*(int64_t *)(mask + 6 * mask_stride),
+ *(int64_t *)(mask + 4 * mask_stride),
+ *(int64_t *)(mask + 2 * mask_stride),
+ *(int64_t *)(mask + 0 * mask_stride));
const __m256i m1357 =
- _mm256_set_epi64x(*(uint64_t *)(mask + 7 * mask_stride),
- *(uint64_t *)(mask + 5 * mask_stride),
- *(uint64_t *)(mask + 3 * mask_stride),
- *(uint64_t *)(mask + 1 * mask_stride));
+ _mm256_set_epi64x(*(int64_t *)(mask + 7 * mask_stride),
+ *(int64_t *)(mask + 5 * mask_stride),
+ *(int64_t *)(mask + 3 * mask_stride),
+ *(int64_t *)(mask + 1 * mask_stride));
const __m256i addrows = _mm256_adds_epu8(m0246, m1357);
const __m256i adjacent = _mm256_maddubs_epi16(addrows, one_b);
const __m256i mask0 =
@@ -1101,10 +1101,10 @@ static INLINE void highbd_blend_a64_d16_mask_subw0_subh0_w8_avx2(
do {
// Load 8x u8 pixels from each of 4 rows in the mask
const __m128i mask0a8 =
- _mm_set_epi64x(*(uint64_t *)mask, *(uint64_t *)(mask + mask_stride));
+        _mm_set_epi64x(*(int64_t *)mask, *(int64_t *)(mask + mask_stride));
const __m128i mask0b8 =
- _mm_set_epi64x(*(uint64_t *)(mask + 2 * mask_stride),
- *(uint64_t *)(mask + 3 * mask_stride));
+ _mm_set_epi64x(*(int64_t *)(mask + 2 * mask_stride),
+ *(int64_t *)(mask + 3 * mask_stride));
const __m256i mask0a = _mm256_cvtepu8_epi16(mask0a8);
const __m256i mask0b = _mm256_cvtepu8_epi16(mask0b8);
@@ -1307,7 +1307,7 @@ void aom_highbd_blend_a64_d16_mask_avx2(
const __m256i v_round_offset = _mm256_set1_epi32(round_offset);
const int shift = round_bits + AOM_BLEND_A64_ROUND_BITS;
- const __m256i clip_low = _mm256_set1_epi16(0);
+ const __m256i clip_low = _mm256_setzero_si256();
const __m256i clip_high = _mm256_set1_epi16((1 << bd) - 1);
const __m256i mask_max = _mm256_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
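
The uint64_t to int64_t changes in this file (and in the SSE4.1 variant below) exist because _mm_set_epi64x and _mm256_set_epi64x take signed long long parameters; reading the rows through int64_t * loads the same bytes without an implicit sign conversion. A memcpy-based load is the strict-aliasing-safe spelling of the same gather; a sketch:

#include <emmintrin.h>
#include <stdint.h>
#include <string.h>

/* Load 4 u16 pixels from each of two rows into one 128-bit register.
 * Callers must guarantee at least 8 readable bytes per row. */
static __m128i load_rows_2x64(const uint16_t *row0, const uint16_t *row1) {
  int64_t a, b;
  memcpy(&a, row0, sizeof(a));
  memcpy(&b, row1, sizeof(b));
  return _mm_set_epi64x(a, b);
}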
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/blend_a64_mask_sse4.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/blend_a64_mask_sse4.c
index 4a368ef9478..58a7345ec24 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/blend_a64_mask_sse4.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/blend_a64_mask_sse4.c
@@ -1121,13 +1121,13 @@ static INLINE void highbd_blend_a64_d16_mask_w4_sse4_1(
const __m128i *mask_max) {
// Load 4 pixels from each of 4 rows from each source
const __m128i s0a =
- _mm_set_epi64x(*(uint64_t *)src0, *(uint64_t *)(src0 + src0_stride));
- const __m128i s0b = _mm_set_epi64x(*(uint64_t *)(src0 + 2 * src0_stride),
- *(uint64_t *)(src0 + 3 * src0_stride));
+ _mm_set_epi64x(*(int64_t *)src0, *(int64_t *)(src0 + src0_stride));
+ const __m128i s0b = _mm_set_epi64x(*(int64_t *)(src0 + 2 * src0_stride),
+ *(int64_t *)(src0 + 3 * src0_stride));
const __m128i s1a =
- _mm_set_epi64x(*(uint64_t *)(src1), *(uint64_t *)(src1 + src1_stride));
- const __m128i s1b = _mm_set_epi64x(*(uint64_t *)(src1 + 2 * src1_stride),
- *(uint64_t *)(src1 + 3 * src1_stride));
+ _mm_set_epi64x(*(int64_t *)(src1), *(int64_t *)(src1 + src1_stride));
+ const __m128i s1b = _mm_set_epi64x(*(int64_t *)(src1 + 2 * src1_stride),
+ *(int64_t *)(src1 + 3 * src1_stride));
// Generate the inverse masks
const __m128i mask1a = _mm_sub_epi16(*mask_max, *mask0a);
@@ -1187,11 +1187,11 @@ static INLINE void highbd_blend_a64_d16_mask_subw0_subh0_w4_sse4_1(
const __m128i *round_offset, int shift, const __m128i *clip_low,
const __m128i *clip_high, const __m128i *mask_max) {
do {
- const __m128i mask0a8 = _mm_set_epi32(0, 0, *(uint32_t *)mask,
- *(uint32_t *)(mask + mask_stride));
+ const __m128i mask0a8 =
+ _mm_set_epi32(0, 0, *(int32_t *)mask, *(int32_t *)(mask + mask_stride));
const __m128i mask0b8 =
- _mm_set_epi32(0, 0, *(uint32_t *)(mask + 2 * mask_stride),
- *(uint32_t *)(mask + 3 * mask_stride));
+ _mm_set_epi32(0, 0, *(int32_t *)(mask + 2 * mask_stride),
+ *(int32_t *)(mask + 3 * mask_stride));
const __m128i mask0a = _mm_cvtepu8_epi16(mask0a8);
const __m128i mask0b = _mm_cvtepu8_epi16(mask0b8);
@@ -1218,16 +1218,16 @@ static INLINE void highbd_blend_a64_d16_mask_subw1_subh1_w4_sse4_1(
// Load 8 pixels from each of 8 rows of mask,
// (saturating) add together rows then use madd to add adjacent pixels
// Finally, divide each value by 4 (with rounding)
- const __m128i m02 = _mm_set_epi64x(*(uint64_t *)(mask),
- *(uint64_t *)(mask + 2 * mask_stride));
- const __m128i m13 = _mm_set_epi64x(*(uint64_t *)(mask + mask_stride),
- *(uint64_t *)(mask + 3 * mask_stride));
+ const __m128i m02 = _mm_set_epi64x(*(int64_t *)(mask),
+ *(int64_t *)(mask + 2 * mask_stride));
+ const __m128i m13 = _mm_set_epi64x(*(int64_t *)(mask + mask_stride),
+ *(int64_t *)(mask + 3 * mask_stride));
const __m128i m0123 = _mm_maddubs_epi16(_mm_adds_epu8(m02, m13), one_b);
const __m128i mask_0a = _mm_srli_epi16(_mm_add_epi16(m0123, two_w), 2);
- const __m128i m46 = _mm_set_epi64x(*(uint64_t *)(mask + 4 * mask_stride),
- *(uint64_t *)(mask + 6 * mask_stride));
- const __m128i m57 = _mm_set_epi64x(*(uint64_t *)(mask + 5 * mask_stride),
- *(uint64_t *)(mask + 7 * mask_stride));
+ const __m128i m46 = _mm_set_epi64x(*(int64_t *)(mask + 4 * mask_stride),
+ *(int64_t *)(mask + 6 * mask_stride));
+ const __m128i m57 = _mm_set_epi64x(*(int64_t *)(mask + 5 * mask_stride),
+ *(int64_t *)(mask + 7 * mask_stride));
const __m128i m4567 = _mm_maddubs_epi16(_mm_adds_epu8(m46, m57), one_b);
const __m128i mask_0b = _mm_srli_epi16(_mm_add_epi16(m4567, two_w), 2);
@@ -1493,7 +1493,7 @@ void aom_highbd_blend_a64_d16_mask_sse4_1(
const __m128i v_round_offset = _mm_set1_epi32(round_offset);
const int shift = round_bits + AOM_BLEND_A64_ROUND_BITS;
- const __m128i clip_low = _mm_set1_epi16(0);
+ const __m128i clip_low = _mm_setzero_si128();
const __m128i clip_high = _mm_set1_epi16((1 << bd) - 1);
const __m128i mask_max = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA);
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/convolve_avx2.h b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/convolve_avx2.h
index 785ba39d9cd..a7090088f26 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/convolve_avx2.h
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/convolve_avx2.h
@@ -12,6 +12,13 @@
#ifndef AOM_AOM_DSP_X86_CONVOLVE_AVX2_H_
#define AOM_AOM_DSP_X86_CONVOLVE_AVX2_H_
+#include <immintrin.h>
+
+#include "aom_ports/mem.h"
+
+#include "av1/common/convolve.h"
+#include "av1/common/filter.h"
+
// filters for 16
DECLARE_ALIGNED(32, static const uint8_t, filt_global_avx2[]) = {
0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 0, 1, 1,
@@ -576,9 +583,8 @@ DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = {
const __m128i res_0 = _mm256_castsi256_si128(res_8); \
const __m128i res_1 = _mm256_extracti128_si256(res_8, 1); \
\
- *(uint32_t *)(&dst0[i * dst_stride0 + j]) = \
- _mm_cvtsi128_si32(res_0); \
- *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) = \
+ *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0); \
+ *(int *)(&dst0[i * dst_stride0 + j + dst_stride0]) = \
_mm_cvtsi128_si32(res_1); \
\
} else { \
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/fwd_txfm_sse2.h b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/fwd_txfm_sse2.h
index ab3cd915579..78ea98522ec 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/fwd_txfm_sse2.h
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/fwd_txfm_sse2.h
@@ -34,7 +34,7 @@ static INLINE __m128i k_packs_epi64(__m128i a, __m128i b) {
static INLINE int check_epi16_overflow_x2(const __m128i *preg0,
const __m128i *preg1) {
const __m128i max_overflow = _mm_set1_epi16(0x7fff);
- const __m128i min_overflow = _mm_set1_epi16(0x8000);
+ const __m128i min_overflow = _mm_set1_epi16((short)0x8000);
__m128i cmp0 = _mm_or_si128(_mm_cmpeq_epi16(*preg0, max_overflow),
_mm_cmpeq_epi16(*preg0, min_overflow));
__m128i cmp1 = _mm_or_si128(_mm_cmpeq_epi16(*preg1, max_overflow),
@@ -48,7 +48,7 @@ static INLINE int check_epi16_overflow_x4(const __m128i *preg0,
const __m128i *preg2,
const __m128i *preg3) {
const __m128i max_overflow = _mm_set1_epi16(0x7fff);
- const __m128i min_overflow = _mm_set1_epi16(0x8000);
+ const __m128i min_overflow = _mm_set1_epi16((short)0x8000);
__m128i cmp0 = _mm_or_si128(_mm_cmpeq_epi16(*preg0, max_overflow),
_mm_cmpeq_epi16(*preg0, min_overflow));
__m128i cmp1 = _mm_or_si128(_mm_cmpeq_epi16(*preg1, max_overflow),
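
0x8000 has type int (value 32768), which does not fit in short, so the literal is narrowed explicitly; the resulting lane value is INT16_MIN. The scalar meaning of the check, as a sketch:

#include <stdint.h>

/* A lane equal to INT16_MAX or INT16_MIN may have saturated or wrapped. */
static int lane_epi16_overflow(int16_t v) {
  return v == INT16_MAX || v == INT16_MIN;
}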
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_convolve_avx2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_convolve_avx2.c
index fdf9524ad66..8361e2f9e2a 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_convolve_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_convolve_avx2.c
@@ -165,9 +165,9 @@ void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride,
res_a_round = _mm256_min_epi16(res_a_round, clip_pixel);
res_a_round = _mm256_max_epi16(res_a_round, zero);
- xx_storel_32((__m128i *)&dst[i * dst_stride + j],
+ xx_storel_32(&dst[i * dst_stride + j],
_mm256_castsi256_si128(res_a_round));
- xx_storel_32((__m128i *)&dst[i * dst_stride + j + dst_stride],
+ xx_storel_32(&dst[i * dst_stride + j + dst_stride],
_mm256_extracti128_si256(res_a_round, 1));
}
@@ -275,9 +275,8 @@ void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride,
_mm_storel_epi64((__m128i *)&dst[i * dst_stride + j + dst_stride],
_mm256_extracti128_si256(res, 1));
} else {
- xx_storel_32((__m128i *)&dst[i * dst_stride + j],
- _mm256_castsi256_si128(res));
- xx_storel_32((__m128i *)&dst[i * dst_stride + j + dst_stride],
+ xx_storel_32(&dst[i * dst_stride + j], _mm256_castsi256_si128(res));
+ xx_storel_32(&dst[i * dst_stride + j + dst_stride],
_mm256_extracti128_si256(res, 1));
}
}
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_convolve_ssse3.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_convolve_ssse3.c
index 5293e276449..21389dbe43d 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_convolve_ssse3.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_convolve_ssse3.c
@@ -136,10 +136,10 @@ void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride,
res_a_round1 = _mm_min_epi16(res_a_round1, clip_pixel);
res_a_round1 = _mm_max_epi16(res_a_round1, zero);
- *((uint32_t *)(&dst[i * dst_stride + j])) =
+ *((int *)(&dst[i * dst_stride + j])) =
_mm_cvtsi128_si32(res_a_round0);
- *((uint32_t *)(&dst[i * dst_stride + j + dst_stride])) =
+ *((int *)(&dst[i * dst_stride + j + dst_stride])) =
_mm_cvtsi128_si32(res_a_round1);
}
@@ -264,10 +264,10 @@ void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride,
res_a_round1 = _mm_min_epi16(res_a_round1, clip_pixel);
res_a_round1 = _mm_max_epi16(res_a_round1, zero);
- *((uint32_t *)(&dst[i * dst_stride + j])) =
+ *((int *)(&dst[i * dst_stride + j])) =
_mm_cvtsi128_si32(res_a_round0);
- *((uint32_t *)(&dst[i * dst_stride + j + dst_stride])) =
+ *((int *)(&dst[i * dst_stride + j + dst_stride])) =
_mm_cvtsi128_si32(res_a_round1);
}
@@ -375,7 +375,7 @@ void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride,
} else if (w == 4) {
_mm_storel_epi64((__m128i *)&dst[i * dst_stride + j], res);
} else {
- *((uint32_t *)(&dst[i * dst_stride + j])) = _mm_cvtsi128_si32(res);
+ *((int *)(&dst[i * dst_stride + j])) = _mm_cvtsi128_si32(res);
}
}
}
@@ -430,7 +430,7 @@ void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride,
} else if (w == 4) {
_mm_storel_epi64((__m128i *)&dst[i * dst_stride + j], res);
} else {
- *((uint32_t *)(&dst[i * dst_stride + j])) = _mm_cvtsi128_si32(res);
+ *((int *)(&dst[i * dst_stride + j])) = _mm_cvtsi128_si32(res);
}
}
}
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_intrapred_sse2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_intrapred_sse2.c
index 5a55736c488..6a2e915ed7c 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_intrapred_sse2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_intrapred_sse2.c
@@ -821,11 +821,11 @@ void aom_highbd_dc_predictor_4x8_sse2(uint16_t *dst, ptrdiff_t stride,
const __m128i sum_above = dc_sum_4(above);
const __m128i sum_left = dc_sum_8(left);
const __m128i sum = _mm_add_epi16(sum_above, sum_left);
- uint32_t sum32 = _mm_cvtsi128_si32(sum);
+ uint32_t sum32 = (uint32_t)_mm_cvtsi128_si32(sum);
sum32 >>= 16;
sum32 += 6;
sum32 /= 12;
- const __m128i row = _mm_set1_epi16((uint16_t)sum32);
+ const __m128i row = _mm_set1_epi16((int16_t)sum32);
int i;
for (i = 0; i < 4; ++i) {
_mm_storel_epi64((__m128i *)dst, row);
@@ -842,11 +842,11 @@ void aom_highbd_dc_predictor_8x4_sse2(uint16_t *dst, ptrdiff_t stride,
const __m128i sum_left = dc_sum_4(left);
const __m128i sum_above = dc_sum_8(above);
const __m128i sum = _mm_add_epi16(sum_above, sum_left);
- uint32_t sum32 = _mm_cvtsi128_si32(sum);
+ uint32_t sum32 = (uint32_t)_mm_cvtsi128_si32(sum);
sum32 >>= 16;
sum32 += 6;
sum32 /= 12;
- const __m128i row = _mm_set1_epi16((uint16_t)sum32);
+ const __m128i row = _mm_set1_epi16((int16_t)sum32);
_mm_store_si128((__m128i *)dst, row);
dst += stride;
@@ -867,10 +867,10 @@ void aom_highbd_dc_predictor_8x16_sse2(uint16_t *dst, ptrdiff_t stride,
sum_left = _mm_unpacklo_epi16(sum_left, zero);
sum_above = _mm_unpacklo_epi16(sum_above, zero);
const __m128i sum = _mm_add_epi32(sum_left, sum_above);
- uint32_t sum32 = _mm_cvtsi128_si32(sum);
+ uint32_t sum32 = (uint32_t)_mm_cvtsi128_si32(sum);
sum32 += 12;
sum32 /= 24;
- const __m128i row = _mm_set1_epi16((uint16_t)sum32);
+ const __m128i row = _mm_set1_epi16((int16_t)sum32);
int i;
for (i = 0; i < 4; ++i) {
_mm_store_si128((__m128i *)dst, row);
@@ -894,10 +894,10 @@ void aom_highbd_dc_predictor_16x8_sse2(uint16_t *dst, ptrdiff_t stride,
sum_left = _mm_unpacklo_epi16(sum_left, zero);
sum_above = _mm_unpacklo_epi16(sum_above, zero);
const __m128i sum = _mm_add_epi32(sum_left, sum_above);
- uint32_t sum32 = _mm_cvtsi128_si32(sum);
+ uint32_t sum32 = (uint32_t)_mm_cvtsi128_si32(sum);
sum32 += 12;
sum32 /= 24;
- const __m128i row = _mm_set1_epi16((uint16_t)sum32);
+ const __m128i row = _mm_set1_epi16((int16_t)sum32);
int i;
for (i = 0; i < 2; ++i) {
_mm_store_si128((__m128i *)dst, row);
@@ -924,10 +924,10 @@ void aom_highbd_dc_predictor_16x32_sse2(uint16_t *dst, ptrdiff_t stride,
const __m128i zero = _mm_setzero_si128();
sum_above = _mm_unpacklo_epi16(sum_above, zero);
const __m128i sum = _mm_add_epi32(sum_left, sum_above);
- uint32_t sum32 = _mm_cvtsi128_si32(sum);
+ uint32_t sum32 = (uint32_t)_mm_cvtsi128_si32(sum);
sum32 += 24;
sum32 /= 48;
- const __m128i row = _mm_set1_epi16((uint16_t)sum32);
+ const __m128i row = _mm_set1_epi16((int16_t)sum32);
int i;
for (i = 0; i < 8; ++i) {
_mm_store_si128((__m128i *)dst, row);
@@ -954,10 +954,10 @@ void aom_highbd_dc_predictor_32x16_sse2(uint16_t *dst, ptrdiff_t stride,
const __m128i zero = _mm_setzero_si128();
sum_left = _mm_unpacklo_epi16(sum_left, zero);
const __m128i sum = _mm_add_epi32(sum_left, sum_above);
- uint32_t sum32 = _mm_cvtsi128_si32(sum);
+ uint32_t sum32 = (uint32_t)_mm_cvtsi128_si32(sum);
sum32 += 24;
sum32 /= 48;
- const __m128i row = _mm_set1_epi16((uint16_t)sum32);
+ const __m128i row = _mm_set1_epi16((int16_t)sum32);
int i;
for (i = 0; i < 4; ++i) {
_mm_store_si128((__m128i *)dst, row);
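
The rounding constants in these predictors implement rounded integer division by the number of border pixels: the 4x8 and 8x4 cases average 12 pixels, so dc = (sum + 6) / 12; 8x16 and 16x8 average 24 pixels with +12; 16x32 and 32x16 average 48 with +24. A sketch of the pattern:

#include <stdint.h>

/* Rounded mean of n border pixels, e.g. n = 12: sum 1234 -> 103. */
static uint16_t dc_round(uint32_t sum, uint32_t n) {
  return (uint16_t)((sum + n / 2) / n);
}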
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_quantize_intrin_avx2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_quantize_intrin_avx2.c
index b9689202a0c..950465cf468 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_quantize_intrin_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_quantize_intrin_avx2.c
@@ -31,7 +31,8 @@ static INLINE void update_qp(__m256i *qp) {
static INLINE void init_qp(const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr, const int16_t *dequant_ptr,
- const int16_t *quant_shift_ptr, __m256i *qp) {
+ const int16_t *quant_shift_ptr, __m256i *qp,
+ int log_scale) {
const __m128i zbin = _mm_loadu_si128((const __m128i *)zbin_ptr);
const __m128i round = _mm_loadu_si128((const __m128i *)round_ptr);
const __m128i quant = _mm_loadu_si128((const __m128i *)quant_ptr);
@@ -42,13 +43,24 @@ static INLINE void init_qp(const int16_t *zbin_ptr, const int16_t *round_ptr,
init_one_qp(&quant, &qp[2]);
init_one_qp(&dequant, &qp[3]);
init_one_qp(&quant_shift, &qp[4]);
+ if (log_scale > 0) {
+ const __m256i rnd = _mm256_set1_epi32((int16_t)(1 << (log_scale - 1)));
+ qp[0] = _mm256_add_epi32(qp[0], rnd);
+ qp[0] = _mm256_srai_epi32(qp[0], log_scale);
+
+ qp[1] = _mm256_add_epi32(qp[1], rnd);
+ qp[1] = _mm256_srai_epi32(qp[1], log_scale);
+ }
+ // Subtracting 1 here eliminates a _mm256_cmpeq_epi32() instruction when
+ // calculating the zbin mask.
+ qp[0] = _mm256_sub_epi32(qp[0], _mm256_set1_epi32(1));
}
// Note:
// *x is multiplied by *y as eight int32_t lanes in parallel, with each
// product right-shifted by 16; the eight int32_t results are returned.
-static INLINE void mm256_mul_shift_epi32(const __m256i *x, const __m256i *y,
- __m256i *p) {
+static INLINE __m256i mm256_mul_shift_epi32(const __m256i *x,
+ const __m256i *y) {
__m256i prod_lo = _mm256_mul_epi32(*x, *y);
__m256i prod_hi = _mm256_srli_epi64(*x, 32);
const __m256i mult_hi = _mm256_srli_epi64(*y, 32);
@@ -60,53 +72,114 @@ static INLINE void mm256_mul_shift_epi32(const __m256i *x, const __m256i *y,
prod_hi = _mm256_srli_epi64(prod_hi, 16);
prod_hi = _mm256_slli_epi64(prod_hi, 32);
- *p = _mm256_or_si256(prod_lo, prod_hi);
+ return _mm256_or_si256(prod_lo, prod_hi);
+}
+
+static AOM_FORCE_INLINE __m256i get_max_lane_eob(const int16_t *iscan_ptr,
+ __m256i eobmax,
+ __m256i nz_mask) {
+ const __m256i packed_nz_mask = _mm256_packs_epi32(nz_mask, nz_mask);
+ const __m256i packed_nz_mask_perm =
+ _mm256_permute4x64_epi64(packed_nz_mask, 0xD8);
+ const __m256i iscan =
+ _mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)iscan_ptr));
+ const __m256i iscan_plus1 = _mm256_sub_epi16(iscan, packed_nz_mask_perm);
+ const __m256i nz_iscan = _mm256_and_si256(iscan_plus1, packed_nz_mask_perm);
+ return _mm256_max_epi16(eobmax, nz_iscan);
+}
+
+// Get the max eob from the lower 128 bits.
+static AOM_FORCE_INLINE uint16_t get_max_eob(__m256i eob) {
+ __m256i eob_s;
+ eob_s = _mm256_shuffle_epi32(eob, 0xe);
+ eob = _mm256_max_epi16(eob, eob_s);
+ eob_s = _mm256_shufflelo_epi16(eob, 0xe);
+ eob = _mm256_max_epi16(eob, eob_s);
+ eob_s = _mm256_shufflelo_epi16(eob, 1);
+ eob = _mm256_max_epi16(eob, eob_s);
+ return (uint16_t)_mm256_extract_epi16(eob, 0);
+}
+
+static AOM_FORCE_INLINE __m256i mm256_mul_shift_epi32_logscale(const __m256i *x,
+ const __m256i *y,
+ int log_scale) {
+ __m256i prod_lo = _mm256_mul_epi32(*x, *y);
+ __m256i prod_hi = _mm256_srli_epi64(*x, 32);
+ const __m256i mult_hi = _mm256_srli_epi64(*y, 32);
+ prod_hi = _mm256_mul_epi32(prod_hi, mult_hi);
+ prod_lo = _mm256_srli_epi64(prod_lo, 16 - log_scale);
+ const __m256i mask = _mm256_set_epi32(0, -1, 0, -1, 0, -1, 0, -1);
+ prod_lo = _mm256_and_si256(prod_lo, mask);
+ prod_hi = _mm256_srli_epi64(prod_hi, 16 - log_scale);
+ prod_hi = _mm256_slli_epi64(prod_hi, 32);
+ return _mm256_or_si256(prod_lo, prod_hi);
}
-static INLINE void quantize(const __m256i *qp, __m256i *c,
- const int16_t *iscan_ptr, tran_low_t *qcoeff,
- tran_low_t *dqcoeff, __m256i *eob) {
- const __m256i abs = _mm256_abs_epi32(*c);
- const __m256i flag1 = _mm256_cmpgt_epi32(abs, qp[0]);
- __m256i flag2 = _mm256_cmpeq_epi32(abs, qp[0]);
- flag2 = _mm256_or_si256(flag1, flag2);
- const int32_t nzflag = _mm256_movemask_epi8(flag2);
-
- if (LIKELY(nzflag)) {
- __m256i q = _mm256_add_epi32(abs, qp[1]);
- __m256i tmp;
- mm256_mul_shift_epi32(&q, &qp[2], &tmp);
- q = _mm256_add_epi32(tmp, q);
-
- mm256_mul_shift_epi32(&q, &qp[4], &q);
- __m256i dq = _mm256_mullo_epi32(q, qp[3]);
-
- q = _mm256_sign_epi32(q, *c);
- dq = _mm256_sign_epi32(dq, *c);
- q = _mm256_and_si256(q, flag2);
- dq = _mm256_and_si256(dq, flag2);
-
- _mm256_storeu_si256((__m256i *)qcoeff, q);
- _mm256_storeu_si256((__m256i *)dqcoeff, dq);
-
- const __m128i isc = _mm_loadu_si128((const __m128i *)iscan_ptr);
- const __m128i zr = _mm_setzero_si128();
- const __m128i lo = _mm_unpacklo_epi16(isc, zr);
- const __m128i hi = _mm_unpackhi_epi16(isc, zr);
- const __m256i iscan =
- _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
+static AOM_FORCE_INLINE void quantize_logscale(
+ const __m256i *qp, const tran_low_t *coeff_ptr, const int16_t *iscan_ptr,
+ tran_low_t *qcoeff, tran_low_t *dqcoeff, __m256i *eob, int log_scale) {
+ const __m256i coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
+ const __m256i abs_coeff = _mm256_abs_epi32(coeff);
+ const __m256i zbin_mask = _mm256_cmpgt_epi32(abs_coeff, qp[0]);
+ if (UNLIKELY(_mm256_movemask_epi8(zbin_mask) == 0)) {
const __m256i zero = _mm256_setzero_si256();
- const __m256i zc = _mm256_cmpeq_epi32(dq, zero);
- const __m256i nz = _mm256_cmpeq_epi32(zc, zero);
- __m256i cur_eob = _mm256_sub_epi32(iscan, nz);
- cur_eob = _mm256_and_si256(cur_eob, nz);
- *eob = _mm256_max_epi32(cur_eob, *eob);
- } else {
+ _mm256_storeu_si256((__m256i *)qcoeff, zero);
+ _mm256_storeu_si256((__m256i *)dqcoeff, zero);
+ return;
+ }
+
+ const __m256i tmp_rnd =
+ _mm256_and_si256(_mm256_add_epi32(abs_coeff, qp[1]), zbin_mask);
+ // const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw;
+ const __m256i tmp = mm256_mul_shift_epi32_logscale(&tmp_rnd, &qp[2], 0);
+ const __m256i tmp2 = _mm256_add_epi32(tmp, tmp_rnd);
+ // const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >>
+ // (16 - log_scale + AOM_QM_BITS));
+ const __m256i abs_q =
+ mm256_mul_shift_epi32_logscale(&tmp2, &qp[4], log_scale);
+ const __m256i abs_dq =
+ _mm256_srli_epi32(_mm256_mullo_epi32(abs_q, qp[3]), log_scale);
+ const __m256i nz_mask = _mm256_cmpgt_epi32(abs_q, _mm256_setzero_si256());
+ const __m256i q = _mm256_sign_epi32(abs_q, coeff);
+ const __m256i dq = _mm256_sign_epi32(abs_dq, coeff);
+
+ _mm256_storeu_si256((__m256i *)qcoeff, q);
+ _mm256_storeu_si256((__m256i *)dqcoeff, dq);
+
+ *eob = get_max_lane_eob(iscan_ptr, *eob, nz_mask);
+}
+
+static AOM_FORCE_INLINE void quantize(const __m256i *qp,
+ const tran_low_t *coeff_ptr,
+ const int16_t *iscan_ptr,
+ tran_low_t *qcoeff, tran_low_t *dqcoeff,
+ __m256i *eob) {
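+  // Same flow as quantize_logscale() with log_scale == 0: the scale
+  // shifts fold away, so the plain mul-shift helper suffices.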
+ const __m256i coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
+ const __m256i abs_coeff = _mm256_abs_epi32(coeff);
+ const __m256i zbin_mask = _mm256_cmpgt_epi32(abs_coeff, qp[0]);
+
+ if (UNLIKELY(_mm256_movemask_epi8(zbin_mask) == 0)) {
const __m256i zero = _mm256_setzero_si256();
_mm256_storeu_si256((__m256i *)qcoeff, zero);
_mm256_storeu_si256((__m256i *)dqcoeff, zero);
+ return;
}
+
+ const __m256i tmp_rnd =
+ _mm256_and_si256(_mm256_add_epi32(abs_coeff, qp[1]), zbin_mask);
+ const __m256i tmp = mm256_mul_shift_epi32(&tmp_rnd, &qp[2]);
+ const __m256i tmp2 = _mm256_add_epi32(tmp, tmp_rnd);
+ const __m256i abs_q = mm256_mul_shift_epi32(&tmp2, &qp[4]);
+ const __m256i abs_dq = _mm256_mullo_epi32(abs_q, qp[3]);
+ const __m256i nz_mask = _mm256_cmpgt_epi32(abs_q, _mm256_setzero_si256());
+ const __m256i q = _mm256_sign_epi32(abs_q, coeff);
+ const __m256i dq = _mm256_sign_epi32(abs_dq, coeff);
+
+ _mm256_storeu_si256((__m256i *)qcoeff, q);
+ _mm256_storeu_si256((__m256i *)dqcoeff, dq);
+
+ *eob = get_max_lane_eob(iscan_ptr, *eob, nz_mask);
}
void aom_highbd_quantize_b_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -118,14 +191,50 @@ void aom_highbd_quantize_b_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
(void)scan;
- const unsigned int step = 8;
+ const int step = 8;
+
+ __m256i eob = _mm256_setzero_si256();
+ __m256i qp[5];
+
+ init_qp(zbin_ptr, round_ptr, quant_ptr, dequant_ptr, quant_shift_ptr, qp, 0);
+
+ quantize(qp, coeff_ptr, iscan, qcoeff_ptr, dqcoeff_ptr, &eob);
+
+ coeff_ptr += step;
+ qcoeff_ptr += step;
+ dqcoeff_ptr += step;
+ iscan += step;
+ n_coeffs -= step;
+
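+  // The first group of eight consumed the DC quantizer entries;
+  // update_qp() switches qp over to the AC entries used for all
+  // remaining coefficients.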
+ update_qp(qp);
+
+ while (n_coeffs > 0) {
+ quantize(qp, coeff_ptr, iscan, qcoeff_ptr, dqcoeff_ptr, &eob);
+
+ coeff_ptr += step;
+ qcoeff_ptr += step;
+ dqcoeff_ptr += step;
+ iscan += step;
+ n_coeffs -= step;
+ }
- __m256i qp[5], coeff;
- init_qp(zbin_ptr, round_ptr, quant_ptr, dequant_ptr, quant_shift_ptr, qp);
- coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
+ *eob_ptr = get_max_eob(eob);
+}
+
+void aom_highbd_quantize_b_32x32_avx2(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ (void)scan;
+ const unsigned int step = 8;
__m256i eob = _mm256_setzero_si256();
- quantize(qp, &coeff, iscan, qcoeff_ptr, dqcoeff_ptr, &eob);
+ __m256i qp[5];
+ init_qp(zbin_ptr, round_ptr, quant_ptr, dequant_ptr, quant_shift_ptr, qp, 1);
+
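+  // log_scale = 1 mirrors the scalar 32x32 path (quant shift >> 15,
+  // dequant / 2).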
+ quantize_logscale(qp, coeff_ptr, iscan, qcoeff_ptr, dqcoeff_ptr, &eob, 1);
coeff_ptr += step;
qcoeff_ptr += step;
@@ -136,8 +245,7 @@ void aom_highbd_quantize_b_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
update_qp(qp);
while (n_coeffs > 0) {
- coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr);
- quantize(qp, &coeff, iscan, qcoeff_ptr, dqcoeff_ptr, &eob);
+ quantize_logscale(qp, coeff_ptr, iscan, qcoeff_ptr, dqcoeff_ptr, &eob, 1);
coeff_ptr += step;
qcoeff_ptr += step;
@@ -145,16 +253,42 @@ void aom_highbd_quantize_b_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
iscan += step;
n_coeffs -= step;
}
- {
- __m256i eob_s;
- eob_s = _mm256_shuffle_epi32(eob, 0xe);
- eob = _mm256_max_epi16(eob, eob_s);
- eob_s = _mm256_shufflelo_epi16(eob, 0xe);
- eob = _mm256_max_epi16(eob, eob_s);
- eob_s = _mm256_shufflelo_epi16(eob, 1);
- eob = _mm256_max_epi16(eob, eob_s);
- const __m128i final_eob = _mm_max_epi16(_mm256_castsi256_si128(eob),
- _mm256_extractf128_si256(eob, 1));
- *eob_ptr = _mm_extract_epi16(final_eob, 0);
+
+ *eob_ptr = get_max_eob(eob);
+}
+
+void aom_highbd_quantize_b_64x64_avx2(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ (void)scan;
+ const int step = 8;
+
+ __m256i eob = _mm256_setzero_si256();
+ __m256i qp[5];
+ init_qp(zbin_ptr, round_ptr, quant_ptr, dequant_ptr, quant_shift_ptr, qp, 2);
+
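+  // log_scale = 2 mirrors the scalar 64x64 path (quant shift >> 14,
+  // dequant / 4).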
+ quantize_logscale(qp, coeff_ptr, iscan, qcoeff_ptr, dqcoeff_ptr, &eob, 2);
+
+ coeff_ptr += step;
+ qcoeff_ptr += step;
+ dqcoeff_ptr += step;
+ iscan += step;
+ n_coeffs -= step;
+
+ update_qp(qp);
+
+ while (n_coeffs > 0) {
+ quantize_logscale(qp, coeff_ptr, iscan, qcoeff_ptr, dqcoeff_ptr, &eob, 2);
+
+ coeff_ptr += step;
+ qcoeff_ptr += step;
+ dqcoeff_ptr += step;
+ iscan += step;
+ n_coeffs -= step;
}
+
+ *eob_ptr = get_max_eob(eob);
}
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_quantize_intrin_sse2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_quantize_intrin_sse2.c
index 1764a4952a4..a5c450a1beb 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_quantize_intrin_sse2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_quantize_intrin_sse2.c
@@ -80,7 +80,8 @@ void aom_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t count,
const int64_t tmp4 = ((tmp3 * quant_ptr[k != 0]) >> 16) + tmp3;
const uint32_t abs_qcoeff =
(uint32_t)((tmp4 * quant_shift_ptr[k != 0]) >> 16);
- qcoeff_ptr[k] = (int)(abs_qcoeff ^ coeff_sign[j]) - coeff_sign[j];
+ qcoeff_ptr[k] =
+ (int)(abs_qcoeff ^ (uint32_t)coeff_sign[j]) - coeff_sign[j];
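+      // (abs_qcoeff ^ sign) - sign negates abs_qcoeff when sign == -1;
+      // the explicit cast keeps the XOR in unsigned arithmetic.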
dqcoeff_ptr[k] = qcoeff_ptr[k] * dequant_ptr[k != 0];
if (abs_qcoeff) eob_i = iscan[k] > eob_i ? iscan[k] : eob_i;
}
@@ -140,7 +141,7 @@ void aom_highbd_quantize_b_32x32_sse2(
const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
const uint32_t abs_qcoeff =
(uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 15);
- qcoeff_ptr[rc] = (int)(abs_qcoeff ^ coeff_sign) - coeff_sign;
+ qcoeff_ptr[rc] = (int)(abs_qcoeff ^ (uint32_t)coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
if (abs_qcoeff) eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob;
}
@@ -198,7 +199,7 @@ void aom_highbd_quantize_b_64x64_sse2(
const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1;
const uint32_t abs_qcoeff =
(uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 14);
- qcoeff_ptr[rc] = (int)(abs_qcoeff ^ coeff_sign) - coeff_sign;
+ qcoeff_ptr[rc] = (int)(abs_qcoeff ^ (uint32_t)coeff_sign) - coeff_sign;
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 4;
if (abs_qcoeff) eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob;
}
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_sad_avx2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_sad_avx2.c
index ad4db2f8c92..f583772ce61 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_sad_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_sad_avx2.c
@@ -551,7 +551,7 @@ unsigned int aom_highbd_sad128x128_avg_avx2(const uint8_t *src, int src_stride,
static INLINE void get_4d_sad_from_mm256_epi32(const __m256i *v,
uint32_t *res) {
__m256i u0, u1, u2, u3;
- const __m256i mask = yy_set1_64_from_32i(UINT32_MAX);
+ const __m256i mask = yy_set1_64_from_32i(~0);
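+  // ~0 is an all-ones int, so the call no longer relies on an implicit
+  // unsigned-to-signed conversion.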
__m128i sad;
// 8 32-bit summations
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_variance_avx2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_variance_avx2.c
index 49912ac1914..36e647383b5 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_variance_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_variance_avx2.c
@@ -26,13 +26,13 @@ static uint32_t aom_highbd_var_filter_block2d_bil_avx2(
const uint32_t xoffset, const uint32_t yoffset, const uint8_t *dst_ptr8,
int dst_stride, uint32_t *sse) {
const __m256i filter1 =
- _mm256_set1_epi32((uint32_t)(bilinear_filters_2t[xoffset][1] << 16) |
+ _mm256_set1_epi32((int)(bilinear_filters_2t[xoffset][1] << 16) |
bilinear_filters_2t[xoffset][0]);
const __m256i filter2 =
- _mm256_set1_epi32((uint32_t)(bilinear_filters_2t[yoffset][1] << 16) |
+ _mm256_set1_epi32((int)(bilinear_filters_2t[yoffset][1] << 16) |
bilinear_filters_2t[yoffset][0]);
const __m256i one = _mm256_set1_epi16(1);
- const uint32_t bitshift = (uint32_t)0x40;
+ const int bitshift = 0x40;
(void)pixel_step;
unsigned int i, j, prev = 0, curr = 2;
uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_variance_sse2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_variance_sse2.c
index 6bd6a5a3fcf..d45885caf7b 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_variance_sse2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/highbd_variance_sse2.c
@@ -629,13 +629,12 @@ void aom_highbd_dist_wtd_comp_avg_pred_sse2(
const uint8_t *ref8, int ref_stride,
const DIST_WTD_COMP_PARAMS *jcp_param) {
int i;
- const uint16_t wt0 = (uint16_t)jcp_param->fwd_offset;
- const uint16_t wt1 = (uint16_t)jcp_param->bck_offset;
- const __m128i w0 = _mm_set_epi16(wt0, wt0, wt0, wt0, wt0, wt0, wt0, wt0);
- const __m128i w1 = _mm_set_epi16(wt1, wt1, wt1, wt1, wt1, wt1, wt1, wt1);
- const uint16_t round = ((1 << DIST_PRECISION_BITS) >> 1);
- const __m128i r =
- _mm_set_epi16(round, round, round, round, round, round, round, round);
+ const int16_t wt0 = (int16_t)jcp_param->fwd_offset;
+ const int16_t wt1 = (int16_t)jcp_param->bck_offset;
+ const __m128i w0 = _mm_set1_epi16(wt0);
+ const __m128i w1 = _mm_set1_epi16(wt1);
+ const int16_t round = (int16_t)((1 << DIST_PRECISION_BITS) >> 1);
+ const __m128i r = _mm_set1_epi16(round);
uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
uint16_t *comp_pred = CONVERT_TO_SHORTPTR(comp_pred8);
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/intrapred_avx2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/intrapred_avx2.c
index b4b5ce2880a..621ef7af763 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/intrapred_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/intrapred_avx2.c
@@ -361,7 +361,7 @@ void aom_dc_128_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
(void)left;
- const __m256i row = _mm256_set1_epi8((uint8_t)0x80);
+ const __m256i row = _mm256_set1_epi8((int8_t)0x80);
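+  // _mm256_set1_epi8 takes a signed char, so the DC_128 fill value 0x80
+  // is narrowed explicitly rather than implicitly.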
row_store_32xh(&row, 32, dst, stride);
}
@@ -426,10 +426,10 @@ void aom_dc_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t stride,
const __m128i top_sum = dc_sum_32_sse2(above);
__m128i left_sum = dc_sum_16_sse2(left);
left_sum = _mm_add_epi16(top_sum, left_sum);
- uint16_t sum = _mm_cvtsi128_si32(left_sum);
+ uint16_t sum = (uint16_t)_mm_cvtsi128_si32(left_sum);
sum += 24;
sum /= 48;
- const __m256i row = _mm256_set1_epi8((uint8_t)sum);
+ const __m256i row = _mm256_set1_epi8((int8_t)sum);
row_store_32xh(&row, 16, dst, stride);
}
@@ -438,10 +438,10 @@ void aom_dc_predictor_32x64_avx2(uint8_t *dst, ptrdiff_t stride,
const __m256i sum_above = dc_sum_32(above);
__m256i sum_left = dc_sum_64(left);
sum_left = _mm256_add_epi16(sum_left, sum_above);
- uint16_t sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
+ uint16_t sum = (uint16_t)_mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
sum += 48;
sum /= 96;
- const __m256i row = _mm256_set1_epi8((uint8_t)sum);
+ const __m256i row = _mm256_set1_epi8((int8_t)sum);
row_store_32xh(&row, 64, dst, stride);
}
@@ -450,10 +450,10 @@ void aom_dc_predictor_64x64_avx2(uint8_t *dst, ptrdiff_t stride,
const __m256i sum_above = dc_sum_64(above);
__m256i sum_left = dc_sum_64(left);
sum_left = _mm256_add_epi16(sum_left, sum_above);
- uint16_t sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
+ uint16_t sum = (uint16_t)_mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
sum += 64;
sum /= 128;
- const __m256i row = _mm256_set1_epi8((uint8_t)sum);
+ const __m256i row = _mm256_set1_epi8((int8_t)sum);
row_store_64xh(&row, 64, dst, stride);
}
@@ -462,10 +462,10 @@ void aom_dc_predictor_64x32_avx2(uint8_t *dst, ptrdiff_t stride,
const __m256i sum_above = dc_sum_64(above);
__m256i sum_left = dc_sum_32(left);
sum_left = _mm256_add_epi16(sum_left, sum_above);
- uint16_t sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
+ uint16_t sum = (uint16_t)_mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
sum += 48;
sum /= 96;
- const __m256i row = _mm256_set1_epi8((uint8_t)sum);
+ const __m256i row = _mm256_set1_epi8((int8_t)sum);
row_store_64xh(&row, 32, dst, stride);
}
@@ -474,10 +474,10 @@ void aom_dc_predictor_64x16_avx2(uint8_t *dst, ptrdiff_t stride,
const __m256i sum_above = dc_sum_64(above);
__m256i sum_left = _mm256_castsi128_si256(dc_sum_16_sse2(left));
sum_left = _mm256_add_epi16(sum_left, sum_above);
- uint16_t sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
+ uint16_t sum = (uint16_t)_mm_cvtsi128_si32(_mm256_castsi256_si128(sum_left));
sum += 40;
sum /= 80;
- const __m256i row = _mm256_set1_epi8((uint8_t)sum);
+ const __m256i row = _mm256_set1_epi8((int8_t)sum);
row_store_64xh(&row, 16, dst, stride);
}
@@ -628,7 +628,7 @@ void aom_dc_128_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
(void)left;
- const __m256i row = _mm256_set1_epi8((uint8_t)0x80);
+ const __m256i row = _mm256_set1_epi8((int8_t)0x80);
row_store_32xh(&row, 16, dst, stride);
}
@@ -637,7 +637,7 @@ void aom_dc_128_predictor_32x64_avx2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
(void)left;
- const __m256i row = _mm256_set1_epi8((uint8_t)0x80);
+ const __m256i row = _mm256_set1_epi8((int8_t)0x80);
row_store_32xh(&row, 64, dst, stride);
}
@@ -646,7 +646,7 @@ void aom_dc_128_predictor_64x64_avx2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
(void)left;
- const __m256i row = _mm256_set1_epi8((uint8_t)0x80);
+ const __m256i row = _mm256_set1_epi8((int8_t)0x80);
row_store_64xh(&row, 64, dst, stride);
}
@@ -655,7 +655,7 @@ void aom_dc_128_predictor_64x32_avx2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
(void)left;
- const __m256i row = _mm256_set1_epi8((uint8_t)0x80);
+ const __m256i row = _mm256_set1_epi8((int8_t)0x80);
row_store_64xh(&row, 32, dst, stride);
}
@@ -664,7 +664,7 @@ void aom_dc_128_predictor_64x16_avx2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
(void)left;
- const __m256i row = _mm256_set1_epi8((uint8_t)0x80);
+ const __m256i row = _mm256_set1_epi8((int8_t)0x80);
row_store_64xh(&row, 16, dst, stride);
}
@@ -754,7 +754,7 @@ void aom_paeth_predictor_16x8_avx2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
__m128i x = _mm_loadl_epi64((const __m128i *)left);
const __m256i l = _mm256_inserti128_si256(_mm256_castsi128_si256(x), x, 1);
- const __m256i tl16 = _mm256_set1_epi16((uint16_t)above[-1]);
+ const __m256i tl16 = _mm256_set1_epi16((int16_t)above[-1]);
__m256i rep = _mm256_set1_epi16((short)0x8000);
const __m256i one = _mm256_set1_epi16(1);
const __m256i top = get_top_vector(above);
@@ -778,7 +778,7 @@ static INLINE __m256i get_left_vector(const uint8_t *left) {
void aom_paeth_predictor_16x16_avx2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
const __m256i l = get_left_vector(left);
- const __m256i tl16 = _mm256_set1_epi16((uint16_t)above[-1]);
+ const __m256i tl16 = _mm256_set1_epi16((int16_t)above[-1]);
__m256i rep = _mm256_set1_epi16((short)0x8000);
const __m256i one = _mm256_set1_epi16(1);
const __m256i top = get_top_vector(above);
@@ -797,7 +797,7 @@ void aom_paeth_predictor_16x16_avx2(uint8_t *dst, ptrdiff_t stride,
void aom_paeth_predictor_16x32_avx2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
__m256i l = get_left_vector(left);
- const __m256i tl16 = _mm256_set1_epi16((uint16_t)above[-1]);
+ const __m256i tl16 = _mm256_set1_epi16((int16_t)above[-1]);
__m256i rep = _mm256_set1_epi16((short)0x8000);
const __m256i one = _mm256_set1_epi16(1);
const __m256i top = get_top_vector(above);
@@ -826,7 +826,7 @@ void aom_paeth_predictor_16x32_avx2(uint8_t *dst, ptrdiff_t stride,
void aom_paeth_predictor_16x64_avx2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
- const __m256i tl16 = _mm256_set1_epi16((uint16_t)above[-1]);
+ const __m256i tl16 = _mm256_set1_epi16((int16_t)above[-1]);
const __m256i one = _mm256_set1_epi16(1);
const __m256i top = get_top_vector(above);
@@ -864,7 +864,7 @@ void aom_paeth_predictor_32x16_avx2(uint8_t *dst, ptrdiff_t stride,
const __m256i l = get_left_vector(left);
const __m256i t0 = get_top_vector(above);
const __m256i t1 = get_top_vector(above + 16);
- const __m256i tl = _mm256_set1_epi16((uint16_t)above[-1]);
+ const __m256i tl = _mm256_set1_epi16((int16_t)above[-1]);
__m256i rep = _mm256_set1_epi16((short)0x8000);
const __m256i one = _mm256_set1_epi16(1);
@@ -886,7 +886,7 @@ void aom_paeth_predictor_32x32_avx2(uint8_t *dst, ptrdiff_t stride,
__m256i l = get_left_vector(left);
const __m256i t0 = get_top_vector(above);
const __m256i t1 = get_top_vector(above + 16);
- const __m256i tl = _mm256_set1_epi16((uint16_t)above[-1]);
+ const __m256i tl = _mm256_set1_epi16((int16_t)above[-1]);
__m256i rep = _mm256_set1_epi16((short)0x8000);
const __m256i one = _mm256_set1_epi16(1);
@@ -924,7 +924,7 @@ void aom_paeth_predictor_32x64_avx2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
const __m256i t0 = get_top_vector(above);
const __m256i t1 = get_top_vector(above + 16);
- const __m256i tl = _mm256_set1_epi16((uint16_t)above[-1]);
+ const __m256i tl = _mm256_set1_epi16((int16_t)above[-1]);
const __m256i one = _mm256_set1_epi16(1);
int i, j;
@@ -952,7 +952,7 @@ void aom_paeth_predictor_64x32_avx2(uint8_t *dst, ptrdiff_t stride,
const __m256i t1 = get_top_vector(above + 16);
const __m256i t2 = get_top_vector(above + 32);
const __m256i t3 = get_top_vector(above + 48);
- const __m256i tl = _mm256_set1_epi16((uint16_t)above[-1]);
+ const __m256i tl = _mm256_set1_epi16((int16_t)above[-1]);
const __m256i one = _mm256_set1_epi16(1);
int i, j;
@@ -984,7 +984,7 @@ void aom_paeth_predictor_64x64_avx2(uint8_t *dst, ptrdiff_t stride,
const __m256i t1 = get_top_vector(above + 16);
const __m256i t2 = get_top_vector(above + 32);
const __m256i t3 = get_top_vector(above + 48);
- const __m256i tl = _mm256_set1_epi16((uint16_t)above[-1]);
+ const __m256i tl = _mm256_set1_epi16((int16_t)above[-1]);
const __m256i one = _mm256_set1_epi16(1);
int i, j;
@@ -1016,7 +1016,7 @@ void aom_paeth_predictor_64x16_avx2(uint8_t *dst, ptrdiff_t stride,
const __m256i t1 = get_top_vector(above + 16);
const __m256i t2 = get_top_vector(above + 32);
const __m256i t3 = get_top_vector(above + 48);
- const __m256i tl = _mm256_set1_epi16((uint16_t)above[-1]);
+ const __m256i tl = _mm256_set1_epi16((int16_t)above[-1]);
const __m256i one = _mm256_set1_epi16(1);
int i;
@@ -3537,7 +3537,7 @@ static AOM_FORCE_INLINE void dr_prediction_z1_HxW_internal_avx2(
__m128i a_mbase_x;
a16 = _mm256_set1_epi16(16);
- a_mbase_x = _mm_set1_epi8(above[max_base_x]);
+ a_mbase_x = _mm_set1_epi8((int8_t)above[max_base_x]);
c3f = _mm256_set1_epi16(0x3f);
int x = dx;
@@ -3597,7 +3597,7 @@ static void dr_prediction_z1_4xN_avx2(int N, uint8_t *dst, ptrdiff_t stride,
dr_prediction_z1_HxW_internal_avx2(4, N, dstvec, above, upsample_above, dx);
for (int i = 0; i < N; i++) {
- *(uint32_t *)(dst + stride * i) = _mm_cvtsi128_si32(dstvec[i]);
+ *(int *)(dst + stride * i) = _mm_cvtsi128_si32(dstvec[i]);
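+    // _mm_cvtsi128_si32 returns int, so the four predicted bytes are
+    // stored through an int pointer to keep the types matched.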
}
}
@@ -3640,7 +3640,7 @@ static AOM_FORCE_INLINE void dr_prediction_z1_32xN_internal_avx2(
__m256i a_mbase_x, diff, c3f;
a16 = _mm256_set1_epi16(16);
- a_mbase_x = _mm256_set1_epi8(above[max_base_x]);
+ a_mbase_x = _mm256_set1_epi8((int8_t)above[max_base_x]);
c3f = _mm256_set1_epi16(0x3f);
int x = dx;
@@ -3722,7 +3722,7 @@ static void dr_prediction_z1_64xN_avx2(int N, uint8_t *dst, ptrdiff_t stride,
__m128i max_base_x128, base_inc128, mask128;
a16 = _mm256_set1_epi16(16);
- a_mbase_x = _mm256_set1_epi8(above[max_base_x]);
+ a_mbase_x = _mm256_set1_epi8((int8_t)above[max_base_x]);
max_base_x128 = _mm_set1_epi8(max_base_x);
c3f = _mm256_set1_epi16(0x3f);
@@ -3766,14 +3766,14 @@ static void dr_prediction_z1_64xN_avx2(int N, uint8_t *dst, ptrdiff_t stride,
_mm256_extracti128_si256(res, 1))); // 16 8-bit values
base_inc128 =
- _mm_setr_epi8((uint8_t)(base + j), (uint8_t)(base + j + 1),
- (uint8_t)(base + j + 2), (uint8_t)(base + j + 3),
- (uint8_t)(base + j + 4), (uint8_t)(base + j + 5),
- (uint8_t)(base + j + 6), (uint8_t)(base + j + 7),
- (uint8_t)(base + j + 8), (uint8_t)(base + j + 9),
- (uint8_t)(base + j + 10), (uint8_t)(base + j + 11),
- (uint8_t)(base + j + 12), (uint8_t)(base + j + 13),
- (uint8_t)(base + j + 14), (uint8_t)(base + j + 15));
+ _mm_setr_epi8((int8_t)(base + j), (int8_t)(base + j + 1),
+ (int8_t)(base + j + 2), (int8_t)(base + j + 3),
+ (int8_t)(base + j + 4), (int8_t)(base + j + 5),
+ (int8_t)(base + j + 6), (int8_t)(base + j + 7),
+ (int8_t)(base + j + 8), (int8_t)(base + j + 9),
+ (int8_t)(base + j + 10), (int8_t)(base + j + 11),
+ (int8_t)(base + j + 12), (int8_t)(base + j + 13),
+ (int8_t)(base + j + 14), (int8_t)(base + j + 15));
mask128 = _mm_cmpgt_epi8(_mm_subs_epu8(max_base_x128, base_inc128),
_mm_setzero_si128());
@@ -3926,7 +3926,7 @@ static void dr_prediction_z2_Nx4_avx2(int N, uint8_t *dst, ptrdiff_t stride,
resy = _mm_srli_si128(resx, 4);
resxy = _mm_blendv_epi8(resx, resy, *(__m128i *)BaseMask[base_min_diff]);
- *(uint32_t *)(dst) = _mm_cvtsi128_si32(resxy);
+ *(int *)(dst) = _mm_cvtsi128_si32(resxy);
dst += stride;
}
}
@@ -4092,7 +4092,7 @@ static void dr_prediction_z2_HxW_avx2(int H, int W, uint8_t *dst,
__m128i resx, resy;
__m128i resxy;
int y = r + 1;
- ydx = _mm256_set1_epi16((uint16_t)(y * dx));
+ ydx = _mm256_set1_epi16((int16_t)(y * dx));
int base_x = (-y * dx) >> frac_bits_x;
for (int j = 0; j < W; j += 16) {
@@ -4338,10 +4338,10 @@ static void dr_prediction_z3_4x4_avx2(uint8_t *dst, ptrdiff_t stride,
transpose4x8_8x4_low_sse2(&dstvec[0], &dstvec[1], &dstvec[2], &dstvec[3],
&d[0], &d[1], &d[2], &d[3]);
- *(uint32_t *)(dst + stride * 0) = _mm_cvtsi128_si32(d[0]);
- *(uint32_t *)(dst + stride * 1) = _mm_cvtsi128_si32(d[1]);
- *(uint32_t *)(dst + stride * 2) = _mm_cvtsi128_si32(d[2]);
- *(uint32_t *)(dst + stride * 3) = _mm_cvtsi128_si32(d[3]);
+ *(int *)(dst + stride * 0) = _mm_cvtsi128_si32(d[0]);
+ *(int *)(dst + stride * 1) = _mm_cvtsi128_si32(d[1]);
+ *(int *)(dst + stride * 2) = _mm_cvtsi128_si32(d[2]);
+ *(int *)(dst + stride * 3) = _mm_cvtsi128_si32(d[3]);
return;
}
@@ -4374,7 +4374,7 @@ static void dr_prediction_z3_4x8_avx2(uint8_t *dst, ptrdiff_t stride,
transpose4x8_8x4_sse2(&dstvec[0], &dstvec[1], &dstvec[2], &dstvec[3], &d[0],
&d[1], &d[2], &d[3], &d[4], &d[5], &d[6], &d[7]);
for (int i = 0; i < 8; i++) {
- *(uint32_t *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
+ *(int *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
}
}
@@ -4434,7 +4434,7 @@ static void dr_prediction_z3_4x16_avx2(uint8_t *dst, ptrdiff_t stride,
dr_prediction_z1_HxW_internal_avx2(16, 4, dstvec, left, upsample_left, dy);
transpose4x16_sse2(dstvec, d);
for (int i = 0; i < 16; i++) {
- *(uint32_t *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
+ *(int *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
}
}
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/intrapred_sse2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/intrapred_sse2.c
index 5afef68c39b..61e29731c4b 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/intrapred_sse2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/intrapred_sse2.c
@@ -112,12 +112,12 @@ void aom_dc_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t stride,
__m128i sum_above = dc_sum_4(above);
sum_above = _mm_add_epi16(sum_left, sum_above);
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
sum += 6;
sum = divide_using_multiply_shift(sum, 2, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
- const uint32_t pred = _mm_cvtsi128_si32(row);
+ const __m128i row = _mm_set1_epi8((int8_t)sum);
+ const uint32_t pred = (uint32_t)_mm_cvtsi128_si32(row);
dc_store_4xh(pred, 8, dst, stride);
}
@@ -127,12 +127,12 @@ void aom_dc_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t stride,
__m128i sum_above = dc_sum_4(above);
sum_above = _mm_add_epi16(sum_left, sum_above);
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
sum += 10;
sum = divide_using_multiply_shift(sum, 2, DC_MULTIPLIER_1X4);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
- const uint32_t pred = _mm_cvtsi128_si32(row);
+ const __m128i row = _mm_set1_epi8((int8_t)sum);
+ const uint32_t pred = (uint32_t)_mm_cvtsi128_si32(row);
dc_store_4xh(pred, 16, dst, stride);
}
@@ -142,11 +142,11 @@ void aom_dc_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t stride,
__m128i sum_above = dc_sum_8(above);
sum_above = _mm_add_epi16(sum_above, sum_left);
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
sum += 6;
sum = divide_using_multiply_shift(sum, 2, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ const __m128i row = _mm_set1_epi8((int8_t)sum);
dc_store_8xh(&row, 4, dst, stride);
}
@@ -156,10 +156,10 @@ void aom_dc_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t stride,
__m128i sum_above = dc_sum_8(above);
sum_above = _mm_add_epi16(sum_above, sum_left);
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
sum += 12;
sum = divide_using_multiply_shift(sum, 3, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ const __m128i row = _mm_set1_epi8((int8_t)sum);
dc_store_8xh(&row, 16, dst, stride);
}
@@ -169,10 +169,10 @@ void aom_dc_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t stride,
__m128i sum_above = dc_sum_8(above);
sum_above = _mm_add_epi16(sum_above, sum_left);
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
sum += 20;
sum = divide_using_multiply_shift(sum, 3, DC_MULTIPLIER_1X4);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ const __m128i row = _mm_set1_epi8((int8_t)sum);
dc_store_8xh(&row, 32, dst, stride);
}
@@ -182,10 +182,10 @@ void aom_dc_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t stride,
__m128i sum_above = dc_sum_16_sse2(above);
sum_above = _mm_add_epi16(sum_above, sum_left);
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
sum += 10;
sum = divide_using_multiply_shift(sum, 2, DC_MULTIPLIER_1X4);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ const __m128i row = _mm_set1_epi8((int8_t)sum);
dc_store_16xh(&row, 4, dst, stride);
}
@@ -195,10 +195,10 @@ void aom_dc_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
__m128i sum_above = dc_sum_16_sse2(above);
sum_above = _mm_add_epi16(sum_above, sum_left);
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
sum += 12;
sum = divide_using_multiply_shift(sum, 3, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ const __m128i row = _mm_set1_epi8((int8_t)sum);
dc_store_16xh(&row, 8, dst, stride);
}
@@ -208,10 +208,10 @@ void aom_dc_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t stride,
__m128i sum_above = dc_sum_16_sse2(above);
sum_above = _mm_add_epi16(sum_left, sum_above);
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
sum += 24;
sum = divide_using_multiply_shift(sum, 4, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ const __m128i row = _mm_set1_epi8((int8_t)sum);
dc_store_16xh(&row, 32, dst, stride);
}
@@ -221,10 +221,10 @@ void aom_dc_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t stride,
__m128i sum_above = dc_sum_16_sse2(above);
sum_above = _mm_add_epi16(sum_left, sum_above);
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
sum += 40;
sum = divide_using_multiply_shift(sum, 4, DC_MULTIPLIER_1X4);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ const __m128i row = _mm_set1_epi8((int8_t)sum);
dc_store_16xh(&row, 64, dst, stride);
}
@@ -234,10 +234,10 @@ void aom_dc_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t stride,
const __m128i sum_left = dc_sum_8(left);
sum_above = _mm_add_epi16(sum_above, sum_left);
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
sum += 20;
sum = divide_using_multiply_shift(sum, 3, DC_MULTIPLIER_1X4);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ const __m128i row = _mm_set1_epi8((int8_t)sum);
dc_store_32xh(&row, 8, dst, stride);
}
@@ -247,10 +247,10 @@ void aom_dc_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t stride,
const __m128i sum_left = dc_sum_16_sse2(left);
sum_above = _mm_add_epi16(sum_above, sum_left);
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
sum += 24;
sum = divide_using_multiply_shift(sum, 4, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ const __m128i row = _mm_set1_epi8((int8_t)sum);
dc_store_32xh(&row, 16, dst, stride);
}
@@ -260,10 +260,10 @@ void aom_dc_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t stride,
const __m128i sum_left = dc_sum_64(left);
sum_above = _mm_add_epi16(sum_above, sum_left);
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
sum += 48;
sum = divide_using_multiply_shift(sum, 5, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ const __m128i row = _mm_set1_epi8((int8_t)sum);
dc_store_32xh(&row, 64, dst, stride);
}
@@ -273,10 +273,10 @@ void aom_dc_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t stride,
const __m128i sum_left = dc_sum_64(left);
sum_above = _mm_add_epi16(sum_above, sum_left);
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
sum += 64;
sum /= 128;
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ const __m128i row = _mm_set1_epi8((int8_t)sum);
dc_store_64xh(&row, 64, dst, stride);
}
@@ -286,10 +286,10 @@ void aom_dc_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t stride,
const __m128i sum_left = dc_sum_32_sse2(left);
sum_above = _mm_add_epi16(sum_above, sum_left);
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
sum += 48;
sum = divide_using_multiply_shift(sum, 5, DC_MULTIPLIER_1X2);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ const __m128i row = _mm_set1_epi8((int8_t)sum);
dc_store_64xh(&row, 32, dst, stride);
}
@@ -299,10 +299,10 @@ void aom_dc_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t stride,
const __m128i sum_left = dc_sum_16_sse2(left);
sum_above = _mm_add_epi16(sum_above, sum_left);
- uint32_t sum = _mm_cvtsi128_si32(sum_above);
+ uint32_t sum = (uint32_t)_mm_cvtsi128_si32(sum_above);
sum += 40;
sum = divide_using_multiply_shift(sum, 4, DC_MULTIPLIER_1X4);
- const __m128i row = _mm_set1_epi8((uint8_t)sum);
+ const __m128i row = _mm_set1_epi8((int8_t)sum);
dc_store_64xh(&row, 16, dst, stride);
}
@@ -313,13 +313,13 @@ void aom_dc_top_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)left;
__m128i sum_above = dc_sum_4(above);
- const __m128i two = _mm_set1_epi16((int16_t)2);
+ const __m128i two = _mm_set1_epi16(2);
sum_above = _mm_add_epi16(sum_above, two);
sum_above = _mm_srai_epi16(sum_above, 2);
sum_above = _mm_shufflelo_epi16(sum_above, 0);
sum_above = _mm_packus_epi16(sum_above, sum_above);
- const uint32_t pred = _mm_cvtsi128_si32(sum_above);
+ const uint32_t pred = (uint32_t)_mm_cvtsi128_si32(sum_above);
dc_store_4xh(pred, 8, dst, stride);
}
@@ -327,13 +327,13 @@ void aom_dc_top_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)left;
__m128i sum_above = dc_sum_4(above);
- const __m128i two = _mm_set1_epi16((int16_t)2);
+ const __m128i two = _mm_set1_epi16(2);
sum_above = _mm_add_epi16(sum_above, two);
sum_above = _mm_srai_epi16(sum_above, 2);
sum_above = _mm_shufflelo_epi16(sum_above, 0);
sum_above = _mm_packus_epi16(sum_above, sum_above);
- const uint32_t pred = _mm_cvtsi128_si32(sum_above);
+ const uint32_t pred = (uint32_t)_mm_cvtsi128_si32(sum_above);
dc_store_4xh(pred, 16, dst, stride);
}
@@ -341,7 +341,7 @@ void aom_dc_top_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)left;
__m128i sum_above = dc_sum_8(above);
- const __m128i four = _mm_set1_epi16((uint16_t)4);
+ const __m128i four = _mm_set1_epi16(4);
sum_above = _mm_add_epi16(sum_above, four);
sum_above = _mm_srai_epi16(sum_above, 3);
sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
@@ -353,7 +353,7 @@ void aom_dc_top_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)left;
__m128i sum_above = dc_sum_8(above);
- const __m128i four = _mm_set1_epi16((uint16_t)4);
+ const __m128i four = _mm_set1_epi16(4);
sum_above = _mm_add_epi16(sum_above, four);
sum_above = _mm_srai_epi16(sum_above, 3);
sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
@@ -365,7 +365,7 @@ void aom_dc_top_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)left;
__m128i sum_above = dc_sum_8(above);
- const __m128i four = _mm_set1_epi16((uint16_t)4);
+ const __m128i four = _mm_set1_epi16(4);
sum_above = _mm_add_epi16(sum_above, four);
sum_above = _mm_srai_epi16(sum_above, 3);
sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
@@ -377,7 +377,7 @@ void aom_dc_top_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)left;
__m128i sum_above = dc_sum_16_sse2(above);
- const __m128i eight = _mm_set1_epi16((uint16_t)8);
+ const __m128i eight = _mm_set1_epi16(8);
sum_above = _mm_add_epi16(sum_above, eight);
sum_above = _mm_srai_epi16(sum_above, 4);
sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
@@ -390,7 +390,7 @@ void aom_dc_top_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)left;
__m128i sum_above = dc_sum_16_sse2(above);
- const __m128i eight = _mm_set1_epi16((uint16_t)8);
+ const __m128i eight = _mm_set1_epi16(8);
sum_above = _mm_add_epi16(sum_above, eight);
sum_above = _mm_srai_epi16(sum_above, 4);
sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
@@ -404,7 +404,7 @@ void aom_dc_top_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)left;
__m128i sum_above = dc_sum_16_sse2(above);
- const __m128i eight = _mm_set1_epi16((uint16_t)8);
+ const __m128i eight = _mm_set1_epi16(8);
sum_above = _mm_add_epi16(sum_above, eight);
sum_above = _mm_srai_epi16(sum_above, 4);
sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
@@ -418,7 +418,7 @@ void aom_dc_top_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)left;
__m128i sum_above = dc_sum_16_sse2(above);
- const __m128i eight = _mm_set1_epi16((uint16_t)8);
+ const __m128i eight = _mm_set1_epi16(8);
sum_above = _mm_add_epi16(sum_above, eight);
sum_above = _mm_srai_epi16(sum_above, 4);
sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
@@ -431,7 +431,7 @@ void aom_dc_top_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)left;
__m128i sum_above = dc_sum_32_sse2(above);
- const __m128i sixteen = _mm_set1_epi16((uint16_t)16);
+ const __m128i sixteen = _mm_set1_epi16(16);
sum_above = _mm_add_epi16(sum_above, sixteen);
sum_above = _mm_srai_epi16(sum_above, 5);
sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
@@ -445,7 +445,7 @@ void aom_dc_top_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)left;
__m128i sum_above = dc_sum_32_sse2(above);
- const __m128i sixteen = _mm_set1_epi16((uint16_t)16);
+ const __m128i sixteen = _mm_set1_epi16(16);
sum_above = _mm_add_epi16(sum_above, sixteen);
sum_above = _mm_srai_epi16(sum_above, 5);
sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
@@ -459,7 +459,7 @@ void aom_dc_top_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)left;
__m128i sum_above = dc_sum_32_sse2(above);
- const __m128i sixteen = _mm_set1_epi16((uint16_t)16);
+ const __m128i sixteen = _mm_set1_epi16(16);
sum_above = _mm_add_epi16(sum_above, sixteen);
sum_above = _mm_srai_epi16(sum_above, 5);
sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
@@ -473,7 +473,7 @@ void aom_dc_top_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)left;
__m128i sum_above = dc_sum_64(above);
- const __m128i thirtytwo = _mm_set1_epi16((uint16_t)32);
+ const __m128i thirtytwo = _mm_set1_epi16(32);
sum_above = _mm_add_epi16(sum_above, thirtytwo);
sum_above = _mm_srai_epi16(sum_above, 6);
sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
@@ -487,7 +487,7 @@ void aom_dc_top_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)left;
__m128i sum_above = dc_sum_64(above);
- const __m128i thirtytwo = _mm_set1_epi16((uint16_t)32);
+ const __m128i thirtytwo = _mm_set1_epi16(32);
sum_above = _mm_add_epi16(sum_above, thirtytwo);
sum_above = _mm_srai_epi16(sum_above, 6);
sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
@@ -501,7 +501,7 @@ void aom_dc_top_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)left;
__m128i sum_above = dc_sum_64(above);
- const __m128i thirtytwo = _mm_set1_epi16((uint16_t)32);
+ const __m128i thirtytwo = _mm_set1_epi16(32);
sum_above = _mm_add_epi16(sum_above, thirtytwo);
sum_above = _mm_srai_epi16(sum_above, 6);
sum_above = _mm_unpacklo_epi8(sum_above, sum_above);
@@ -517,13 +517,13 @@ void aom_dc_left_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)above;
__m128i sum_left = dc_sum_8(left);
- const __m128i four = _mm_set1_epi16((uint16_t)4);
+ const __m128i four = _mm_set1_epi16(4);
sum_left = _mm_add_epi16(sum_left, four);
sum_left = _mm_srai_epi16(sum_left, 3);
sum_left = _mm_shufflelo_epi16(sum_left, 0);
sum_left = _mm_packus_epi16(sum_left, sum_left);
- const uint32_t pred = _mm_cvtsi128_si32(sum_left);
+ const uint32_t pred = (uint32_t)_mm_cvtsi128_si32(sum_left);
dc_store_4xh(pred, 8, dst, stride);
}
@@ -532,13 +532,13 @@ void aom_dc_left_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
__m128i sum_left = dc_sum_16_sse2(left);
- const __m128i eight = _mm_set1_epi16((uint16_t)8);
+ const __m128i eight = _mm_set1_epi16(8);
sum_left = _mm_add_epi16(sum_left, eight);
sum_left = _mm_srai_epi16(sum_left, 4);
sum_left = _mm_shufflelo_epi16(sum_left, 0);
sum_left = _mm_packus_epi16(sum_left, sum_left);
- const uint32_t pred = _mm_cvtsi128_si32(sum_left);
+ const uint32_t pred = (uint32_t)_mm_cvtsi128_si32(sum_left);
dc_store_4xh(pred, 16, dst, stride);
}
@@ -546,7 +546,7 @@ void aom_dc_left_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)above;
__m128i sum_left = dc_sum_4(left);
- const __m128i two = _mm_set1_epi16((uint16_t)2);
+ const __m128i two = _mm_set1_epi16(2);
sum_left = _mm_add_epi16(sum_left, two);
sum_left = _mm_srai_epi16(sum_left, 2);
sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
@@ -559,7 +559,7 @@ void aom_dc_left_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
__m128i sum_left = dc_sum_16_sse2(left);
- const __m128i eight = _mm_set1_epi16((uint16_t)8);
+ const __m128i eight = _mm_set1_epi16(8);
sum_left = _mm_add_epi16(sum_left, eight);
sum_left = _mm_srai_epi16(sum_left, 4);
sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
@@ -572,7 +572,7 @@ void aom_dc_left_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
__m128i sum_left = dc_sum_32_sse2(left);
- const __m128i sixteen = _mm_set1_epi16((uint16_t)16);
+ const __m128i sixteen = _mm_set1_epi16(16);
sum_left = _mm_add_epi16(sum_left, sixteen);
sum_left = _mm_srai_epi16(sum_left, 5);
sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
@@ -585,7 +585,7 @@ void aom_dc_left_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
__m128i sum_left = dc_sum_4(left);
- const __m128i two = _mm_set1_epi16((uint16_t)2);
+ const __m128i two = _mm_set1_epi16(2);
sum_left = _mm_add_epi16(sum_left, two);
sum_left = _mm_srai_epi16(sum_left, 2);
sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
@@ -599,7 +599,7 @@ void aom_dc_left_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
__m128i sum_left = dc_sum_8(left);
- const __m128i four = _mm_set1_epi16((uint16_t)4);
+ const __m128i four = _mm_set1_epi16(4);
sum_left = _mm_add_epi16(sum_left, four);
sum_left = _mm_srai_epi16(sum_left, 3);
sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
@@ -613,7 +613,7 @@ void aom_dc_left_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
__m128i sum_left = dc_sum_32_sse2(left);
- const __m128i sixteen = _mm_set1_epi16((uint16_t)16);
+ const __m128i sixteen = _mm_set1_epi16(16);
sum_left = _mm_add_epi16(sum_left, sixteen);
sum_left = _mm_srai_epi16(sum_left, 5);
sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
@@ -627,7 +627,7 @@ void aom_dc_left_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
__m128i sum_left = dc_sum_64(left);
- const __m128i thirtytwo = _mm_set1_epi16((uint16_t)32);
+ const __m128i thirtytwo = _mm_set1_epi16(32);
sum_left = _mm_add_epi16(sum_left, thirtytwo);
sum_left = _mm_srai_epi16(sum_left, 6);
sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
@@ -641,7 +641,7 @@ void aom_dc_left_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
__m128i sum_left = dc_sum_8(left);
- const __m128i four = _mm_set1_epi16((uint16_t)4);
+ const __m128i four = _mm_set1_epi16(4);
sum_left = _mm_add_epi16(sum_left, four);
sum_left = _mm_srai_epi16(sum_left, 3);
sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
@@ -655,7 +655,7 @@ void aom_dc_left_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
__m128i sum_left = dc_sum_16_sse2(left);
- const __m128i eight = _mm_set1_epi16((uint16_t)8);
+ const __m128i eight = _mm_set1_epi16(8);
sum_left = _mm_add_epi16(sum_left, eight);
sum_left = _mm_srai_epi16(sum_left, 4);
sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
@@ -669,7 +669,7 @@ void aom_dc_left_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
__m128i sum_left = dc_sum_64(left);
- const __m128i thirtytwo = _mm_set1_epi16((uint16_t)32);
+ const __m128i thirtytwo = _mm_set1_epi16(32);
sum_left = _mm_add_epi16(sum_left, thirtytwo);
sum_left = _mm_srai_epi16(sum_left, 6);
sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
@@ -683,7 +683,7 @@ void aom_dc_left_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
__m128i sum_left = dc_sum_64(left);
- const __m128i thirtytwo = _mm_set1_epi16((uint16_t)32);
+ const __m128i thirtytwo = _mm_set1_epi16(32);
sum_left = _mm_add_epi16(sum_left, thirtytwo);
sum_left = _mm_srai_epi16(sum_left, 6);
sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
@@ -697,7 +697,7 @@ void aom_dc_left_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
__m128i sum_left = dc_sum_32_sse2(left);
- const __m128i sixteen = _mm_set1_epi16((uint16_t)16);
+ const __m128i sixteen = _mm_set1_epi16(16);
sum_left = _mm_add_epi16(sum_left, sixteen);
sum_left = _mm_srai_epi16(sum_left, 5);
sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
@@ -711,7 +711,7 @@ void aom_dc_left_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
__m128i sum_left = dc_sum_16_sse2(left);
- const __m128i eight = _mm_set1_epi16((uint16_t)8);
+ const __m128i eight = _mm_set1_epi16(8);
sum_left = _mm_add_epi16(sum_left, eight);
sum_left = _mm_srai_epi16(sum_left, 4);
sum_left = _mm_unpacklo_epi8(sum_left, sum_left);
@@ -743,7 +743,7 @@ void aom_dc_128_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_8xh(&row, 4, dst, stride);
}
@@ -751,7 +751,7 @@ void aom_dc_128_predictor_8x16_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_8xh(&row, 16, dst, stride);
}
@@ -759,7 +759,7 @@ void aom_dc_128_predictor_8x32_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_8xh(&row, 32, dst, stride);
}
@@ -767,7 +767,7 @@ void aom_dc_128_predictor_16x4_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_16xh(&row, 4, dst, stride);
}
@@ -775,7 +775,7 @@ void aom_dc_128_predictor_16x8_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_16xh(&row, 8, dst, stride);
}
@@ -784,7 +784,7 @@ void aom_dc_128_predictor_16x32_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_16xh(&row, 32, dst, stride);
}
@@ -793,7 +793,7 @@ void aom_dc_128_predictor_16x64_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_16xh(&row, 64, dst, stride);
}
@@ -801,7 +801,7 @@ void aom_dc_128_predictor_32x8_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_32xh(&row, 8, dst, stride);
}
@@ -810,7 +810,7 @@ void aom_dc_128_predictor_32x16_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_32xh(&row, 16, dst, stride);
}
@@ -819,7 +819,7 @@ void aom_dc_128_predictor_32x64_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_32xh(&row, 64, dst, stride);
}
@@ -828,7 +828,7 @@ void aom_dc_128_predictor_64x64_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_64xh(&row, 64, dst, stride);
}
@@ -837,7 +837,7 @@ void aom_dc_128_predictor_64x32_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_64xh(&row, 32, dst, stride);
}
@@ -846,7 +846,7 @@ void aom_dc_128_predictor_64x16_sse2(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left) {
(void)above;
(void)left;
- const __m128i row = _mm_set1_epi8((uint8_t)128);
+ const __m128i row = _mm_set1_epi8((int8_t)128);
dc_store_64xh(&row, 16, dst, stride);
}
@@ -990,26 +990,26 @@ void aom_h_predictor_4x8_sse2(uint8_t *dst, ptrdiff_t stride,
__m128i row1 = _mm_shufflelo_epi16(left_col, 0x55);
__m128i row2 = _mm_shufflelo_epi16(left_col, 0xaa);
__m128i row3 = _mm_shufflelo_epi16(left_col, 0xff);
- *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
+ *(int *)dst = _mm_cvtsi128_si32(row0);
dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
+ *(int *)dst = _mm_cvtsi128_si32(row1);
dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
+ *(int *)dst = _mm_cvtsi128_si32(row2);
dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
+ *(int *)dst = _mm_cvtsi128_si32(row3);
dst += stride;
left_col = _mm_unpackhi_epi64(left_col, left_col);
row0 = _mm_shufflelo_epi16(left_col, 0);
row1 = _mm_shufflelo_epi16(left_col, 0x55);
row2 = _mm_shufflelo_epi16(left_col, 0xaa);
row3 = _mm_shufflelo_epi16(left_col, 0xff);
- *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
+ *(int *)dst = _mm_cvtsi128_si32(row0);
dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
+ *(int *)dst = _mm_cvtsi128_si32(row1);
dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
+ *(int *)dst = _mm_cvtsi128_si32(row2);
dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
+ *(int *)dst = _mm_cvtsi128_si32(row3);
}
void aom_h_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t stride,
@@ -1023,13 +1023,13 @@ void aom_h_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t stride,
__m128i row1 = _mm_shufflelo_epi16(left_col_low, 0x55);
__m128i row2 = _mm_shufflelo_epi16(left_col_low, 0xaa);
__m128i row3 = _mm_shufflelo_epi16(left_col_low, 0xff);
- *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
+ *(int *)dst = _mm_cvtsi128_si32(row0);
dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
+ *(int *)dst = _mm_cvtsi128_si32(row1);
dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
+ *(int *)dst = _mm_cvtsi128_si32(row2);
dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
+ *(int *)dst = _mm_cvtsi128_si32(row3);
dst += stride;
left_col_low = _mm_unpackhi_epi64(left_col_low, left_col_low);
@@ -1037,26 +1037,26 @@ void aom_h_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t stride,
row1 = _mm_shufflelo_epi16(left_col_low, 0x55);
row2 = _mm_shufflelo_epi16(left_col_low, 0xaa);
row3 = _mm_shufflelo_epi16(left_col_low, 0xff);
- *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
+ *(int *)dst = _mm_cvtsi128_si32(row0);
dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
+ *(int *)dst = _mm_cvtsi128_si32(row1);
dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
+ *(int *)dst = _mm_cvtsi128_si32(row2);
dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
+ *(int *)dst = _mm_cvtsi128_si32(row3);
dst += stride;
row0 = _mm_shufflelo_epi16(left_col_high, 0);
row1 = _mm_shufflelo_epi16(left_col_high, 0x55);
row2 = _mm_shufflelo_epi16(left_col_high, 0xaa);
row3 = _mm_shufflelo_epi16(left_col_high, 0xff);
- *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
+ *(int *)dst = _mm_cvtsi128_si32(row0);
dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
+ *(int *)dst = _mm_cvtsi128_si32(row1);
dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
+ *(int *)dst = _mm_cvtsi128_si32(row2);
dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
+ *(int *)dst = _mm_cvtsi128_si32(row3);
dst += stride;
left_col_high = _mm_unpackhi_epi64(left_col_high, left_col_high);
@@ -1064,13 +1064,13 @@ void aom_h_predictor_4x16_sse2(uint8_t *dst, ptrdiff_t stride,
row1 = _mm_shufflelo_epi16(left_col_high, 0x55);
row2 = _mm_shufflelo_epi16(left_col_high, 0xaa);
row3 = _mm_shufflelo_epi16(left_col_high, 0xff);
- *(uint32_t *)dst = _mm_cvtsi128_si32(row0);
+ *(int *)dst = _mm_cvtsi128_si32(row0);
dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row1);
+ *(int *)dst = _mm_cvtsi128_si32(row1);
dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row2);
+ *(int *)dst = _mm_cvtsi128_si32(row2);
dst += stride;
- *(uint32_t *)dst = _mm_cvtsi128_si32(row3);
+ *(int *)dst = _mm_cvtsi128_si32(row3);
}
void aom_h_predictor_8x4_sse2(uint8_t *dst, ptrdiff_t stride,
@@ -1334,7 +1334,7 @@ static INLINE void h_predictor_32xh(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left, int height) {
int i = height >> 2;
do {
- __m128i left4 = _mm_cvtsi32_si128(((uint32_t *)left)[0]);
+ __m128i left4 = _mm_cvtsi32_si128(((int *)left)[0]);
left4 = _mm_unpacklo_epi8(left4, left4);
left4 = _mm_unpacklo_epi8(left4, left4);
const __m128i r0 = _mm_shuffle_epi32(left4, 0x0);
@@ -1364,7 +1364,7 @@ static INLINE void h_predictor_64xh(uint8_t *dst, ptrdiff_t stride,
const uint8_t *left, int height) {
int i = height >> 2;
do {
- __m128i left4 = _mm_cvtsi32_si128(((uint32_t *)left)[0]);
+ __m128i left4 = _mm_cvtsi32_si128(((int *)left)[0]);
left4 = _mm_unpacklo_epi8(left4, left4);
left4 = _mm_unpacklo_epi8(left4, left4);
const __m128i r0 = _mm_shuffle_epi32(left4, 0x0);
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/intrapred_sse4.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/intrapred_sse4.c
index b73258038bb..3f72dc48557 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/intrapred_sse4.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/intrapred_sse4.c
@@ -141,7 +141,7 @@ static AOM_FORCE_INLINE void dr_prediction_z1_HxW_internal_sse4_1(
__m128i a_mbase_x;
a16 = _mm_set1_epi16(16);
- a_mbase_x = _mm_set1_epi8(above[max_base_x]);
+ a_mbase_x = _mm_set1_epi8((char)above[max_base_x]);
c3f = _mm_set1_epi16(0x3f);
int x = dx;
@@ -210,7 +210,7 @@ static void dr_prediction_z1_4xN_sse4_1(int N, uint8_t *dst, ptrdiff_t stride,
dr_prediction_z1_HxW_internal_sse4_1(4, N, dstvec, above, upsample_above, dx);
for (int i = 0; i < N; i++) {
- *(uint32_t *)(dst + stride * i) = _mm_cvtsi128_si32(dstvec[i]);
+ *(int *)(dst + stride * i) = _mm_cvtsi128_si32(dstvec[i]);
}
}
@@ -255,7 +255,7 @@ static AOM_FORCE_INLINE void dr_prediction_z1_32xN_internal_sse4_1(
__m128i a_mbase_x, diff, c3f;
a16 = _mm_set1_epi16(16);
- a_mbase_x = _mm_set1_epi8(above[max_base_x]);
+ a_mbase_x = _mm_set1_epi8((char)above[max_base_x]);
c3f = _mm_set1_epi16(0x3f);
int x = dx;
@@ -353,7 +353,7 @@ static void dr_prediction_z1_64xN_sse4_1(int N, uint8_t *dst, ptrdiff_t stride,
__m128i max_base, base_inc, mask;
a16 = _mm_set1_epi16(16);
- a_mbase_x = _mm_set1_epi8(above[max_base_x]);
+ a_mbase_x = _mm_set1_epi8((char)above[max_base_x]);
max_base = _mm_set1_epi8(max_base_x);
c3f = _mm_set1_epi16(0x3f);
@@ -412,14 +412,14 @@ static void dr_prediction_z1_64xN_sse4_1(int N, uint8_t *dst, ptrdiff_t stride,
res = _mm_packus_epi16(res, res1); // 16 8-bit values
base_inc =
- _mm_setr_epi8((uint8_t)(base + j), (uint8_t)(base + j + 1),
- (uint8_t)(base + j + 2), (uint8_t)(base + j + 3),
- (uint8_t)(base + j + 4), (uint8_t)(base + j + 5),
- (uint8_t)(base + j + 6), (uint8_t)(base + j + 7),
- (uint8_t)(base + j + 8), (uint8_t)(base + j + 9),
- (uint8_t)(base + j + 10), (uint8_t)(base + j + 11),
- (uint8_t)(base + j + 12), (uint8_t)(base + j + 13),
- (uint8_t)(base + j + 14), (uint8_t)(base + j + 15));
+ _mm_setr_epi8((int8_t)(base + j), (int8_t)(base + j + 1),
+ (int8_t)(base + j + 2), (int8_t)(base + j + 3),
+ (int8_t)(base + j + 4), (int8_t)(base + j + 5),
+ (int8_t)(base + j + 6), (int8_t)(base + j + 7),
+ (int8_t)(base + j + 8), (int8_t)(base + j + 9),
+ (int8_t)(base + j + 10), (int8_t)(base + j + 11),
+ (int8_t)(base + j + 12), (int8_t)(base + j + 13),
+ (int8_t)(base + j + 14), (int8_t)(base + j + 15));
mask = _mm_cmpgt_epi8(_mm_subs_epu8(max_base, base_inc),
_mm_setzero_si128());
@@ -571,7 +571,7 @@ static void dr_prediction_z2_Nx4_sse4_1(int N, uint8_t *dst, ptrdiff_t stride,
resy = _mm_srli_si128(resx, 4);
resxy = _mm_blendv_epi8(resx, resy, *(__m128i *)Mask[0][base_min_diff]);
- *(uint32_t *)(dst) = _mm_cvtsi128_si32(resxy);
+ *(int *)(dst) = _mm_cvtsi128_si32(resxy);
dst += stride;
}
}
@@ -743,7 +743,7 @@ static void dr_prediction_z2_HxW_sse4_1(int H, int W, uint8_t *dst,
__m128i resx, resy;
__m128i resxy;
int y = r + 1;
- ydx = _mm_set1_epi16((uint16_t)(y * dx));
+ ydx = _mm_set1_epi16((int16_t)(y * dx));
int base_x = (-y * dx) >> frac_bits_x;
for (int j = 0; j < W; j += 16) {
@@ -938,10 +938,10 @@ static void dr_prediction_z3_4x4_sse4_1(uint8_t *dst, ptrdiff_t stride,
transpose4x8_8x4_low_sse2(&dstvec[0], &dstvec[1], &dstvec[2], &dstvec[3],
&d[0], &d[1], &d[2], &d[3]);
- *(uint32_t *)(dst + stride * 0) = _mm_cvtsi128_si32(d[0]);
- *(uint32_t *)(dst + stride * 1) = _mm_cvtsi128_si32(d[1]);
- *(uint32_t *)(dst + stride * 2) = _mm_cvtsi128_si32(d[2]);
- *(uint32_t *)(dst + stride * 3) = _mm_cvtsi128_si32(d[3]);
+ *(int *)(dst + stride * 0) = _mm_cvtsi128_si32(d[0]);
+ *(int *)(dst + stride * 1) = _mm_cvtsi128_si32(d[1]);
+ *(int *)(dst + stride * 2) = _mm_cvtsi128_si32(d[2]);
+ *(int *)(dst + stride * 3) = _mm_cvtsi128_si32(d[3]);
return;
}
@@ -974,7 +974,7 @@ static void dr_prediction_z3_4x8_sse4_1(uint8_t *dst, ptrdiff_t stride,
transpose4x8_8x4_sse2(&dstvec[0], &dstvec[1], &dstvec[2], &dstvec[3], &d[0],
&d[1], &d[2], &d[3], &d[4], &d[5], &d[6], &d[7]);
for (int i = 0; i < 8; i++) {
- *(uint32_t *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
+ *(int *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
}
}
@@ -1034,7 +1034,7 @@ static void dr_prediction_z3_4x16_sse4_1(uint8_t *dst, ptrdiff_t stride,
dr_prediction_z1_HxW_internal_sse4_1(16, 4, dstvec, left, upsample_left, dy);
transpose4x16_sse2(dstvec, d);
for (int i = 0; i < 16; i++) {
- *(uint32_t *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
+ *(int *)(dst + stride * i) = _mm_cvtsi128_si32(d[i]);
}
}
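
The sse4 hunks apply the same discipline on the store side: _mm_cvtsi128_si32() returns int, so the low four output bytes are written through an int pointer, and _mm_set1_epi8() / _mm_setr_epi8() take char arguments, hence the explicit narrowing casts on the uint8_t pixel and on the base + j sums. The store pattern as a hedged sketch (store_4_pixels is a hypothetical name):

static void store_4_pixels(uint8_t *dst, __m128i row) {
  const int four_pixels = _mm_cvtsi128_si32(row);  /* low 32 bits, as int */
  memcpy(dst, &four_pixels, sizeof(four_pixels));  /* aliasing-safe store */
}
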
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/intrapred_ssse3.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/intrapred_ssse3.c
index 5a34ea0c8e7..ab59220edac 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/intrapred_ssse3.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/intrapred_ssse3.c
@@ -47,7 +47,7 @@ void aom_paeth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t stride,
const __m128i t = _mm_loadl_epi64((const __m128i *)above);
const __m128i zero = _mm_setzero_si128();
const __m128i t16 = _mm_unpacklo_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ const __m128i tl16 = _mm_set1_epi16((int16_t)above[-1]);
__m128i rep = _mm_set1_epi16((short)0x8000);
const __m128i one = _mm_set1_epi16(1);
@@ -56,7 +56,7 @@ void aom_paeth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t stride,
const __m128i l16 = _mm_shuffle_epi8(l, rep);
const __m128i row = paeth_8x1_pred(&l16, &t16, &tl16);
- *(uint32_t *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
+ *(int *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
dst += stride;
rep = _mm_add_epi16(rep, one);
}
@@ -68,7 +68,7 @@ void aom_paeth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t stride,
const __m128i t = _mm_loadl_epi64((const __m128i *)above);
const __m128i zero = _mm_setzero_si128();
const __m128i t16 = _mm_unpacklo_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ const __m128i tl16 = _mm_set1_epi16((int16_t)above[-1]);
__m128i rep = _mm_set1_epi16((short)0x8000);
const __m128i one = _mm_set1_epi16(1);
@@ -77,7 +77,7 @@ void aom_paeth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t stride,
const __m128i l16 = _mm_shuffle_epi8(l, rep);
const __m128i row = paeth_8x1_pred(&l16, &t16, &tl16);
- *(uint32_t *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
+ *(int *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
dst += stride;
rep = _mm_add_epi16(rep, one);
}
@@ -86,10 +86,10 @@ void aom_paeth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t stride,
void aom_paeth_predictor_4x16_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
__m128i l = _mm_load_si128((const __m128i *)left);
- const __m128i t = _mm_cvtsi32_si128(((const uint32_t *)above)[0]);
+ const __m128i t = _mm_cvtsi32_si128(((const int *)above)[0]);
const __m128i zero = _mm_setzero_si128();
const __m128i t16 = _mm_unpacklo_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ const __m128i tl16 = _mm_set1_epi16((int16_t)above[-1]);
__m128i rep = _mm_set1_epi16((short)0x8000);
const __m128i one = _mm_set1_epi16(1);
@@ -97,7 +97,7 @@ void aom_paeth_predictor_4x16_ssse3(uint8_t *dst, ptrdiff_t stride,
const __m128i l16 = _mm_shuffle_epi8(l, rep);
const __m128i row = paeth_8x1_pred(&l16, &t16, &tl16);
- *(uint32_t *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
+ *(int *)dst = _mm_cvtsi128_si32(_mm_packus_epi16(row, row));
dst += stride;
rep = _mm_add_epi16(rep, one);
}
@@ -109,7 +109,7 @@ void aom_paeth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t stride,
const __m128i t = _mm_loadl_epi64((const __m128i *)above);
const __m128i zero = _mm_setzero_si128();
const __m128i t16 = _mm_unpacklo_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ const __m128i tl16 = _mm_set1_epi16((int16_t)above[-1]);
__m128i rep = _mm_set1_epi16((short)0x8000);
const __m128i one = _mm_set1_epi16(1);
@@ -130,7 +130,7 @@ void aom_paeth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t stride,
const __m128i t = _mm_loadl_epi64((const __m128i *)above);
const __m128i zero = _mm_setzero_si128();
const __m128i t16 = _mm_unpacklo_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ const __m128i tl16 = _mm_set1_epi16((int16_t)above[-1]);
__m128i rep = _mm_set1_epi16((short)0x8000);
const __m128i one = _mm_set1_epi16(1);
@@ -151,7 +151,7 @@ void aom_paeth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t stride,
const __m128i t = _mm_loadl_epi64((const __m128i *)above);
const __m128i zero = _mm_setzero_si128();
const __m128i t16 = _mm_unpacklo_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ const __m128i tl16 = _mm_set1_epi16((int16_t)above[-1]);
__m128i rep = _mm_set1_epi16((short)0x8000);
const __m128i one = _mm_set1_epi16(1);
@@ -171,7 +171,7 @@ void aom_paeth_predictor_8x32_ssse3(uint8_t *dst, ptrdiff_t stride,
const __m128i t = _mm_loadl_epi64((const __m128i *)above);
const __m128i zero = _mm_setzero_si128();
const __m128i t16 = _mm_unpacklo_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ const __m128i tl16 = _mm_set1_epi16((int16_t)above[-1]);
const __m128i one = _mm_set1_epi16(1);
for (int j = 0; j < 2; ++j) {
@@ -199,12 +199,12 @@ static INLINE __m128i paeth_16x1_pred(const __m128i *left, const __m128i *top0,
void aom_paeth_predictor_16x4_ssse3(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
- __m128i l = _mm_cvtsi32_si128(((const uint32_t *)left)[0]);
+ __m128i l = _mm_cvtsi32_si128(((const int *)left)[0]);
const __m128i t = _mm_load_si128((const __m128i *)above);
const __m128i zero = _mm_setzero_si128();
const __m128i top0 = _mm_unpacklo_epi8(t, zero);
const __m128i top1 = _mm_unpackhi_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ const __m128i tl16 = _mm_set1_epi16((int16_t)above[-1]);
__m128i rep = _mm_set1_epi16((short)0x8000);
const __m128i one = _mm_set1_epi16(1);
@@ -225,7 +225,7 @@ void aom_paeth_predictor_16x8_ssse3(uint8_t *dst, ptrdiff_t stride,
const __m128i zero = _mm_setzero_si128();
const __m128i top0 = _mm_unpacklo_epi8(t, zero);
const __m128i top1 = _mm_unpackhi_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ const __m128i tl16 = _mm_set1_epi16((int16_t)above[-1]);
__m128i rep = _mm_set1_epi16((short)0x8000);
const __m128i one = _mm_set1_epi16(1);
@@ -248,7 +248,7 @@ void aom_paeth_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t stride,
const __m128i zero = _mm_setzero_si128();
const __m128i top0 = _mm_unpacklo_epi8(t, zero);
const __m128i top1 = _mm_unpackhi_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ const __m128i tl16 = _mm_set1_epi16((int16_t)above[-1]);
__m128i rep = _mm_set1_epi16((short)0x8000);
const __m128i one = _mm_set1_epi16(1);
@@ -271,7 +271,7 @@ void aom_paeth_predictor_16x32_ssse3(uint8_t *dst, ptrdiff_t stride,
const __m128i zero = _mm_setzero_si128();
const __m128i top0 = _mm_unpacklo_epi8(t, zero);
const __m128i top1 = _mm_unpackhi_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ const __m128i tl16 = _mm_set1_epi16((int16_t)above[-1]);
__m128i rep = _mm_set1_epi16((short)0x8000);
const __m128i one = _mm_set1_epi16(1);
__m128i l16;
@@ -305,7 +305,7 @@ void aom_paeth_predictor_16x64_ssse3(uint8_t *dst, ptrdiff_t stride,
const __m128i zero = _mm_setzero_si128();
const __m128i top0 = _mm_unpacklo_epi8(t, zero);
const __m128i top1 = _mm_unpackhi_epi8(t, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ const __m128i tl16 = _mm_set1_epi16((int16_t)above[-1]);
const __m128i one = _mm_set1_epi16(1);
for (int j = 0; j < 4; ++j) {
@@ -331,7 +331,7 @@ void aom_paeth_predictor_32x8_ssse3(uint8_t *dst, ptrdiff_t stride,
const __m128i bl = _mm_unpacklo_epi8(b, zero);
const __m128i bh = _mm_unpackhi_epi8(b, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ const __m128i tl16 = _mm_set1_epi16((int16_t)above[-1]);
__m128i rep = _mm_set1_epi16((short)0x8000);
const __m128i one = _mm_set1_epi16(1);
const __m128i l = _mm_loadl_epi64((const __m128i *)left);
@@ -360,7 +360,7 @@ void aom_paeth_predictor_32x16_ssse3(uint8_t *dst, ptrdiff_t stride,
const __m128i bl = _mm_unpacklo_epi8(b, zero);
const __m128i bh = _mm_unpackhi_epi8(b, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ const __m128i tl16 = _mm_set1_epi16((int16_t)above[-1]);
__m128i rep = _mm_set1_epi16((short)0x8000);
const __m128i one = _mm_set1_epi16(1);
__m128i l = _mm_load_si128((const __m128i *)left);
@@ -390,7 +390,7 @@ void aom_paeth_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t stride,
const __m128i bl = _mm_unpacklo_epi8(b, zero);
const __m128i bh = _mm_unpackhi_epi8(b, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ const __m128i tl16 = _mm_set1_epi16((int16_t)above[-1]);
__m128i rep = _mm_set1_epi16((short)0x8000);
const __m128i one = _mm_set1_epi16(1);
__m128i l = _mm_load_si128((const __m128i *)left);
@@ -433,7 +433,7 @@ void aom_paeth_predictor_32x64_ssse3(uint8_t *dst, ptrdiff_t stride,
const __m128i bl = _mm_unpacklo_epi8(b, zero);
const __m128i bh = _mm_unpackhi_epi8(b, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ const __m128i tl16 = _mm_set1_epi16((int16_t)above[-1]);
const __m128i one = _mm_set1_epi16(1);
__m128i l16;
@@ -471,7 +471,7 @@ void aom_paeth_predictor_64x32_ssse3(uint8_t *dst, ptrdiff_t stride,
const __m128i dl = _mm_unpacklo_epi8(d, zero);
const __m128i dh = _mm_unpackhi_epi8(d, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ const __m128i tl16 = _mm_set1_epi16((int16_t)above[-1]);
const __m128i one = _mm_set1_epi16(1);
__m128i l16;
@@ -513,7 +513,7 @@ void aom_paeth_predictor_64x64_ssse3(uint8_t *dst, ptrdiff_t stride,
const __m128i dl = _mm_unpacklo_epi8(d, zero);
const __m128i dh = _mm_unpackhi_epi8(d, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ const __m128i tl16 = _mm_set1_epi16((int16_t)above[-1]);
const __m128i one = _mm_set1_epi16(1);
__m128i l16;
@@ -555,7 +555,7 @@ void aom_paeth_predictor_64x16_ssse3(uint8_t *dst, ptrdiff_t stride,
const __m128i dl = _mm_unpacklo_epi8(d, zero);
const __m128i dh = _mm_unpackhi_epi8(d, zero);
- const __m128i tl16 = _mm_set1_epi16((uint16_t)above[-1]);
+ const __m128i tl16 = _mm_set1_epi16((int16_t)above[-1]);
const __m128i one = _mm_set1_epi16(1);
__m128i l16;
@@ -586,17 +586,17 @@ void aom_paeth_predictor_64x16_ssse3(uint8_t *dst, ptrdiff_t stride,
// pixels[2]: right_pred vector
static INLINE void load_pixel_w4(const uint8_t *above, const uint8_t *left,
int height, __m128i *pixels) {
- __m128i d = _mm_cvtsi32_si128(((const uint32_t *)above)[0]);
+ __m128i d = _mm_cvtsi32_si128(((const int *)above)[0]);
if (height == 4)
- pixels[1] = _mm_cvtsi32_si128(((const uint32_t *)left)[0]);
+ pixels[1] = _mm_cvtsi32_si128(((const int *)left)[0]);
else if (height == 8)
pixels[1] = _mm_loadl_epi64(((const __m128i *)left));
else
pixels[1] = _mm_loadu_si128(((const __m128i *)left));
- pixels[2] = _mm_set1_epi16((uint16_t)above[3]);
+ pixels[2] = _mm_set1_epi16((int16_t)above[3]);
- const __m128i bp = _mm_set1_epi16((uint16_t)left[height - 1]);
+ const __m128i bp = _mm_set1_epi16((int16_t)left[height - 1]);
const __m128i zero = _mm_setzero_si128();
d = _mm_unpacklo_epi8(d, zero);
pixels[0] = _mm_unpacklo_epi16(d, bp);
@@ -607,21 +607,22 @@ static INLINE void load_pixel_w4(const uint8_t *above, const uint8_t *left,
// weight_h[2]: same as [0], second half for height = 16 only
// weight_h[3]: same as [1], second half for height = 16 only
// weight_w[0]: weights_w and scale - weights_w interleave vector
-static INLINE void load_weight_w4(const uint8_t *weight_array, int height,
- __m128i *weight_h, __m128i *weight_w) {
+static INLINE void load_weight_w4(int height, __m128i *weight_h,
+ __m128i *weight_w) {
const __m128i zero = _mm_setzero_si128();
- const __m128i d = _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
- const __m128i t = _mm_cvtsi32_si128(((const uint32_t *)weight_array)[1]);
+ const __m128i d = _mm_set1_epi16((int16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
+ const __m128i t = _mm_cvtsi32_si128(((const int *)smooth_weights)[0]);
weight_h[0] = _mm_unpacklo_epi8(t, zero);
weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
weight_w[0] = _mm_unpacklo_epi16(weight_h[0], weight_h[1]);
if (height == 8) {
- const __m128i weight = _mm_loadl_epi64((const __m128i *)&weight_array[8]);
+ const __m128i weight = _mm_loadl_epi64((const __m128i *)&smooth_weights[4]);
weight_h[0] = _mm_unpacklo_epi8(weight, zero);
weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
} else if (height == 16) {
- const __m128i weight = _mm_loadu_si128((const __m128i *)&weight_array[16]);
+ const __m128i weight =
+ _mm_loadu_si128((const __m128i *)&smooth_weights[12]);
weight_h[0] = _mm_unpacklo_epi8(weight, zero);
weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
weight_h[2] = _mm_unpackhi_epi8(weight, zero);
@@ -632,7 +633,7 @@ static INLINE void load_weight_w4(const uint8_t *weight_array, int height,
static INLINE void smooth_pred_4xh(const __m128i *pixel, const __m128i *wh,
const __m128i *ww, int h, uint8_t *dst,
ptrdiff_t stride, int second_half) {
- const __m128i round = _mm_set1_epi32((1 << sm_weight_log2_scale));
+ const __m128i round = _mm_set1_epi32((1 << SMOOTH_WEIGHT_LOG2_SCALE));
const __m128i one = _mm_set1_epi16(1);
const __m128i inc = _mm_set1_epi16(0x202);
const __m128i gat = _mm_set1_epi32(0xc080400);
@@ -652,10 +653,10 @@ static INLINE void smooth_pred_4xh(const __m128i *pixel, const __m128i *wh,
sum = _mm_add_epi32(s, sum);
sum = _mm_add_epi32(sum, round);
- sum = _mm_srai_epi32(sum, 1 + sm_weight_log2_scale);
+ sum = _mm_srai_epi32(sum, 1 + SMOOTH_WEIGHT_LOG2_SCALE);
sum = _mm_shuffle_epi8(sum, gat);
- *(uint32_t *)dst = _mm_cvtsi128_si32(sum);
+ *(int *)dst = _mm_cvtsi128_si32(sum);
dst += stride;
rep = _mm_add_epi16(rep, one);
@@ -669,7 +670,7 @@ void aom_smooth_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t stride,
load_pixel_w4(above, left, 4, pixels);
__m128i wh[4], ww[2];
- load_weight_w4(sm_weight_arrays, 4, wh, ww);
+ load_weight_w4(4, wh, ww);
smooth_pred_4xh(pixels, wh, ww, 4, dst, stride, 0);
}
@@ -680,7 +681,7 @@ void aom_smooth_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t stride,
load_pixel_w4(above, left, 8, pixels);
__m128i wh[4], ww[2];
- load_weight_w4(sm_weight_arrays, 8, wh, ww);
+ load_weight_w4(8, wh, ww);
smooth_pred_4xh(pixels, wh, ww, 8, dst, stride, 0);
}
@@ -692,7 +693,7 @@ void aom_smooth_predictor_4x16_ssse3(uint8_t *dst, ptrdiff_t stride,
load_pixel_w4(above, left, 16, pixels);
__m128i wh[4], ww[2];
- load_weight_w4(sm_weight_arrays, 16, wh, ww);
+ load_weight_w4(16, wh, ww);
smooth_pred_4xh(pixels, wh, ww, 8, dst, stride, 0);
dst += stride << 3;
@@ -710,16 +711,16 @@ void aom_smooth_predictor_4x16_ssse3(uint8_t *dst, ptrdiff_t stride,
static INLINE void load_pixel_w8(const uint8_t *above, const uint8_t *left,
int height, __m128i *pixels) {
const __m128i zero = _mm_setzero_si128();
- const __m128i bp = _mm_set1_epi16((uint16_t)left[height - 1]);
+ const __m128i bp = _mm_set1_epi16((int16_t)left[height - 1]);
__m128i d = _mm_loadl_epi64((const __m128i *)above);
d = _mm_unpacklo_epi8(d, zero);
pixels[0] = _mm_unpacklo_epi16(d, bp);
pixels[1] = _mm_unpackhi_epi16(d, bp);
- pixels[3] = _mm_set1_epi16((uint16_t)above[7]);
+ pixels[3] = _mm_set1_epi16((int16_t)above[7]);
if (height == 4) {
- pixels[2] = _mm_cvtsi32_si128(((const uint32_t *)left)[0]);
+ pixels[2] = _mm_cvtsi32_si128(((const int *)left)[0]);
} else if (height == 8) {
pixels[2] = _mm_loadl_epi64((const __m128i *)left);
} else if (height == 16) {
@@ -743,13 +744,13 @@ static INLINE void load_pixel_w8(const uint8_t *above, const uint8_t *left,
// weight_h[7]: same as [1], offset 24
// weight_w[0]: weights_w and scale - weights_w interleave vector, first half
// weight_w[1]: weights_w and scale - weights_w interleave vector, second half
-static INLINE void load_weight_w8(const uint8_t *weight_array, int height,
- __m128i *weight_h, __m128i *weight_w) {
+static INLINE void load_weight_w8(int height, __m128i *weight_h,
+ __m128i *weight_w) {
const __m128i zero = _mm_setzero_si128();
- const int we_offset = height < 8 ? 4 : 8;
- __m128i we = _mm_loadu_si128((const __m128i *)&weight_array[we_offset]);
+ const int we_offset = height < 8 ? 0 : 4;
+ __m128i we = _mm_loadu_si128((const __m128i *)&smooth_weights[we_offset]);
weight_h[0] = _mm_unpacklo_epi8(we, zero);
- const __m128i d = _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
+ const __m128i d = _mm_set1_epi16((int16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
if (height == 4) {
@@ -764,20 +765,20 @@ static INLINE void load_weight_w8(const uint8_t *weight_array, int height,
}
if (height == 16) {
- we = _mm_loadu_si128((const __m128i *)&weight_array[16]);
+ we = _mm_loadu_si128((const __m128i *)&smooth_weights[12]);
weight_h[0] = _mm_unpacklo_epi8(we, zero);
weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
weight_h[2] = _mm_unpackhi_epi8(we, zero);
weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
} else if (height == 32) {
const __m128i weight_lo =
- _mm_loadu_si128((const __m128i *)&weight_array[32]);
+ _mm_loadu_si128((const __m128i *)&smooth_weights[28]);
weight_h[0] = _mm_unpacklo_epi8(weight_lo, zero);
weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
weight_h[2] = _mm_unpackhi_epi8(weight_lo, zero);
weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
const __m128i weight_hi =
- _mm_loadu_si128((const __m128i *)&weight_array[32 + 16]);
+ _mm_loadu_si128((const __m128i *)&smooth_weights[28 + 16]);
weight_h[4] = _mm_unpacklo_epi8(weight_hi, zero);
weight_h[5] = _mm_sub_epi16(d, weight_h[4]);
weight_h[6] = _mm_unpackhi_epi8(weight_hi, zero);
@@ -788,7 +789,7 @@ static INLINE void load_weight_w8(const uint8_t *weight_array, int height,
static INLINE void smooth_pred_8xh(const __m128i *pixels, const __m128i *wh,
const __m128i *ww, int h, uint8_t *dst,
ptrdiff_t stride, int second_half) {
- const __m128i round = _mm_set1_epi32((1 << sm_weight_log2_scale));
+ const __m128i round = _mm_set1_epi32((1 << SMOOTH_WEIGHT_LOG2_SCALE));
const __m128i one = _mm_set1_epi16(1);
const __m128i inc = _mm_set1_epi16(0x202);
const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
@@ -812,11 +813,11 @@ static INLINE void smooth_pred_8xh(const __m128i *pixels, const __m128i *wh,
s0 = _mm_add_epi32(s0, sum0);
s0 = _mm_add_epi32(s0, round);
- s0 = _mm_srai_epi32(s0, 1 + sm_weight_log2_scale);
+ s0 = _mm_srai_epi32(s0, 1 + SMOOTH_WEIGHT_LOG2_SCALE);
s1 = _mm_add_epi32(s1, sum1);
s1 = _mm_add_epi32(s1, round);
- s1 = _mm_srai_epi32(s1, 1 + sm_weight_log2_scale);
+ s1 = _mm_srai_epi32(s1, 1 + SMOOTH_WEIGHT_LOG2_SCALE);
sum0 = _mm_packus_epi16(s0, s1);
sum0 = _mm_shuffle_epi8(sum0, gat);
@@ -834,7 +835,7 @@ void aom_smooth_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t stride,
load_pixel_w8(above, left, 4, pixels);
__m128i wh[4], ww[2];
- load_weight_w8(sm_weight_arrays, 4, wh, ww);
+ load_weight_w8(4, wh, ww);
smooth_pred_8xh(pixels, wh, ww, 4, dst, stride, 0);
}
@@ -845,7 +846,7 @@ void aom_smooth_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t stride,
load_pixel_w8(above, left, 8, pixels);
__m128i wh[4], ww[2];
- load_weight_w8(sm_weight_arrays, 8, wh, ww);
+ load_weight_w8(8, wh, ww);
smooth_pred_8xh(pixels, wh, ww, 8, dst, stride, 0);
}
@@ -857,7 +858,7 @@ void aom_smooth_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t stride,
load_pixel_w8(above, left, 16, pixels);
__m128i wh[4], ww[2];
- load_weight_w8(sm_weight_arrays, 16, wh, ww);
+ load_weight_w8(16, wh, ww);
smooth_pred_8xh(pixels, wh, ww, 8, dst, stride, 0);
dst += stride << 3;
@@ -871,7 +872,7 @@ void aom_smooth_predictor_8x32_ssse3(uint8_t *dst, ptrdiff_t stride,
load_pixel_w8(above, left, 32, pixels);
__m128i wh[8], ww[2];
- load_weight_w8(sm_weight_arrays, 32, wh, ww);
+ load_weight_w8(32, wh, ww);
smooth_pred_8xh(&pixels[0], wh, ww, 8, dst, stride, 0);
dst += stride << 3;
@@ -886,21 +887,22 @@ static INLINE void smooth_predictor_wxh(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left, uint32_t bw,
uint32_t bh) {
- const uint8_t *const sm_weights_w = sm_weight_arrays + bw;
- const uint8_t *const sm_weights_h = sm_weight_arrays + bh;
+ const uint8_t *const sm_weights_w = smooth_weights + bw - 4;
+ const uint8_t *const sm_weights_h = smooth_weights + bh - 4;
const __m128i zero = _mm_setzero_si128();
const __m128i scale_value =
- _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
- const __m128i bottom_left = _mm_cvtsi32_si128((uint32_t)left[bh - 1]);
+ _mm_set1_epi16((int16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
+ const __m128i bottom_left = _mm_cvtsi32_si128(left[bh - 1]);
const __m128i dup16 = _mm_set1_epi32(0x01000100);
const __m128i top_right =
- _mm_shuffle_epi8(_mm_cvtsi32_si128((uint32_t)above[bw - 1]), dup16);
+ _mm_shuffle_epi8(_mm_cvtsi32_si128(above[bw - 1]), dup16);
const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
- const __m128i round = _mm_set1_epi32((uint16_t)(1 << sm_weight_log2_scale));
+ const __m128i round =
+ _mm_set1_epi32((uint16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
for (uint32_t y = 0; y < bh; ++y) {
- const __m128i weights_y = _mm_cvtsi32_si128((uint32_t)sm_weights_h[y]);
- const __m128i left_y = _mm_cvtsi32_si128((uint32_t)left[y]);
+ const __m128i weights_y = _mm_cvtsi32_si128(sm_weights_h[y]);
+ const __m128i left_y = _mm_cvtsi32_si128(left[y]);
const __m128i scale_m_weights_y = _mm_sub_epi16(scale_value, weights_y);
__m128i pred_scaled_bl = _mm_mullo_epi16(scale_m_weights_y, bottom_left);
const __m128i wl_y =
@@ -931,8 +933,8 @@ static INLINE void smooth_predictor_wxh(uint8_t *dst, ptrdiff_t stride,
pred_lo = _mm_add_epi32(pred_lo, swxtr_lo);
pred_hi = _mm_add_epi32(pred_hi, swxtr_hi);
- pred_lo = _mm_srai_epi32(pred_lo, (1 + sm_weight_log2_scale));
- pred_hi = _mm_srai_epi32(pred_hi, (1 + sm_weight_log2_scale));
+ pred_lo = _mm_srai_epi32(pred_lo, (1 + SMOOTH_WEIGHT_LOG2_SCALE));
+ pred_hi = _mm_srai_epi32(pred_hi, (1 + SMOOTH_WEIGHT_LOG2_SCALE));
__m128i pred = _mm_packus_epi16(pred_lo, pred_hi);
pred = _mm_shuffle_epi8(pred, gat);
@@ -1021,30 +1023,29 @@ void aom_smooth_predictor_16x64_ssse3(uint8_t *dst, ptrdiff_t stride,
static INLINE void load_pixel_v_w4(const uint8_t *above, const uint8_t *left,
int height, __m128i *pixels) {
const __m128i zero = _mm_setzero_si128();
- __m128i d = _mm_cvtsi32_si128(((const uint32_t *)above)[0]);
- const __m128i bp = _mm_set1_epi16((uint16_t)left[height - 1]);
+ __m128i d = _mm_cvtsi32_si128(((const int *)above)[0]);
+ const __m128i bp = _mm_set1_epi16((int16_t)left[height - 1]);
d = _mm_unpacklo_epi8(d, zero);
pixels[0] = _mm_unpacklo_epi16(d, bp);
}
// weights[0]: weights_h vector
// weights[1]: scale - weights_h vector
-static INLINE void load_weight_v_w4(const uint8_t *weight_array, int height,
- __m128i *weights) {
+static INLINE void load_weight_v_w4(int height, __m128i *weights) {
const __m128i zero = _mm_setzero_si128();
- const __m128i d = _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
+ const __m128i d = _mm_set1_epi16((int16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
if (height == 4) {
- const __m128i weight =
- _mm_cvtsi32_si128(((const uint32_t *)weight_array)[1]);
+ const __m128i weight = _mm_cvtsi32_si128(((const int *)smooth_weights)[0]);
weights[0] = _mm_unpacklo_epi8(weight, zero);
weights[1] = _mm_sub_epi16(d, weights[0]);
} else if (height == 8) {
- const __m128i weight = _mm_loadl_epi64((const __m128i *)&weight_array[8]);
+ const __m128i weight = _mm_loadl_epi64((const __m128i *)&smooth_weights[4]);
weights[0] = _mm_unpacklo_epi8(weight, zero);
weights[1] = _mm_sub_epi16(d, weights[0]);
} else {
- const __m128i weight = _mm_loadu_si128((const __m128i *)&weight_array[16]);
+ const __m128i weight =
+ _mm_loadu_si128((const __m128i *)&smooth_weights[12]);
weights[0] = _mm_unpacklo_epi8(weight, zero);
weights[1] = _mm_sub_epi16(d, weights[0]);
weights[2] = _mm_unpackhi_epi8(weight, zero);
@@ -1055,7 +1056,8 @@ static INLINE void load_weight_v_w4(const uint8_t *weight_array, int height,
static INLINE void smooth_v_pred_4xh(const __m128i *pixel,
const __m128i *weight, int h, uint8_t *dst,
ptrdiff_t stride) {
- const __m128i pred_round = _mm_set1_epi32((1 << (sm_weight_log2_scale - 1)));
+ const __m128i pred_round =
+ _mm_set1_epi32((1 << (SMOOTH_WEIGHT_LOG2_SCALE - 1)));
const __m128i inc = _mm_set1_epi16(0x202);
const __m128i gat = _mm_set1_epi32(0xc080400);
__m128i d = _mm_set1_epi16(0x100);
@@ -1066,9 +1068,9 @@ static INLINE void smooth_v_pred_4xh(const __m128i *pixel,
const __m128i wh_sc = _mm_unpacklo_epi16(wg_wg, sc_sc);
__m128i sum = _mm_madd_epi16(pixel[0], wh_sc);
sum = _mm_add_epi32(sum, pred_round);
- sum = _mm_srai_epi32(sum, sm_weight_log2_scale);
+ sum = _mm_srai_epi32(sum, SMOOTH_WEIGHT_LOG2_SCALE);
sum = _mm_shuffle_epi8(sum, gat);
- *(uint32_t *)dst = _mm_cvtsi128_si32(sum);
+ *(int *)dst = _mm_cvtsi128_si32(sum);
dst += stride;
d = _mm_add_epi16(d, inc);
}
@@ -1081,7 +1083,7 @@ void aom_smooth_v_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t stride,
load_pixel_v_w4(above, left, 4, &pixels);
__m128i weights[2];
- load_weight_v_w4(sm_weight_arrays, 4, weights);
+ load_weight_v_w4(4, weights);
smooth_v_pred_4xh(&pixels, weights, 4, dst, stride);
}
@@ -1093,7 +1095,7 @@ void aom_smooth_v_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t stride,
load_pixel_v_w4(above, left, 8, &pixels);
__m128i weights[2];
- load_weight_v_w4(sm_weight_arrays, 8, weights);
+ load_weight_v_w4(8, weights);
smooth_v_pred_4xh(&pixels, weights, 8, dst, stride);
}
@@ -1105,7 +1107,7 @@ void aom_smooth_v_predictor_4x16_ssse3(uint8_t *dst, ptrdiff_t stride,
load_pixel_v_w4(above, left, 16, &pixels);
__m128i weights[4];
- load_weight_v_w4(sm_weight_arrays, 16, weights);
+ load_weight_v_w4(16, weights);
smooth_v_pred_4xh(&pixels, weights, 8, dst, stride);
dst += stride << 3;
@@ -1118,7 +1120,7 @@ static INLINE void load_pixel_v_w8(const uint8_t *above, const uint8_t *left,
int height, __m128i *pixels) {
const __m128i zero = _mm_setzero_si128();
__m128i d = _mm_loadl_epi64((const __m128i *)above);
- const __m128i bp = _mm_set1_epi16((uint16_t)left[height - 1]);
+ const __m128i bp = _mm_set1_epi16((int16_t)left[height - 1]);
d = _mm_unpacklo_epi8(d, zero);
pixels[0] = _mm_unpacklo_epi16(d, bp);
pixels[1] = _mm_unpackhi_epi16(d, bp);
@@ -1132,32 +1134,32 @@ static INLINE void load_pixel_v_w8(const uint8_t *above, const uint8_t *left,
// weight_h[5]: same as [1], offset 16
// weight_h[6]: same as [0], offset 24
// weight_h[7]: same as [1], offset 24
-static INLINE void load_weight_v_w8(const uint8_t *weight_array, int height,
- __m128i *weight_h) {
+static INLINE void load_weight_v_w8(int height, __m128i *weight_h) {
const __m128i zero = _mm_setzero_si128();
- const __m128i d = _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
+ const __m128i d = _mm_set1_epi16((int16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
if (height < 16) {
- const int offset = height < 8 ? 4 : 8;
+ const int offset = height < 8 ? 0 : 4;
const __m128i weight =
- _mm_loadu_si128((const __m128i *)&weight_array[offset]);
+ _mm_loadu_si128((const __m128i *)&smooth_weights[offset]);
weight_h[0] = _mm_unpacklo_epi8(weight, zero);
weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
} else if (height == 16) {
- const __m128i weight = _mm_loadu_si128((const __m128i *)&weight_array[16]);
+ const __m128i weight =
+ _mm_loadu_si128((const __m128i *)&smooth_weights[12]);
weight_h[0] = _mm_unpacklo_epi8(weight, zero);
weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
weight_h[2] = _mm_unpackhi_epi8(weight, zero);
weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
} else {
const __m128i weight_lo =
- _mm_loadu_si128((const __m128i *)&weight_array[32]);
+ _mm_loadu_si128((const __m128i *)&smooth_weights[28]);
weight_h[0] = _mm_unpacklo_epi8(weight_lo, zero);
weight_h[1] = _mm_sub_epi16(d, weight_h[0]);
weight_h[2] = _mm_unpackhi_epi8(weight_lo, zero);
weight_h[3] = _mm_sub_epi16(d, weight_h[2]);
const __m128i weight_hi =
- _mm_loadu_si128((const __m128i *)&weight_array[32 + 16]);
+ _mm_loadu_si128((const __m128i *)&smooth_weights[28 + 16]);
weight_h[4] = _mm_unpacklo_epi8(weight_hi, zero);
weight_h[5] = _mm_sub_epi16(d, weight_h[4]);
weight_h[6] = _mm_unpackhi_epi8(weight_hi, zero);
@@ -1167,7 +1169,8 @@ static INLINE void load_weight_v_w8(const uint8_t *weight_array, int height,
static INLINE void smooth_v_pred_8xh(const __m128i *pixels, const __m128i *wh,
int h, uint8_t *dst, ptrdiff_t stride) {
- const __m128i pred_round = _mm_set1_epi32((1 << (sm_weight_log2_scale - 1)));
+ const __m128i pred_round =
+ _mm_set1_epi32((1 << (SMOOTH_WEIGHT_LOG2_SCALE - 1)));
const __m128i inc = _mm_set1_epi16(0x202);
const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
__m128i d = _mm_set1_epi16(0x100);
@@ -1180,10 +1183,10 @@ static INLINE void smooth_v_pred_8xh(const __m128i *pixels, const __m128i *wh,
__m128i s1 = _mm_madd_epi16(pixels[1], wh_sc);
s0 = _mm_add_epi32(s0, pred_round);
- s0 = _mm_srai_epi32(s0, sm_weight_log2_scale);
+ s0 = _mm_srai_epi32(s0, SMOOTH_WEIGHT_LOG2_SCALE);
s1 = _mm_add_epi32(s1, pred_round);
- s1 = _mm_srai_epi32(s1, sm_weight_log2_scale);
+ s1 = _mm_srai_epi32(s1, SMOOTH_WEIGHT_LOG2_SCALE);
__m128i sum01 = _mm_packus_epi16(s0, s1);
sum01 = _mm_shuffle_epi8(sum01, gat);
@@ -1201,7 +1204,7 @@ void aom_smooth_v_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t stride,
load_pixel_v_w8(above, left, 4, pixels);
__m128i wh[2];
- load_weight_v_w8(sm_weight_arrays, 4, wh);
+ load_weight_v_w8(4, wh);
smooth_v_pred_8xh(pixels, wh, 4, dst, stride);
}
@@ -1213,7 +1216,7 @@ void aom_smooth_v_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t stride,
load_pixel_v_w8(above, left, 8, pixels);
__m128i wh[2];
- load_weight_v_w8(sm_weight_arrays, 8, wh);
+ load_weight_v_w8(8, wh);
smooth_v_pred_8xh(pixels, wh, 8, dst, stride);
}
@@ -1225,7 +1228,7 @@ void aom_smooth_v_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t stride,
load_pixel_v_w8(above, left, 16, pixels);
__m128i wh[4];
- load_weight_v_w8(sm_weight_arrays, 16, wh);
+ load_weight_v_w8(16, wh);
smooth_v_pred_8xh(pixels, wh, 8, dst, stride);
dst += stride << 3;
@@ -1239,7 +1242,7 @@ void aom_smooth_v_predictor_8x32_ssse3(uint8_t *dst, ptrdiff_t stride,
load_pixel_v_w8(above, left, 32, pixels);
__m128i wh[8];
- load_weight_v_w8(sm_weight_arrays, 32, wh);
+ load_weight_v_w8(32, wh);
smooth_v_pred_8xh(pixels, &wh[0], 8, dst, stride);
dst += stride << 3;
@@ -1254,19 +1257,19 @@ static INLINE void smooth_v_predictor_wxh(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left, uint32_t bw,
uint32_t bh) {
- const uint8_t *const sm_weights_h = sm_weight_arrays + bh;
+ const uint8_t *const sm_weights_h = smooth_weights + bh - 4;
const __m128i zero = _mm_setzero_si128();
const __m128i scale_value =
- _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
+ _mm_set1_epi16((int16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
const __m128i dup16 = _mm_set1_epi32(0x01000100);
const __m128i bottom_left =
- _mm_shuffle_epi8(_mm_cvtsi32_si128((uint32_t)left[bh - 1]), dup16);
+ _mm_shuffle_epi8(_mm_cvtsi32_si128(left[bh - 1]), dup16);
const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
const __m128i round =
- _mm_set1_epi32((uint16_t)(1 << (sm_weight_log2_scale - 1)));
+ _mm_set1_epi32((uint16_t)(1 << (SMOOTH_WEIGHT_LOG2_SCALE - 1)));
for (uint32_t y = 0; y < bh; ++y) {
- const __m128i weights_y = _mm_cvtsi32_si128((uint32_t)sm_weights_h[y]);
+ const __m128i weights_y = _mm_cvtsi32_si128(sm_weights_h[y]);
const __m128i scale_m_weights_y =
_mm_shuffle_epi8(_mm_sub_epi16(scale_value, weights_y), dup16);
const __m128i wl_y =
@@ -1284,8 +1287,8 @@ static INLINE void smooth_v_predictor_wxh(uint8_t *dst, ptrdiff_t stride,
pred_lo = _mm_add_epi32(pred_lo, round);
pred_hi = _mm_add_epi32(pred_hi, round);
- pred_lo = _mm_srai_epi32(pred_lo, sm_weight_log2_scale);
- pred_hi = _mm_srai_epi32(pred_hi, sm_weight_log2_scale);
+ pred_lo = _mm_srai_epi32(pred_lo, SMOOTH_WEIGHT_LOG2_SCALE);
+ pred_hi = _mm_srai_epi32(pred_hi, SMOOTH_WEIGHT_LOG2_SCALE);
__m128i pred = _mm_packus_epi16(pred_lo, pred_hi);
pred = _mm_shuffle_epi8(pred, gat);
@@ -1375,23 +1378,22 @@ void aom_smooth_v_predictor_16x64_ssse3(uint8_t *dst, ptrdiff_t stride,
static INLINE void load_pixel_h_w4(const uint8_t *above, const uint8_t *left,
int height, __m128i *pixels) {
if (height == 4)
- pixels[0] = _mm_cvtsi32_si128(((const uint32_t *)left)[0]);
+ pixels[0] = _mm_cvtsi32_si128(((const int *)left)[0]);
else if (height == 8)
pixels[0] = _mm_loadl_epi64(((const __m128i *)left));
else
pixels[0] = _mm_loadu_si128(((const __m128i *)left));
- pixels[1] = _mm_set1_epi16((uint16_t)above[3]);
+ pixels[1] = _mm_set1_epi16((int16_t)above[3]);
}
// weights[0]: weights_w and scale - weights_w interleave vector
-static INLINE void load_weight_h_w4(const uint8_t *weight_array, int height,
- __m128i *weights) {
+static INLINE void load_weight_h_w4(int height, __m128i *weights) {
(void)height;
- const __m128i t = _mm_loadu_si128((const __m128i *)&weight_array[4]);
+ const __m128i t = _mm_loadu_si128((const __m128i *)&smooth_weights[0]);
const __m128i zero = _mm_setzero_si128();
const __m128i weights_0 = _mm_unpacklo_epi8(t, zero);
- const __m128i d = _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
+ const __m128i d = _mm_set1_epi16((int16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
const __m128i weights_1 = _mm_sub_epi16(d, weights_0);
weights[0] = _mm_unpacklo_epi16(weights_0, weights_1);
}
@@ -1399,7 +1401,8 @@ static INLINE void load_weight_h_w4(const uint8_t *weight_array, int height,
static INLINE void smooth_h_pred_4xh(const __m128i *pixel,
const __m128i *weight, int h, uint8_t *dst,
ptrdiff_t stride) {
- const __m128i pred_round = _mm_set1_epi32((1 << (sm_weight_log2_scale - 1)));
+ const __m128i pred_round =
+ _mm_set1_epi32((1 << (SMOOTH_WEIGHT_LOG2_SCALE - 1)));
const __m128i one = _mm_set1_epi16(1);
const __m128i gat = _mm_set1_epi32(0xc080400);
__m128i rep = _mm_set1_epi16((short)0x8000);
@@ -1410,10 +1413,10 @@ static INLINE void smooth_h_pred_4xh(const __m128i *pixel,
__m128i sum = _mm_madd_epi16(b, weight[0]);
sum = _mm_add_epi32(sum, pred_round);
- sum = _mm_srai_epi32(sum, sm_weight_log2_scale);
+ sum = _mm_srai_epi32(sum, SMOOTH_WEIGHT_LOG2_SCALE);
sum = _mm_shuffle_epi8(sum, gat);
- *(uint32_t *)dst = _mm_cvtsi128_si32(sum);
+ *(int *)dst = _mm_cvtsi128_si32(sum);
dst += stride;
rep = _mm_add_epi16(rep, one);
@@ -1427,7 +1430,7 @@ void aom_smooth_h_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t stride,
load_pixel_h_w4(above, left, 4, pixels);
__m128i weights;
- load_weight_h_w4(sm_weight_arrays, 4, &weights);
+ load_weight_h_w4(4, &weights);
smooth_h_pred_4xh(pixels, &weights, 4, dst, stride);
}
@@ -1439,7 +1442,7 @@ void aom_smooth_h_predictor_4x8_ssse3(uint8_t *dst, ptrdiff_t stride,
load_pixel_h_w4(above, left, 8, pixels);
__m128i weights;
- load_weight_h_w4(sm_weight_arrays, 8, &weights);
+ load_weight_h_w4(8, &weights);
smooth_h_pred_4xh(pixels, &weights, 8, dst, stride);
}
@@ -1451,7 +1454,7 @@ void aom_smooth_h_predictor_4x16_ssse3(uint8_t *dst, ptrdiff_t stride,
load_pixel_h_w4(above, left, 16, pixels);
__m128i weights;
- load_weight_h_w4(sm_weight_arrays, 8, &weights);
+ load_weight_h_w4(8, &weights);
smooth_h_pred_4xh(pixels, &weights, 8, dst, stride);
dst += stride << 3;
@@ -1466,10 +1469,10 @@ void aom_smooth_h_predictor_4x16_ssse3(uint8_t *dst, ptrdiff_t stride,
// pixels[3]: right_pred vector
static INLINE void load_pixel_h_w8(const uint8_t *above, const uint8_t *left,
int height, __m128i *pixels) {
- pixels[1] = _mm_set1_epi16((uint16_t)above[7]);
+ pixels[1] = _mm_set1_epi16((int16_t)above[7]);
if (height == 4) {
- pixels[0] = _mm_cvtsi32_si128(((const uint32_t *)left)[0]);
+ pixels[0] = _mm_cvtsi32_si128(((const int *)left)[0]);
} else if (height == 8) {
pixels[0] = _mm_loadl_epi64((const __m128i *)left);
} else if (height == 16) {
@@ -1483,12 +1486,11 @@ static INLINE void load_pixel_h_w8(const uint8_t *above, const uint8_t *left,
// weight_w[0]: weights_w and scale - weights_w interleave vector, first half
// weight_w[1]: weights_w and scale - weights_w interleave vector, second half
-static INLINE void load_weight_h_w8(const uint8_t *weight_array, int height,
- __m128i *weight_w) {
+static INLINE void load_weight_h_w8(int height, __m128i *weight_w) {
(void)height;
const __m128i zero = _mm_setzero_si128();
- const __m128i d = _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
- const __m128i we = _mm_loadu_si128((const __m128i *)&weight_array[8]);
+ const __m128i d = _mm_set1_epi16((int16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
+ const __m128i we = _mm_loadu_si128((const __m128i *)&smooth_weights[4]);
const __m128i tmp1 = _mm_unpacklo_epi8(we, zero);
const __m128i tmp2 = _mm_sub_epi16(d, tmp1);
weight_w[0] = _mm_unpacklo_epi16(tmp1, tmp2);
@@ -1498,7 +1500,8 @@ static INLINE void load_weight_h_w8(const uint8_t *weight_array, int height,
static INLINE void smooth_h_pred_8xh(const __m128i *pixels, const __m128i *ww,
int h, uint8_t *dst, ptrdiff_t stride,
int second_half) {
- const __m128i pred_round = _mm_set1_epi32((1 << (sm_weight_log2_scale - 1)));
+ const __m128i pred_round =
+ _mm_set1_epi32((1 << (SMOOTH_WEIGHT_LOG2_SCALE - 1)));
const __m128i one = _mm_set1_epi16(1);
const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
__m128i rep = second_half ? _mm_set1_epi16((short)0x8008)
@@ -1511,10 +1514,10 @@ static INLINE void smooth_h_pred_8xh(const __m128i *pixels, const __m128i *ww,
__m128i sum1 = _mm_madd_epi16(b, ww[1]);
sum0 = _mm_add_epi32(sum0, pred_round);
- sum0 = _mm_srai_epi32(sum0, sm_weight_log2_scale);
+ sum0 = _mm_srai_epi32(sum0, SMOOTH_WEIGHT_LOG2_SCALE);
sum1 = _mm_add_epi32(sum1, pred_round);
- sum1 = _mm_srai_epi32(sum1, sm_weight_log2_scale);
+ sum1 = _mm_srai_epi32(sum1, SMOOTH_WEIGHT_LOG2_SCALE);
sum0 = _mm_packus_epi16(sum0, sum1);
sum0 = _mm_shuffle_epi8(sum0, gat);
@@ -1532,7 +1535,7 @@ void aom_smooth_h_predictor_8x4_ssse3(uint8_t *dst, ptrdiff_t stride,
load_pixel_h_w8(above, left, 4, pixels);
__m128i ww[2];
- load_weight_h_w8(sm_weight_arrays, 4, ww);
+ load_weight_h_w8(4, ww);
smooth_h_pred_8xh(pixels, ww, 4, dst, stride, 0);
}
@@ -1544,7 +1547,7 @@ void aom_smooth_h_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t stride,
load_pixel_h_w8(above, left, 8, pixels);
__m128i ww[2];
- load_weight_h_w8(sm_weight_arrays, 8, ww);
+ load_weight_h_w8(8, ww);
smooth_h_pred_8xh(pixels, ww, 8, dst, stride, 0);
}
@@ -1556,7 +1559,7 @@ void aom_smooth_h_predictor_8x16_ssse3(uint8_t *dst, ptrdiff_t stride,
load_pixel_h_w8(above, left, 16, pixels);
__m128i ww[2];
- load_weight_h_w8(sm_weight_arrays, 16, ww);
+ load_weight_h_w8(16, ww);
smooth_h_pred_8xh(pixels, ww, 8, dst, stride, 0);
dst += stride << 3;
@@ -1570,7 +1573,7 @@ void aom_smooth_h_predictor_8x32_ssse3(uint8_t *dst, ptrdiff_t stride,
load_pixel_h_w8(above, left, 32, pixels);
__m128i ww[2];
- load_weight_h_w8(sm_weight_arrays, 32, ww);
+ load_weight_h_w8(32, ww);
smooth_h_pred_8xh(&pixels[0], ww, 8, dst, stride, 0);
dst += stride << 3;
@@ -1585,16 +1588,17 @@ static INLINE void smooth_h_predictor_wxh(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left, uint32_t bw,
uint32_t bh) {
- const uint8_t *const sm_weights_w = sm_weight_arrays + bw;
+ const uint8_t *const sm_weights_w = smooth_weights + bw - 4;
const __m128i zero = _mm_setzero_si128();
const __m128i scale_value =
- _mm_set1_epi16((uint16_t)(1 << sm_weight_log2_scale));
- const __m128i top_right = _mm_cvtsi32_si128((uint32_t)above[bw - 1]);
+ _mm_set1_epi16((int16_t)(1 << SMOOTH_WEIGHT_LOG2_SCALE));
+ const __m128i top_right = _mm_cvtsi32_si128(above[bw - 1]);
const __m128i gat = _mm_set_epi32(0, 0, 0xe0c0a08, 0x6040200);
- const __m128i pred_round = _mm_set1_epi32((1 << (sm_weight_log2_scale - 1)));
+ const __m128i pred_round =
+ _mm_set1_epi32((1 << (SMOOTH_WEIGHT_LOG2_SCALE - 1)));
for (uint32_t y = 0; y < bh; ++y) {
- const __m128i left_y = _mm_cvtsi32_si128((uint32_t)left[y]);
+ const __m128i left_y = _mm_cvtsi32_si128(left[y]);
const __m128i tr_ly =
_mm_shuffle_epi32(_mm_unpacklo_epi16(top_right, left_y), 0);
@@ -1611,8 +1615,8 @@ static INLINE void smooth_h_predictor_wxh(uint8_t *dst, ptrdiff_t stride,
pred_lo = _mm_add_epi32(pred_lo, pred_round);
pred_hi = _mm_add_epi32(pred_hi, pred_round);
- pred_lo = _mm_srai_epi32(pred_lo, sm_weight_log2_scale);
- pred_hi = _mm_srai_epi32(pred_hi, sm_weight_log2_scale);
+ pred_lo = _mm_srai_epi32(pred_lo, SMOOTH_WEIGHT_LOG2_SCALE);
+ pred_hi = _mm_srai_epi32(pred_hi, SMOOTH_WEIGHT_LOG2_SCALE);
__m128i pred = _mm_packus_epi16(pred_lo, pred_hi);
pred = _mm_shuffle_epi8(pred, gat);
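
Beyond the integer-type fixes, the ssse3 hunks track an upstream libaom rename: sm_weight_arrays / sm_weight_log2_scale become smooth_weights / SMOOTH_WEIGHT_LOG2_SCALE, and every table offset shifts down by four (4 -> 0, 8 -> 4, 16 -> 12, 32 -> 28, and sm_weight_arrays + bw -> smooth_weights + bw - 4). The uniform -4 shift suggests the old table carried four leading padding bytes so that the weight row for block dimension d started at offset d; the new table starts directly at the size-4 row. A hypothetical helper capturing the new indexing:

static const uint8_t *smooth_weights_row(int block_dim) {
  /* Rows for sizes 4, 8, 16, 32, ... are stored back to back, so the row
   * for dimension d (a power of two >= 4) begins at offset d - 4. */
  return smooth_weights + block_dim - 4;
}
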
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/jnt_sad_ssse3.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/jnt_sad_ssse3.c
index 4e6fe8faa3a..357f70a5023 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/jnt_sad_ssse3.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/jnt_sad_ssse3.c
@@ -53,7 +53,8 @@ unsigned int aom_sad4xh_sse2(const uint8_t *a, int a_stride, const uint8_t *b,
// At this point, we have two 32-bit partial SADs at bit[0:31] and [64:95].
const unsigned int res =
- _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
+ (unsigned int)(_mm_cvtsi128_si32(sad) +
+ _mm_cvtsi128_si32(_mm_srli_si128(sad, 8)));
return res;
}
@@ -84,7 +85,8 @@ unsigned int aom_sad8xh_sse2(const uint8_t *a, int a_stride, const uint8_t *b,
}
const unsigned int res =
- _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
+ (unsigned int)(_mm_cvtsi128_si32(sad) +
+ _mm_cvtsi128_si32(_mm_srli_si128(sad, 8)));
return res;
}
@@ -108,7 +110,8 @@ unsigned int aom_sad16xh_sse2(const uint8_t *a, int a_stride, const uint8_t *b,
}
const unsigned int res =
- _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
+ (unsigned int)(_mm_cvtsi128_si32(sad) +
+ _mm_cvtsi128_si32(_mm_srli_si128(sad, 8)));
return res;
}
@@ -134,7 +137,8 @@ unsigned int aom_sad32xh_sse2(const uint8_t *a, int a_stride, const uint8_t *b,
}
const unsigned int res =
- _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
+ (unsigned int)(_mm_cvtsi128_si32(sad) +
+ _mm_cvtsi128_si32(_mm_srli_si128(sad, 8)));
return res;
}
@@ -160,7 +164,8 @@ unsigned int aom_sad64xh_sse2(const uint8_t *a, int a_stride, const uint8_t *b,
}
const unsigned int res =
- _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
+ (unsigned int)(_mm_cvtsi128_si32(sad) +
+ _mm_cvtsi128_si32(_mm_srli_si128(sad, 8)));
return res;
}
@@ -186,7 +191,8 @@ unsigned int aom_sad128xh_sse2(const uint8_t *a, int a_stride, const uint8_t *b,
}
const unsigned int res =
- _mm_cvtsi128_si32(sad) + _mm_cvtsi128_si32(_mm_srli_si128(sad, 8));
+ (unsigned int)(_mm_cvtsi128_si32(sad) +
+ _mm_cvtsi128_si32(_mm_srli_si128(sad, 8)));
return res;
}
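
Every SAD kernel in this file ends with the same two-lane reduction; the only change is casting the signed sum once to the unsigned return type, since _mm_cvtsi128_si32() yields int. The pattern as a standalone sketch (reduce_sad_pair is a hypothetical name):

static unsigned int reduce_sad_pair(__m128i sad) {
  /* _mm_sad_epu8 left 32-bit partial sums in lanes 0 and 2; bring lane 2
   * down with an 8-byte shift, add, and cast the result exactly once. */
  return (unsigned int)(_mm_cvtsi128_si32(sad) +
                        _mm_cvtsi128_si32(_mm_srli_si128(sad, 8)));
}
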
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/jnt_variance_ssse3.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/jnt_variance_ssse3.c
index 6ec5dd8c101..dd798ca54a5 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/jnt_variance_ssse3.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/jnt_variance_ssse3.c
@@ -49,13 +49,12 @@ void aom_dist_wtd_comp_avg_pred_ssse3(uint8_t *comp_pred, const uint8_t *pred,
int ref_stride,
const DIST_WTD_COMP_PARAMS *jcp_param) {
int i;
- const uint8_t w0 = (uint8_t)jcp_param->fwd_offset;
- const uint8_t w1 = (uint8_t)jcp_param->bck_offset;
+ const int8_t w0 = (int8_t)jcp_param->fwd_offset;
+ const int8_t w1 = (int8_t)jcp_param->bck_offset;
const __m128i w = _mm_set_epi8(w1, w0, w1, w0, w1, w0, w1, w0, w1, w0, w1, w0,
w1, w0, w1, w0);
- const uint16_t round = ((1 << DIST_PRECISION_BITS) >> 1);
- const __m128i r =
- _mm_set_epi16(round, round, round, round, round, round, round, round);
+ const int16_t round = (int16_t)((1 << DIST_PRECISION_BITS) >> 1);
+ const __m128i r = _mm_set1_epi16(round);
if (width >= 16) {
// Read 16 pixels one row at a time
@@ -95,10 +94,10 @@ void aom_dist_wtd_comp_avg_pred_ssse3(uint8_t *comp_pred, const uint8_t *pred,
assert(!(width & 3));
assert(!(height & 3));
for (i = 0; i < height; i += 4) {
- const uint8_t *row0 = ref + 0 * ref_stride;
- const uint8_t *row1 = ref + 1 * ref_stride;
- const uint8_t *row2 = ref + 2 * ref_stride;
- const uint8_t *row3 = ref + 3 * ref_stride;
+ const int8_t *row0 = (const int8_t *)ref + 0 * ref_stride;
+ const int8_t *row1 = (const int8_t *)ref + 1 * ref_stride;
+ const int8_t *row2 = (const int8_t *)ref + 2 * ref_stride;
+ const int8_t *row3 = (const int8_t *)ref + 3 * ref_stride;
__m128i p0 =
_mm_setr_epi8(row0[0], row0[1], row0[2], row0[3], row1[0], row1[1],
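
Two independent cleanups above: _mm_set_epi8() takes char arguments, so the distortion weights w0/w1 are held as int8_t (and the row pointers re-typed to match), while the eight-argument _mm_set_epi16(round, ...) collapses to the equivalent single-element broadcast. An illustrative sketch, assuming DIST_PRECISION_BITS from the surrounding code:

static __m128i broadcast_round(void) {
  const int16_t round = (int16_t)((1 << DIST_PRECISION_BITS) >> 1);
  /* Same 128-bit value as _mm_set_epi16(round, round, ..., round),
   * just stated as the broadcast it is. */
  return _mm_set1_epi16(round);
}
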
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/loopfilter_avx2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/loopfilter_avx2.c
index b59381928e6..af6c5da21b1 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/loopfilter_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/loopfilter_avx2.c
@@ -32,7 +32,7 @@ void aom_lpf_horizontal_6_quad_avx2(unsigned char *s, int p,
_mm_broadcastb_epi8(_mm_cvtsi32_si128((int)_limit0[0]));
const __m128i blimit_v =
_mm_broadcastb_epi8(_mm_cvtsi32_si128((int)_blimit0[0]));
- const __m128i zero = _mm_set1_epi16(0);
+ const __m128i zero = _mm_setzero_si128();
const __m128i ff = _mm_cmpeq_epi8(zero, zero);
p256_2 =
@@ -239,7 +239,7 @@ void aom_lpf_horizontal_8_quad_avx2(unsigned char *s, int p,
_mm_broadcastb_epi8(_mm_cvtsi32_si128((int)_limit0[0]));
const __m128i blimit_v =
_mm_broadcastb_epi8(_mm_cvtsi32_si128((int)_blimit0[0]));
- const __m128i zero = _mm_set1_epi16(0);
+ const __m128i zero = _mm_setzero_si128();
const __m128i ff = _mm_cmpeq_epi8(zero, zero);
p256_3 =
@@ -486,7 +486,7 @@ void aom_lpf_horizontal_14_quad_avx2(unsigned char *s, int p,
_mm_broadcastb_epi8(_mm_cvtsi32_si128((int)_limit0[0]));
const __m128i blimit_v =
_mm_broadcastb_epi8(_mm_cvtsi32_si128((int)_blimit0[0]));
- const __m128i zero = _mm_set1_epi16(0);
+ const __m128i zero = _mm_setzero_si128();
const __m128i ff = _mm_cmpeq_epi8(zero, zero);
p256_3 =
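
The only change in the AVX2 loopfilters is the zero idiom: _mm_set1_epi16(0) and _mm_setzero_si128() produce bit-identical all-zero registers, but the latter states the intent directly and typically lowers to a single pxor. Sketch of the surrounding setup:

const __m128i zero = _mm_setzero_si128();       /* canonical zero register */
const __m128i ff = _mm_cmpeq_epi8(zero, zero);  /* all-ones mask, as above */
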
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/loopfilter_sse2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/loopfilter_sse2.c
index 87c5bb32a4b..731dd1031b8 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/loopfilter_sse2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/loopfilter_sse2.c
@@ -2133,7 +2133,7 @@ void aom_lpf_horizontal_14_quad_sse2(unsigned char *s, int p,
const unsigned char *_blimit0,
const unsigned char *_limit0,
const unsigned char *_thresh0) {
- const __m128i zero = _mm_set1_epi16(0);
+ const __m128i zero = _mm_setzero_si128();
const __m128i one = _mm_set1_epi8(1);
const __m128i blimit_v = _mm_load_si128((const __m128i *)_blimit0);
const __m128i limit_v = _mm_load_si128((const __m128i *)_limit0);
@@ -2438,7 +2438,7 @@ void aom_lpf_horizontal_8_quad_sse2(unsigned char *s, int p,
const unsigned char *_blimit0,
const unsigned char *_limit0,
const unsigned char *_thresh0) {
- const __m128i zero = _mm_set1_epi16(0);
+ const __m128i zero = _mm_setzero_si128();
const __m128i one = _mm_set1_epi8(1);
const __m128i blimit_v = _mm_load_si128((const __m128i *)_blimit0);
const __m128i limit_v = _mm_load_si128((const __m128i *)_limit0);
@@ -2630,7 +2630,7 @@ void aom_lpf_horizontal_6_quad_sse2(unsigned char *s, int p,
const unsigned char *_blimit0,
const unsigned char *_limit0,
const unsigned char *_thresh0) {
- const __m128i zero = _mm_set1_epi16(0);
+ const __m128i zero = _mm_setzero_si128();
const __m128i one = _mm_set1_epi8(1);
const __m128i blimit_v = _mm_load_si128((const __m128i *)_blimit0);
const __m128i limit_v = _mm_load_si128((const __m128i *)_limit0);
@@ -2802,7 +2802,7 @@ void aom_lpf_horizontal_4_quad_sse2(unsigned char *s, int p,
const unsigned char *_blimit0,
const unsigned char *_limit0,
const unsigned char *_thresh0) {
- const __m128i zero = _mm_set1_epi16(0);
+ const __m128i zero = _mm_setzero_si128();
const __m128i blimit_v = _mm_load_si128((const __m128i *)_blimit0);
const __m128i limit_v = _mm_load_si128((const __m128i *)_limit0);
const __m128i thresh_v = _mm_load_si128((const __m128i *)_thresh0);
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/masked_sad4d_ssse3.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/masked_sad4d_ssse3.c
index 1235f27797a..799ce9ef441 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/masked_sad4d_ssse3.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/masked_sad4d_ssse3.c
@@ -153,15 +153,15 @@ void aom_masked_sad8xhx4d_ssse3(const uint8_t *src_ptr, int src_stride,
_mm_storeu_si128((__m128i *)sad_array, res0);
}
-#define MASK_SAD4XH_ONE_REF(idx) \
- a = _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(uint32_t *)ref##idx), \
- _mm_cvtsi32_si128(*(uint32_t *)&ref##idx[a_stride])); \
- data = _mm_unpacklo_epi8(a, b); \
- mask = _mm_unpacklo_epi8(m, m_inv); \
- pred = _mm_maddubs_epi16(data, mask); \
- pred = xx_roundn_epu16(pred, AOM_BLEND_A64_ROUND_BITS); \
- \
- pred = _mm_packus_epi16(pred, _mm_setzero_si128()); \
+#define MASK_SAD4XH_ONE_REF(idx) \
+ a = _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(int *)ref##idx), \
+ _mm_cvtsi32_si128(*(int *)&ref##idx[a_stride])); \
+ data = _mm_unpacklo_epi8(a, b); \
+ mask = _mm_unpacklo_epi8(m, m_inv); \
+ pred = _mm_maddubs_epi16(data, mask); \
+ pred = xx_roundn_epu16(pred, AOM_BLEND_A64_ROUND_BITS); \
+ \
+ pred = _mm_packus_epi16(pred, _mm_setzero_si128()); \
res##idx = _mm_add_epi32(res##idx, _mm_sad_epu8(pred, src));
void aom_masked_sad4xhx4d_ssse3(const uint8_t *src_ptr, int src_stride,
@@ -182,15 +182,15 @@ void aom_masked_sad4xhx4d_ssse3(const uint8_t *src_ptr, int src_stride,
const __m128i mask_max = _mm_set1_epi8((1 << AOM_BLEND_A64_ROUND_BITS));
for (int y = 0; y < height; y += 2) {
- const __m128i src = _mm_unpacklo_epi32(
- _mm_cvtsi32_si128(*(uint32_t *)src_ptr),
- _mm_cvtsi32_si128(*(uint32_t *)&src_ptr[src_stride]));
+ const __m128i src =
+ _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(int *)src_ptr),
+ _mm_cvtsi32_si128(*(int *)&src_ptr[src_stride]));
const __m128i b =
- _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(uint32_t *)b_ptr),
- _mm_cvtsi32_si128(*(uint32_t *)&b_ptr[b_stride]));
+ _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(int *)b_ptr),
+ _mm_cvtsi32_si128(*(int *)&b_ptr[b_stride]));
const __m128i m_copy =
- _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(uint32_t *)m_ptr),
- _mm_cvtsi32_si128(*(uint32_t *)&m_ptr[m_stride]));
+ _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(int *)m_ptr),
+ _mm_cvtsi32_si128(*(int *)&m_ptr[m_stride]));
__m128i m_inv = _mm_sub_epi8(mask_max, m_copy);
__m128i m = inv_mask ? m_inv : m_copy;
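
For reference, what MASK_SAD4XH_ONE_REF computes per pixel, given the setup above (mask_max = 1 << AOM_BLEND_A64_ROUND_BITS, i.e. 64): data interleaves the two references as (a0, b0, a1, b1, ...) and mask interleaves (m0, 64-m0, m1, 64-m1, ...), so _mm_maddubs_epi16 -- unsigned data bytes times signed mask bytes, adjacent products summed -- yields, per pixel,

  pred = (a * m + b * (64 - m) + 32) >> 6

once xx_roundn_epu16(pred, 6) applies the rounding shift. The pair sum is at most 255 * 64, so the intrinsic's signed saturation never triggers.
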
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/masked_sad_intrin_ssse3.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/masked_sad_intrin_ssse3.c
index 71682779637..df3a8764e32 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/masked_sad_intrin_ssse3.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/masked_sad_intrin_ssse3.c
@@ -132,8 +132,8 @@ static INLINE unsigned int masked_sad_ssse3(const uint8_t *src_ptr,
m_ptr += m_stride;
}
// At this point, we have two 32-bit partial SADs in lanes 0 and 2 of 'res'.
- int32_t sad =
- _mm_cvtsi128_si32(res) + _mm_cvtsi128_si32(_mm_srli_si128(res, 8));
+ unsigned int sad = (unsigned int)(_mm_cvtsi128_si32(res) +
+ _mm_cvtsi128_si32(_mm_srli_si128(res, 8)));
return sad;
}
@@ -177,8 +177,8 @@ unsigned int aom_masked_sad8xh_ssse3(const uint8_t *src_ptr, int src_stride,
b_ptr += b_stride * 2;
m_ptr += m_stride * 2;
}
- int32_t sad =
- _mm_cvtsi128_si32(res) + _mm_cvtsi128_si32(_mm_srli_si128(res, 8));
+ unsigned int sad = (unsigned int)(_mm_cvtsi128_si32(res) +
+ _mm_cvtsi128_si32(_mm_srli_si128(res, 8)));
return sad;
}
@@ -194,18 +194,18 @@ unsigned int aom_masked_sad4xh_ssse3(const uint8_t *src_ptr, int src_stride,
for (y = 0; y < height; y += 2) {
// Load two rows at a time, this seems to be a bit faster
// than four rows at a time in this case.
- const __m128i src = _mm_unpacklo_epi32(
- _mm_cvtsi32_si128(*(uint32_t *)src_ptr),
- _mm_cvtsi32_si128(*(uint32_t *)&src_ptr[src_stride]));
+ const __m128i src =
+ _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(int *)src_ptr),
+ _mm_cvtsi32_si128(*(int *)&src_ptr[src_stride]));
const __m128i a =
- _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(uint32_t *)a_ptr),
- _mm_cvtsi32_si128(*(uint32_t *)&a_ptr[a_stride]));
+ _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(int *)a_ptr),
+ _mm_cvtsi32_si128(*(int *)&a_ptr[a_stride]));
const __m128i b =
- _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(uint32_t *)b_ptr),
- _mm_cvtsi32_si128(*(uint32_t *)&b_ptr[b_stride]));
+ _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(int *)b_ptr),
+ _mm_cvtsi32_si128(*(int *)&b_ptr[b_stride]));
const __m128i m =
- _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(uint32_t *)m_ptr),
- _mm_cvtsi32_si128(*(uint32_t *)&m_ptr[m_stride]));
+ _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(int *)m_ptr),
+ _mm_cvtsi32_si128(*(int *)&m_ptr[m_stride]));
const __m128i m_inv = _mm_sub_epi8(mask_max, m);
const __m128i data = _mm_unpacklo_epi8(a, b);
@@ -222,8 +222,7 @@ unsigned int aom_masked_sad4xh_ssse3(const uint8_t *src_ptr, int src_stride,
m_ptr += m_stride * 2;
}
// At this point, the SAD is stored in lane 0 of 'res'
- int32_t sad = _mm_cvtsi128_si32(res);
- return sad;
+ return (unsigned int)_mm_cvtsi128_si32(res);
}
// For width a multiple of 8
@@ -368,9 +367,8 @@ unsigned int aom_highbd_masked_sad4xh_ssse3(const uint8_t *src8, int src_stride,
_mm_loadl_epi64((const __m128i *)&b_ptr[b_stride]));
// Zero-extend mask to 16 bits
const __m128i m = _mm_unpacklo_epi8(
- _mm_unpacklo_epi32(
- _mm_cvtsi32_si128(*(const uint32_t *)m_ptr),
- _mm_cvtsi32_si128(*(const uint32_t *)&m_ptr[m_stride])),
+ _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(const int *)m_ptr),
+ _mm_cvtsi32_si128(*(const int *)&m_ptr[m_stride])),
_mm_setzero_si128());
const __m128i m_inv = _mm_sub_epi16(mask_max, m);
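
The 4-wide kernels above repeat one loading idiom everywhere the uint32_t casts were touched: two rows of four bytes packed into the low half of a register. As a hypothetical helper (the patch keeps it inline):

static __m128i load_two_rows_of_4(const uint8_t *p, int stride) {
  /* Rows p[0..3] and p[stride..stride+3] land in bytes 0-7. */
  return _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(const int *)p),
                            _mm_cvtsi32_si128(*(const int *)&p[stride]));
}
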
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/masked_variance_intrin_ssse3.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/masked_variance_intrin_ssse3.c
index bfd86ee410f..0bf383fffd2 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/masked_variance_intrin_ssse3.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/masked_variance_intrin_ssse3.c
@@ -312,7 +312,7 @@ static void bilinear_filter4xh(const uint8_t *src, int src_stride, int xoffset,
uint8_t *b = dst;
for (i = 0; i < h + 1; ++i) {
__m128i x = xx_loadl_32((__m128i *)src);
- xx_storel_32((__m128i *)b, x);
+ xx_storel_32(b, x);
src += src_stride;
b += 4;
}
@@ -321,7 +321,7 @@ static void bilinear_filter4xh(const uint8_t *src, int src_stride, int xoffset,
for (i = 0; i < h + 1; ++i) {
__m128i x = _mm_loadl_epi64((__m128i *)src);
__m128i z = _mm_srli_si128(x, 1);
- xx_storel_32((__m128i *)b, _mm_avg_epu8(x, z));
+ xx_storel_32(b, _mm_avg_epu8(x, z));
src += src_stride;
b += 4;
}
@@ -357,7 +357,7 @@ static void bilinear_filter4xh(const uint8_t *src, int src_stride, int xoffset,
v0 = _mm_maddubs_epi16(v0, hfilter_vec);
v0 = xx_roundn_epu16(v0, FILTER_BITS);
- xx_storel_32((__m128i *)b, _mm_packus_epi16(v0, v0));
+ xx_storel_32(b, _mm_packus_epi16(v0, v0));
}
// Vertical filter
@@ -367,7 +367,7 @@ static void bilinear_filter4xh(const uint8_t *src, int src_stride, int xoffset,
for (i = 0; i < h; ++i) {
__m128i x = xx_loadl_32((__m128i *)dst);
__m128i y = xx_loadl_32((__m128i *)&dst[4]);
- xx_storel_32((__m128i *)dst, _mm_avg_epu8(x, y));
+ xx_storel_32(dst, _mm_avg_epu8(x, y));
dst += 4;
}
} else {
@@ -452,7 +452,7 @@ static void masked_variance(const uint8_t *src_ptr, int src_stride,
sum = _mm_hadd_epi32(sum, sum_sq);
sum = _mm_hadd_epi32(sum, sum);
*sum_ = _mm_cvtsi128_si32(sum);
- *sse = _mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
+ *sse = (unsigned int)_mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
}
static void masked_variance8xh(const uint8_t *src_ptr, int src_stride,
@@ -482,7 +482,7 @@ static void masked_variance8xh(const uint8_t *src_ptr, int src_stride,
sum = _mm_hadd_epi32(sum, sum_sq);
sum = _mm_hadd_epi32(sum, sum);
*sum_ = _mm_cvtsi128_si32(sum);
- *sse = _mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
+ *sse = (unsigned int)_mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
}
static void masked_variance4xh(const uint8_t *src_ptr, int src_stride,
@@ -494,15 +494,14 @@ static void masked_variance4xh(const uint8_t *src_ptr, int src_stride,
for (y = 0; y < height; y += 4) {
// Load four rows at a time
- __m128i src =
- _mm_setr_epi32(*(uint32_t *)src_ptr, *(uint32_t *)&src_ptr[src_stride],
- *(uint32_t *)&src_ptr[src_stride * 2],
- *(uint32_t *)&src_ptr[src_stride * 3]);
+ __m128i src = _mm_setr_epi32(*(int *)src_ptr, *(int *)&src_ptr[src_stride],
+ *(int *)&src_ptr[src_stride * 2],
+ *(int *)&src_ptr[src_stride * 3]);
const __m128i a = _mm_loadu_si128((const __m128i *)a_ptr);
const __m128i b = _mm_loadu_si128((const __m128i *)b_ptr);
- const __m128i m = _mm_setr_epi32(
- *(uint32_t *)m_ptr, *(uint32_t *)&m_ptr[m_stride],
- *(uint32_t *)&m_ptr[m_stride * 2], *(uint32_t *)&m_ptr[m_stride * 3]);
+ const __m128i m = _mm_setr_epi32(*(int *)m_ptr, *(int *)&m_ptr[m_stride],
+ *(int *)&m_ptr[m_stride * 2],
+ *(int *)&m_ptr[m_stride * 3]);
accumulate_block(&src, &a, &b, &m, &sum, &sum_sq);
src_ptr += src_stride * 4;
@@ -514,7 +513,7 @@ static void masked_variance4xh(const uint8_t *src_ptr, int src_stride,
sum = _mm_hadd_epi32(sum, sum_sq);
sum = _mm_hadd_epi32(sum, sum);
*sum_ = _mm_cvtsi128_si32(sum);
- *sse = _mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
+ *sse = (unsigned int)_mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
}
#if CONFIG_AV1_HIGHBITDEPTH
@@ -986,9 +985,8 @@ static void highbd_masked_variance4xh(const uint16_t *src_ptr, int src_stride,
const __m128i a = _mm_loadu_si128((const __m128i *)a_ptr);
const __m128i b = _mm_loadu_si128((const __m128i *)b_ptr);
const __m128i m = _mm_unpacklo_epi8(
- _mm_unpacklo_epi32(
- _mm_cvtsi32_si128(*(const uint32_t *)m_ptr),
- _mm_cvtsi32_si128(*(const uint32_t *)&m_ptr[m_stride])),
+ _mm_unpacklo_epi32(_mm_cvtsi32_si128(*(const int *)m_ptr),
+ _mm_cvtsi32_si128(*(const int *)&m_ptr[m_stride])),
zero);
const __m128i m_inv = _mm_sub_epi16(mask_max, m);
@@ -1024,7 +1022,7 @@ static void highbd_masked_variance4xh(const uint16_t *src_ptr, int src_stride,
sum = _mm_hadd_epi32(sum, sum_sq);
sum = _mm_hadd_epi32(sum, zero);
*sum_ = _mm_cvtsi128_si32(sum);
- *sse = _mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
+ *sse = (unsigned int)_mm_cvtsi128_si32(_mm_srli_si128(sum, 4));
}
#endif // CONFIG_AV1_HIGHBITDEPTH
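
The recurring epilogue above reduces one register of per-lane sums and one of per-lane squared sums to two scalars; after the two horizontal adds, lane 0 holds the sum and lane 1 the sum of squares, and the new cast makes the signed-to-unsigned move explicit. A standalone sketch under those assumptions, with a hypothetical name:

#include <tmmintrin.h>  // SSSE3: _mm_hadd_epi32

// Hypothetical helper mirroring the masked_variance() epilogue.
static void reduce_sum_sse(__m128i v_sum, __m128i v_sum_sq, int *sum,
                           unsigned int *sse) {
  __m128i t = _mm_hadd_epi32(v_sum, v_sum_sq);  // s01 s23 q01 q23
  t = _mm_hadd_epi32(t, t);                     // lane 0: sum, lane 1: sse
  *sum = _mm_cvtsi128_si32(t);
  *sse = (unsigned int)_mm_cvtsi128_si32(_mm_srli_si128(t, 4));
}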
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/mem_sse2.h b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/mem_sse2.h
index dacb6136412..085a572cb10 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/mem_sse2.h
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/mem_sse2.h
@@ -19,20 +19,20 @@
#include "aom/aom_integer.h"
-static INLINE uint16_t loadu_uint16(const void *src) {
- uint16_t v;
+static INLINE int16_t loadu_int16(const void *src) {
+ int16_t v;
memcpy(&v, src, sizeof(v));
return v;
}
-static INLINE uint32_t loadu_uint32(const void *src) {
- uint32_t v;
+static INLINE int32_t loadu_int32(const void *src) {
+ int32_t v;
memcpy(&v, src, sizeof(v));
return v;
}
-static INLINE uint64_t loadu_uint64(const void *src) {
- uint64_t v;
+static INLINE int64_t loadu_int64(const void *src) {
+ int64_t v;
memcpy(&v, src, sizeof(v));
return v;
}
@@ -48,10 +48,10 @@ static INLINE __m128i loadh_epi64(const void *const src, const __m128i s) {
static INLINE __m128i load_8bit_4x4_to_1_reg_sse2(const void *const src,
const int byte_stride) {
- return _mm_setr_epi32(loadu_uint32((int8_t *)src + 0 * byte_stride),
- loadu_uint32((int8_t *)src + 1 * byte_stride),
- loadu_uint32((int8_t *)src + 2 * byte_stride),
- loadu_uint32((int8_t *)src + 3 * byte_stride));
+ return _mm_setr_epi32(loadu_int32((int8_t *)src + 0 * byte_stride),
+ loadu_int32((int8_t *)src + 1 * byte_stride),
+ loadu_int32((int8_t *)src + 2 * byte_stride),
+ loadu_int32((int8_t *)src + 3 * byte_stride));
}
static INLINE __m128i load_8bit_8x2_to_1_reg_sse2(const void *const src,
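
The memcpy-based helpers compile to single unaligned moves while staying strict-aliasing clean; the switch to signed types matches the int parameters of intrinsics such as _mm_setr_epi32(). A minimal usage sketch, assuming the loadu_int32() shown above:

#include <emmintrin.h>
#include <stdint.h>

// Gather four unaligned 32-bit words into one XMM register, as
// load_8bit_4x4_to_1_reg_sse2() does; no implicit uint-to-int conversions.
static __m128i gather_4x32(const uint8_t *p, int byte_stride) {
  return _mm_setr_epi32(loadu_int32(p + 0 * byte_stride),
                        loadu_int32(p + 1 * byte_stride),
                        loadu_int32(p + 2 * byte_stride),
                        loadu_int32(p + 3 * byte_stride));
}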
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/obmc_intrinsic_sse4.h b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/obmc_intrinsic_sse4.h
index 5181e444c02..210f466b6f9 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/obmc_intrinsic_sse4.h
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/obmc_intrinsic_sse4.h
@@ -28,7 +28,7 @@ static INLINE void obmc_variance_w4(const uint8_t *pre, const int pre_stride,
assert(IS_POWER_OF_TWO(h));
do {
- const __m128i v_p_b = _mm_cvtsi32_si128(*(const uint32_t *)(pre + n));
+ const __m128i v_p_b = _mm_cvtsi32_si128(*(const int *)(pre + n));
const __m128i v_m_d = _mm_load_si128((const __m128i *)(mask + n));
const __m128i v_w_d = _mm_load_si128((const __m128i *)(wsrc + n));
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/obmc_variance_avx2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/obmc_variance_avx2.c
index bfec0e8a84b..b2df8a953d9 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/obmc_variance_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/obmc_variance_avx2.c
@@ -77,7 +77,7 @@ static INLINE void obmc_variance_w8n(const uint8_t *pre, const int pre_stride,
v_d = _mm_hadd_epi32(v_sum_d, v_sse_d);
v_d = _mm_hadd_epi32(v_d, v_d);
*sum = _mm_cvtsi128_si32(v_d);
- *sse = _mm_cvtsi128_si32(_mm_srli_si128(v_d, 4));
+ *sse = (unsigned int)_mm_cvtsi128_si32(_mm_srli_si128(v_d, 4));
}
static INLINE void obmc_variance_w16n(const uint8_t *pre, const int pre_stride,
@@ -147,7 +147,7 @@ static INLINE void obmc_variance_w16n(const uint8_t *pre, const int pre_stride,
res0 = _mm256_castsi256_si128(v_d);
res0 = _mm_add_epi32(res0, _mm256_extractf128_si256(v_d, 1));
*sum = _mm_cvtsi128_si32(res0);
- *sse = _mm_cvtsi128_si32(_mm_srli_si128(res0, 4));
+ *sse = (unsigned int)_mm_cvtsi128_si32(_mm_srli_si128(res0, 4));
}
#define OBMCVARWXH(W, H) \
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/quantize_avx2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/quantize_avx2.c
new file mode 100644
index 00000000000..b808d46778e
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/quantize_avx2.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <immintrin.h>
+#include "config/aom_dsp_rtcd.h"
+#include "aom/aom_integer.h"
+#include "aom_dsp/x86/quantize_x86.h"
+
+static INLINE void load_b_values_avx2(const int16_t *zbin_ptr, __m256i *zbin,
+ const int16_t *round_ptr, __m256i *round,
+ const int16_t *quant_ptr, __m256i *quant,
+ const int16_t *dequant_ptr,
+ __m256i *dequant,
+ const int16_t *shift_ptr, __m256i *shift,
+ int log_scale) {
+ *zbin = _mm256_castsi128_si256(_mm_load_si128((const __m128i *)zbin_ptr));
+ *zbin = _mm256_permute4x64_epi64(*zbin, 0x54);
+ if (log_scale > 0) {
+ const __m256i rnd = _mm256_set1_epi16((int16_t)(1 << (log_scale - 1)));
+ *zbin = _mm256_add_epi16(*zbin, rnd);
+ *zbin = _mm256_srai_epi16(*zbin, log_scale);
+ }
+ // Subtracting 1 here eliminates a _mm256_cmpeq_epi16() instruction when
+ // calculating the zbin mask. (See quantize_b_logscale{0,1,2}_16)
+ *zbin = _mm256_sub_epi16(*zbin, _mm256_set1_epi16(1));
+
+ *round = _mm256_castsi128_si256(_mm_load_si128((const __m128i *)round_ptr));
+ *round = _mm256_permute4x64_epi64(*round, 0x54);
+ if (log_scale > 0) {
+ const __m256i rnd = _mm256_set1_epi16((int16_t)(1 << (log_scale - 1)));
+ *round = _mm256_add_epi16(*round, rnd);
+ *round = _mm256_srai_epi16(*round, log_scale);
+ }
+
+ *quant = _mm256_castsi128_si256(_mm_load_si128((const __m128i *)quant_ptr));
+ *quant = _mm256_permute4x64_epi64(*quant, 0x54);
+ *dequant =
+ _mm256_castsi128_si256(_mm_load_si128((const __m128i *)dequant_ptr));
+ *dequant = _mm256_permute4x64_epi64(*dequant, 0x54);
+ *shift = _mm256_castsi128_si256(_mm_load_si128((const __m128i *)shift_ptr));
+ *shift = _mm256_permute4x64_epi64(*shift, 0x54);
+}
+
+static INLINE __m256i load_coefficients_avx2(const tran_low_t *coeff_ptr) {
+ const __m256i coeff1 = _mm256_load_si256((__m256i *)coeff_ptr);
+ const __m256i coeff2 = _mm256_load_si256((__m256i *)(coeff_ptr + 8));
+ return _mm256_packs_epi32(coeff1, coeff2);
+}
+
+static INLINE void store_coefficients_avx2(__m256i coeff_vals,
+ tran_low_t *coeff_ptr) {
+ __m256i coeff_sign = _mm256_srai_epi16(coeff_vals, 15);
+ __m256i coeff_vals_lo = _mm256_unpacklo_epi16(coeff_vals, coeff_sign);
+ __m256i coeff_vals_hi = _mm256_unpackhi_epi16(coeff_vals, coeff_sign);
+ _mm256_store_si256((__m256i *)coeff_ptr, coeff_vals_lo);
+ _mm256_store_si256((__m256i *)(coeff_ptr + 8), coeff_vals_hi);
+}
+
+static AOM_FORCE_INLINE __m256i quantize_b_logscale0_16(
+ const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, __m256i *v_quant, __m256i *v_dequant,
+ __m256i *v_round, __m256i *v_zbin, __m256i *v_quant_shift) {
+ const __m256i v_coeff = load_coefficients_avx2(coeff_ptr);
+ const __m256i v_abs_coeff = _mm256_abs_epi16(v_coeff);
+ const __m256i v_zbin_mask = _mm256_cmpgt_epi16(v_abs_coeff, *v_zbin);
+
+ if (_mm256_movemask_epi8(v_zbin_mask) == 0) {
+ _mm256_store_si256((__m256i *)qcoeff_ptr, _mm256_setzero_si256());
+ _mm256_store_si256((__m256i *)dqcoeff_ptr, _mm256_setzero_si256());
+ _mm256_store_si256((__m256i *)(qcoeff_ptr + 8), _mm256_setzero_si256());
+ _mm256_store_si256((__m256i *)(dqcoeff_ptr + 8), _mm256_setzero_si256());
+ return _mm256_setzero_si256();
+ }
+
+ // tmp = v_zbin_mask ? (int64_t)abs_coeff + log_scaled_round : 0
+ const __m256i v_tmp_rnd =
+ _mm256_and_si256(_mm256_adds_epi16(v_abs_coeff, *v_round), v_zbin_mask);
+ // tmp32 = (int)(((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
+ // quant_shift_ptr[rc != 0]) >>
+ // (16 - log_scale + AOM_QM_BITS));
+ const __m256i v_tmp32_a = _mm256_mulhi_epi16(v_tmp_rnd, *v_quant);
+ const __m256i v_tmp32_b = _mm256_add_epi16(v_tmp32_a, v_tmp_rnd);
+ const __m256i v_tmp32 = _mm256_mulhi_epi16(v_tmp32_b, *v_quant_shift);
+ const __m256i v_nz_mask = _mm256_cmpgt_epi16(v_tmp32, _mm256_setzero_si256());
+ const __m256i v_qcoeff = _mm256_sign_epi16(v_tmp32, v_coeff);
+ const __m256i v_dqcoeff = _mm256_mullo_epi16(v_qcoeff, *v_dequant);
+ store_coefficients_avx2(v_qcoeff, qcoeff_ptr);
+ store_coefficients_avx2(v_dqcoeff, dqcoeff_ptr);
+ return v_nz_mask;
+}
+
+static INLINE __m256i get_max_lane_eob(const int16_t *iscan, __m256i v_eobmax,
+ __m256i v_mask) {
+ const __m256i v_iscan = _mm256_loadu_si256((const __m256i *)iscan);
+ const __m256i v_iscan_perm = _mm256_permute4x64_epi64(v_iscan, 0xD8);
+ const __m256i v_iscan_plus1 = _mm256_sub_epi16(v_iscan_perm, v_mask);
+ const __m256i v_nz_iscan = _mm256_and_si256(v_iscan_plus1, v_mask);
+ return _mm256_max_epi16(v_eobmax, v_nz_iscan);
+}
+
+static INLINE int16_t accumulate_eob256(__m256i eob256) {
+ const __m128i eob_lo = _mm256_castsi256_si128(eob256);
+ const __m128i eob_hi = _mm256_extractf128_si256(eob256, 1);
+ __m128i eob = _mm_max_epi16(eob_lo, eob_hi);
+ __m128i eob_shuffled = _mm_shuffle_epi32(eob, 0xe);
+ eob = _mm_max_epi16(eob, eob_shuffled);
+ eob_shuffled = _mm_shufflelo_epi16(eob, 0xe);
+ eob = _mm_max_epi16(eob, eob_shuffled);
+ eob_shuffled = _mm_shufflelo_epi16(eob, 0x1);
+ eob = _mm_max_epi16(eob, eob_shuffled);
+ return _mm_extract_epi16(eob, 1);
+}
+
+void aom_quantize_b_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const int16_t *zbin_ptr, const int16_t *round_ptr,
+ const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
+ (void)scan;
+ __m256i v_zbin, v_round, v_quant, v_dequant, v_quant_shift;
+ __m256i v_eobmax = _mm256_setzero_si256();
+
+ load_b_values_avx2(zbin_ptr, &v_zbin, round_ptr, &v_round, quant_ptr,
+ &v_quant, dequant_ptr, &v_dequant, quant_shift_ptr,
+ &v_quant_shift, 0);
+
+ // Do DC and first 15 AC.
+ __m256i v_nz_mask =
+ quantize_b_logscale0_16(coeff_ptr, qcoeff_ptr, dqcoeff_ptr, &v_quant,
+ &v_dequant, &v_round, &v_zbin, &v_quant_shift);
+
+ v_eobmax = get_max_lane_eob(iscan, v_eobmax, v_nz_mask);
+
+ v_round = _mm256_unpackhi_epi64(v_round, v_round);
+ v_quant = _mm256_unpackhi_epi64(v_quant, v_quant);
+ v_dequant = _mm256_unpackhi_epi64(v_dequant, v_dequant);
+ v_quant_shift = _mm256_unpackhi_epi64(v_quant_shift, v_quant_shift);
+ v_zbin = _mm256_unpackhi_epi64(v_zbin, v_zbin);
+
+ for (intptr_t count = n_coeffs - 16; count > 0; count -= 16) {
+ coeff_ptr += 16;
+ qcoeff_ptr += 16;
+ dqcoeff_ptr += 16;
+ iscan += 16;
+ v_nz_mask =
+ quantize_b_logscale0_16(coeff_ptr, qcoeff_ptr, dqcoeff_ptr, &v_quant,
+ &v_dequant, &v_round, &v_zbin, &v_quant_shift);
+
+ v_eobmax = get_max_lane_eob(iscan, v_eobmax, v_nz_mask);
+ }
+
+ *eob_ptr = accumulate_eob256(v_eobmax);
+}
+
+static AOM_FORCE_INLINE __m256i quantize_b_logscale_16(
+ const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, __m256i *v_quant, __m256i *v_dequant,
+ __m256i *v_round, __m256i *v_zbin, __m256i *v_quant_shift, int log_scale) {
+ const __m256i v_coeff = load_coefficients_avx2(coeff_ptr);
+ const __m256i v_abs_coeff = _mm256_abs_epi16(v_coeff);
+ const __m256i v_zbin_mask = _mm256_cmpgt_epi16(v_abs_coeff, *v_zbin);
+
+ if (_mm256_movemask_epi8(v_zbin_mask) == 0) {
+ _mm256_store_si256((__m256i *)qcoeff_ptr, _mm256_setzero_si256());
+ _mm256_store_si256((__m256i *)dqcoeff_ptr, _mm256_setzero_si256());
+ _mm256_store_si256((__m256i *)(qcoeff_ptr + 8), _mm256_setzero_si256());
+ _mm256_store_si256((__m256i *)(dqcoeff_ptr + 8), _mm256_setzero_si256());
+ return _mm256_setzero_si256();
+ }
+
+ // tmp = v_zbin_mask ? (int64_t)abs_coeff + log_scaled_round : 0
+ const __m256i v_tmp_rnd =
+ _mm256_and_si256(_mm256_adds_epi16(v_abs_coeff, *v_round), v_zbin_mask);
+ // tmp32 = (int)(((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) *
+ // quant_shift_ptr[rc != 0]) >>
+ // (16 - log_scale + AOM_QM_BITS));
+ const __m256i v_tmp32_a = _mm256_mulhi_epi16(v_tmp_rnd, *v_quant);
+ const __m256i v_tmp32_b = _mm256_add_epi16(v_tmp32_a, v_tmp_rnd);
+ const __m256i v_tmp32_hi = _mm256_slli_epi16(
+ _mm256_mulhi_epi16(v_tmp32_b, *v_quant_shift), log_scale);
+ const __m256i v_tmp32_lo = _mm256_srli_epi16(
+ _mm256_mullo_epi16(v_tmp32_b, *v_quant_shift), 16 - log_scale);
+ const __m256i v_tmp32 = _mm256_or_si256(v_tmp32_hi, v_tmp32_lo);
+ const __m256i v_dqcoeff_hi = _mm256_slli_epi16(
+ _mm256_mulhi_epi16(v_tmp32, *v_dequant), 16 - log_scale);
+ const __m256i v_dqcoeff_lo =
+ _mm256_srli_epi16(_mm256_mullo_epi16(v_tmp32, *v_dequant), log_scale);
+ const __m256i v_dqcoeff =
+ _mm256_sign_epi16(_mm256_or_si256(v_dqcoeff_hi, v_dqcoeff_lo), v_coeff);
+ const __m256i v_qcoeff = _mm256_sign_epi16(v_tmp32, v_coeff);
+ const __m256i v_nz_mask = _mm256_cmpgt_epi16(v_tmp32, _mm256_setzero_si256());
+ store_coefficients_avx2(v_qcoeff, qcoeff_ptr);
+ store_coefficients_avx2(v_dqcoeff, dqcoeff_ptr);
+ return v_nz_mask;
+}
+
+static AOM_FORCE_INLINE void quantize_b_no_qmatrix_avx2(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *iscan, int log_scale) {
+ __m256i v_zbin, v_round, v_quant, v_dequant, v_quant_shift;
+ __m256i v_eobmax = _mm256_setzero_si256();
+
+ load_b_values_avx2(zbin_ptr, &v_zbin, round_ptr, &v_round, quant_ptr,
+ &v_quant, dequant_ptr, &v_dequant, quant_shift_ptr,
+ &v_quant_shift, log_scale);
+
+ // Do DC and first 15 AC.
+ __m256i v_nz_mask = quantize_b_logscale_16(
+ coeff_ptr, qcoeff_ptr, dqcoeff_ptr, &v_quant, &v_dequant, &v_round,
+ &v_zbin, &v_quant_shift, log_scale);
+
+ v_eobmax = get_max_lane_eob(iscan, v_eobmax, v_nz_mask);
+
+ v_round = _mm256_unpackhi_epi64(v_round, v_round);
+ v_quant = _mm256_unpackhi_epi64(v_quant, v_quant);
+ v_dequant = _mm256_unpackhi_epi64(v_dequant, v_dequant);
+ v_quant_shift = _mm256_unpackhi_epi64(v_quant_shift, v_quant_shift);
+ v_zbin = _mm256_unpackhi_epi64(v_zbin, v_zbin);
+
+ for (intptr_t count = n_coeffs - 16; count > 0; count -= 16) {
+ coeff_ptr += 16;
+ qcoeff_ptr += 16;
+ dqcoeff_ptr += 16;
+ iscan += 16;
+ v_nz_mask = quantize_b_logscale_16(coeff_ptr, qcoeff_ptr, dqcoeff_ptr,
+ &v_quant, &v_dequant, &v_round, &v_zbin,
+ &v_quant_shift, log_scale);
+
+ v_eobmax = get_max_lane_eob(iscan, v_eobmax, v_nz_mask);
+ }
+
+ *eob_ptr = accumulate_eob256(v_eobmax);
+}
+
+void aom_quantize_b_32x32_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const int16_t *zbin_ptr,
+ const int16_t *round_ptr,
+ const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ (void)scan;
+ quantize_b_no_qmatrix_avx2(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
+ quant_ptr, quant_shift_ptr, qcoeff_ptr,
+ dqcoeff_ptr, dequant_ptr, eob_ptr, iscan, 1);
+}
+
+void aom_quantize_b_64x64_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const int16_t *zbin_ptr,
+ const int16_t *round_ptr,
+ const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ (void)scan;
+ quantize_b_no_qmatrix_avx2(coeff_ptr, n_coeffs, zbin_ptr, round_ptr,
+ quant_ptr, quant_shift_ptr, qcoeff_ptr,
+ dqcoeff_ptr, dequant_ptr, eob_ptr, iscan, 2);
+}
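
For orientation, a scalar sketch of the per-coefficient math the 16-lane kernels above vectorize (log_scale == 0, no quantization matrix, ignoring the 16-bit saturation details of the SIMD path), following the formula quoted in the comments:

#include <stdint.h>
#include <stdlib.h>

// Scalar sketch of one coefficient of aom_quantize_b. Returns the quantized
// level; *dq receives the dequantized value.
static int16_t quantize_coeff_sketch(int32_t coeff, int16_t zbin,
                                     int16_t round, int16_t quant,
                                     int16_t quant_shift, int16_t dequant,
                                     int32_t *dq) {
  const int32_t abs_coeff = abs(coeff);
  if (abs_coeff < zbin) {  // inside the zero bin: quantizes to zero
    *dq = 0;
    return 0;
  }
  const int32_t tmp = abs_coeff + round;
  const int32_t tmp2 = (int32_t)(((int64_t)tmp * quant) >> 16) + tmp;
  const int32_t tmp32 = (int32_t)(((int64_t)tmp2 * quant_shift) >> 16);
  const int32_t q = coeff < 0 ? -tmp32 : tmp32;  // restore the sign
  *dq = q * dequant;
  return (int16_t)q;
}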
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/sad4d_sse2.asm b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/sad4d_sse2.asm
index 9ab44c13403..6de708b9950 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/sad4d_sse2.asm
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/sad4d_sse2.asm
@@ -22,114 +22,95 @@ SECTION .text
pavgb %2, m2
lea second_predq, [second_predq+8]
%endmacro
-; 'mflag' affect a lot how the code works.
+; 'spill_src_stride' greatly affects how the code works.
;
-; When 'mflag' is false, the 'src_strideq' resides in register,
-; [srcq + src_strideq + offset] is allowed, so we can simply
-; use such form to access src memory and don't bother to update
-; 'srcq' at each line. We only update 'srcq' each two-lines using
-; a compact LEA instruction like [srcq+src_strideq*2].
+; When 'spill_src_stride' is false, 'src_strideq' resides in a
+; register and [srcq + src_strideq + offset] addressing is allowed, so
+; we can simply use that form to access src memory without updating
+; 'srcq' at each line. We only update 'srcq' every two lines, using a
+; compact LEA instruction like [srcq+src_strideq*2].
;
-; When 'mflag' is true, the 'src_strideq' resides in memory.
+; When 'spill_src_stride' is true, 'src_strideq' resides in memory.
; We cannot use the above form to access memory and have to update
; 'srcq' at each line break. As we process two parts (first, second)
; together in each macro function, the second part may also sit
; on the next line, which means we may also need to add one
; 'src_strideq' to 'srcq' before processing the second part.
-%macro HANDLE_FIRST_OFFSET 2
- %define first_offset %2
- %if mflag == 0 && %1 == 1
- %define first_offset (src_strideq + %2)
- %endif
-%endmacro
-
-; first_extraline, second_extraline, in_line_offset
-%macro HANDLE_SECOND_OFFSET 3
- %define second_offset %3
- %if mflag && %1 == 0 && %2 == 1
+%macro HANDLE_SECOND_OFFSET 0
+ %if spill_src_stride
+ %define second_offset 0
add srcq, src_strideq
- %endif
- %if mflag == 0 && %2 == 1
- %define second_offset (src_strideq + %3)
+ %else
+ %define second_offset (src_strideq)
%endif
%endmacro
-; Notes for line_ending:
-; 0 -- not a line ending
-; 1 -- line ending of a odd line [line numbers starts from one]
-; 2 -- line ending of a even line
; This is specifically designed to handle the case when src_strideq is
; a memory position; in that case we cannot accomplish complex address
; calculation using LEA and fall back to using a simple ADD instruction
; at each line ending.
-%macro ADVANCE_END_OF_LINE 1
- %if mflag
+%macro ADVANCE_END_OF_TWO_LINES 0
+ %if spill_src_stride
add srcq, src_strideq
- %endif
- %if mflag == 0 && %1 == 2
- lea srcq, [srcq +src_strideq*2]
+ %else
+ lea srcq, [srcq+src_strideq*2]
%endif
- %if %1 == 2
- lea ref1q, [ref1q+ref_strideq*2]
- lea ref2q, [ref2q+ref_strideq*2]
- lea ref3q, [ref3q+ref_strideq*2]
- lea ref4q, [ref4q+ref_strideq*2]
- %endif
+; note: ref_stride is never spilled when processing two lines
+ lea ref1q, [ref1q+ref_strideq*2]
+ lea ref2q, [ref2q+ref_strideq*2]
+ lea ref3q, [ref3q+ref_strideq*2]
+ lea ref4q, [ref4q+ref_strideq*2]
%endmacro
-; Please note that the second_offset of src is for in_line_offset,
-; so it is less than src_stride.
-; PROCESS_4x2x4 first, off_{first,second}_{src,ref}, do_avg,
-; {first, second}_extraline, line_ending
-%macro PROCESS_4x2x4 9
- HANDLE_FIRST_OFFSET %7, %2
- movd m0, [srcq + first_offset]
- HANDLE_SECOND_OFFSET %7, %8, %4
+; PROCESS_4x2x4 first, do_avg
+%macro PROCESS_4x2x4 2
+ movd m0, [srcq]
+ HANDLE_SECOND_OFFSET
%if %1 == 1
- movd m6, [ref1q+%3]
- movd m4, [ref2q+%3]
- movd m7, [ref3q+%3]
- movd m5, [ref4q+%3]
+ movd m6, [ref1q]
+ movd m4, [ref2q]
+ movd m7, [ref3q]
+ movd m5, [ref4q]
movd m1, [srcq + second_offset]
- movd m2, [ref1q+%5]
+ movd m2, [ref1q+ref_strideq]
punpckldq m0, m1
punpckldq m6, m2
- movd m1, [ref2q+%5]
- movd m2, [ref3q+%5]
- movd m3, [ref4q+%5]
+ movd m1, [ref2q+ref_strideq]
+ movd m2, [ref3q+ref_strideq]
+ movd m3, [ref4q+ref_strideq]
punpckldq m4, m1
punpckldq m7, m2
punpckldq m5, m3
movlhps m0, m0
movlhps m6, m4
movlhps m7, m5
-%if %6 == 1
+%if %2 == 1
AVG_4x2x4 m6, m7
%endif
psadbw m6, m0
psadbw m7, m0
%else
- movd m1, [ref1q+%3]
- movd m5, [ref1q+%5]
- movd m2, [ref2q+%3]
- movd m4, [ref2q+%5]
+ movd m1, [ref1q]
+ movd m5, [ref1q+ref_strideq]
+ movd m2, [ref2q]
+ movd m4, [ref2q+ref_strideq]
punpckldq m1, m5
punpckldq m2, m4
- movd m3, [ref3q+%3]
- movd m5, [ref3q+%5]
+ movd m3, [ref3q]
+ movd m5, [ref3q+ref_strideq]
punpckldq m3, m5
- movd m4, [ref4q+%3]
- movd m5, [ref4q+%5]
+ movd m4, [ref4q]
+ movd m5, [ref4q+ref_strideq]
punpckldq m4, m5
movd m5, [srcq + second_offset]
punpckldq m0, m5
movlhps m0, m0
movlhps m1, m2
movlhps m3, m4
-%if %6 == 1
+%if %2 == 1
AVG_4x2x4 m1, m3
%endif
psadbw m1, m0
@@ -137,28 +118,23 @@ SECTION .text
paddd m6, m1
paddd m7, m3
%endif
-%if %9 > 0
- ADVANCE_END_OF_LINE %9
-%endif
%endmacro
-; PROCESS_8x2x4 first, off_{first,second}_{src,ref}, do_avg,
-; {first,second}_extraline, line_ending
-%macro PROCESS_8x2x4 9
- HANDLE_FIRST_OFFSET %7, %2
- movh m0, [srcq + first_offset]
- HANDLE_SECOND_OFFSET %7, %8, %4
+; PROCESS_8x2x4 first, do_avg
+%macro PROCESS_8x2x4 2
+ movh m0, [srcq]
+ HANDLE_SECOND_OFFSET
%if %1 == 1
- movh m4, [ref1q+%3]
- movh m5, [ref2q+%3]
- movh m6, [ref3q+%3]
- movh m7, [ref4q+%3]
+ movh m4, [ref1q]
+ movh m5, [ref2q]
+ movh m6, [ref3q]
+ movh m7, [ref4q]
movhps m0, [srcq + second_offset]
- movhps m4, [ref1q+%5]
- movhps m5, [ref2q+%5]
- movhps m6, [ref3q+%5]
- movhps m7, [ref4q+%5]
-%if %6 == 1
+ movhps m4, [ref1q+ref_strideq]
+ movhps m5, [ref2q+ref_strideq]
+ movhps m6, [ref3q+ref_strideq]
+ movhps m7, [ref4q+ref_strideq]
+%if %2 == 1
movu m3, [second_predq]
pavgb m4, m3
pavgb m5, m3
@@ -171,12 +147,12 @@ SECTION .text
psadbw m6, m0
psadbw m7, m0
%else
- movh m1, [ref1q+%3]
- movh m2, [ref2q+%3]
+ movh m1, [ref1q]
+ movh m2, [ref2q]
movhps m0, [srcq + second_offset]
- movhps m1, [ref1q+%5]
- movhps m2, [ref2q+%5]
-%if %6 == 1
+ movhps m1, [ref1q+ref_strideq]
+ movhps m2, [ref2q+ref_strideq]
+%if %2 == 1
movu m3, [second_predq]
pavgb m1, m3
pavgb m2, m3
@@ -186,11 +162,11 @@ SECTION .text
paddd m4, m1
paddd m5, m2
- movh m1, [ref3q+%3]
- movhps m1, [ref3q+%5]
- movh m2, [ref4q+%3]
- movhps m2, [ref4q+%5]
-%if %6 == 1
+ movh m1, [ref3q]
+ movhps m1, [ref3q+ref_strideq]
+ movh m2, [ref4q]
+ movhps m2, [ref4q+ref_strideq]
+%if %2 == 1
pavgb m1, m3
pavgb m2, m3
lea second_predq, [second_predq+mmsize]
@@ -200,24 +176,16 @@ SECTION .text
paddd m6, m1
paddd m7, m2
%endif
-%if %9 > 0
- ADVANCE_END_OF_LINE %9
-%endif
%endmacro
-; PROCESS_16x2x4 first, off_{first,second}_{src,ref}, do_avg,
-; {first,second}_extraline, line_ending
-%macro PROCESS_16x2x4 9
- ; 1st 16 px
- HANDLE_FIRST_OFFSET %7, %2
- mova m0, [srcq + first_offset]
- HANDLE_SECOND_OFFSET %7, %8, %4
+; PROCESS_FIRST_MMSIZE do_avg
+%macro PROCESS_FIRST_MMSIZE 1
+ mova m0, [srcq]
+ movu m4, [ref1q]
+ movu m5, [ref2q]
+ movu m6, [ref3q]
+ movu m7, [ref4q]
%if %1 == 1
- movu m4, [ref1q+%3]
- movu m5, [ref2q+%3]
- movu m6, [ref3q+%3]
- movu m7, [ref4q+%3]
-%if %6 == 1
movu m3, [second_predq]
pavgb m4, m3
pavgb m5, m3
@@ -229,38 +197,14 @@ SECTION .text
psadbw m5, m0
psadbw m6, m0
psadbw m7, m0
-%else ; %1 == 1
- movu m1, [ref1q+%3]
- movu m2, [ref2q+%3]
-%if %6 == 1
- movu m3, [second_predq]
- pavgb m1, m3
- pavgb m2, m3
-%endif
- psadbw m1, m0
- psadbw m2, m0
- paddd m4, m1
- paddd m5, m2
-
- movu m1, [ref3q+%3]
- movu m2, [ref4q+%3]
-%if %6 == 1
- pavgb m1, m3
- pavgb m2, m3
- lea second_predq, [second_predq+mmsize]
-%endif
- psadbw m1, m0
- psadbw m2, m0
- paddd m6, m1
- paddd m7, m2
-%endif ; %1 == 1
-
- ; 2nd 16 px
- mova m0, [srcq + second_offset]
- movu m1, [ref1q+%5]
- movu m2, [ref2q+%5]
+%endmacro
-%if %6 == 1
+; PROCESS_16x1x4 offset, do_avg
+%macro PROCESS_16x1x4 2
+ mova m0, [srcq + %1]
+ movu m1, [ref1q + ref_offsetq + %1]
+ movu m2, [ref2q + ref_offsetq + %1]
+%if %2 == 1
movu m3, [second_predq]
pavgb m1, m3
pavgb m2, m3
@@ -270,14 +214,9 @@ SECTION .text
paddd m4, m1
paddd m5, m2
- movu m1, [ref3q+%5]
- movu m2, [ref4q+%5]
-
-%if %9 > 0
- ADVANCE_END_OF_LINE %9
-%endif
-
-%if %6 == 1
+ movu m1, [ref3q + ref_offsetq + %1]
+ movu m2, [ref4q + ref_offsetq + %1]
+%if %2 == 1
pavgb m1, m3
pavgb m2, m3
lea second_predq, [second_predq+mmsize]
@@ -288,27 +227,6 @@ SECTION .text
paddd m7, m2
%endmacro
-; PROCESS_32x2x4 first, off_{first,second}_{src,ref}, do_avg,
-; {first,second}_extraline, line_ending
-%macro PROCESS_32x2x4 9
- PROCESS_16x2x4 %1, %2, %3, %2 + 16, %3 + 16, %6, %7, %7, %8 - %7
- PROCESS_16x2x4 0, %4, %5, %4 + 16, %5 + 16, %6, %8, %8, %9
-%endmacro
-
-; PROCESS_64x2x4 first, off_{first,second}_{src,ref}, do_avg,
-; {first,second}_extraline, line_ending
-%macro PROCESS_64x2x4 9
- PROCESS_32x2x4 %1, %2, %3, %2 + 32, %3 + 32, %6, %7, %7, %8 - %7
- PROCESS_32x2x4 0, %4, %5, %4 + 32, %5 + 32, %6, %8, %8, %9
-%endmacro
-
-; PROCESS_128x2x4 first, off_{first,second}_{src,ref}, do_avg,
-; {first,second}_extraline, line_ending
-%macro PROCESS_128x2x4 9
- PROCESS_64x2x4 %1, %2, %3, %2 + 64, %3 + 64, %6, %7, %7, %8 - %7
- PROCESS_64x2x4 0, %4, %5, %4 + 64, %5 + 64, %6, %8, %8, %9
-%endmacro
-
; void aom_sadNxNx4d_sse2(uint8_t *src, int src_stride,
; uint8_t *ref[4], int ref_stride,
; uint32_t res[4]);
@@ -318,38 +236,118 @@ SECTION .text
; 3: If 0, then normal sad, else avg
; 4: If 0, then normal sad, else skip rows
%macro SADNXN4D 2-4 0,0
+
+%define spill_src_stride 0
+%define spill_ref_stride 0
+%define spill_cnt 0
+
+; Whether a shared offset should be used instead of adding strides to
+; each reference array. With this option, only one line will be processed
+; per loop iteration.
+%define use_ref_offset (%1 >= mmsize)
+
+; Remove loops in the 4x4 and 8x4 case
+%define use_loop (use_ref_offset || %2 > 4)
+
%if %4 == 1 ; skip rows
%if ARCH_X86_64
-cglobal sad_skip_%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
- res, ref2, ref3, ref4
+%if use_ref_offset
+cglobal sad_skip_%1x%2x4d, 5, 10, 8, src, src_stride, ref1, ref_stride, res, \
+ ref2, ref3, ref4, cnt, ref_offset
+%elif use_loop
+cglobal sad_skip_%1x%2x4d, 5, 9, 8, src, src_stride, ref1, ref_stride, res, \
+ ref2, ref3, ref4, cnt
+%else
+cglobal sad_skip_%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, res, \
+ ref2, ref3, ref4
+%endif
%else
-cglobal sad_skip_%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
- ref2, ref3, ref4
+%if use_ref_offset
+cglobal sad_skip_%1x%2x4d, 4, 7, 8, src, ref_offset, ref1, cnt, ref2, ref3, \
+ ref4
+%define spill_src_stride 1
+%define spill_ref_stride 1
+%elif use_loop
+cglobal sad_skip_%1x%2x4d, 4, 7, 8, src, cnt, ref1, ref_stride, ref2, \
+ ref3, ref4
+%define spill_src_stride 1
+%else
+cglobal sad_skip_%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, ref2, \
+ ref3, ref4
+%endif
%endif
%elif %3 == 0 ; normal sad
%if ARCH_X86_64
-cglobal sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
- res, ref2, ref3, ref4
+%if use_ref_offset
+cglobal sad%1x%2x4d, 5, 10, 8, src, src_stride, ref1, ref_stride, res, ref2, \
+ ref3, ref4, cnt, ref_offset
+%elif use_loop
+cglobal sad%1x%2x4d, 5, 9, 8, src, src_stride, ref1, ref_stride, res, ref2, \
+ ref3, ref4, cnt
+%else
+cglobal sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, res, ref2, \
+ ref3, ref4
+%endif
+%else
+%if use_ref_offset
+cglobal sad%1x%2x4d, 4, 7, 8, src, ref_offset, ref1, cnt, ref2, ref3, ref4
+ %define spill_src_stride 1
+ %define spill_ref_stride 1
+%elif use_loop
+cglobal sad%1x%2x4d, 4, 7, 8, src, cnt, ref1, ref_stride, ref2, ref3, ref4
+ %define spill_src_stride 1
%else
-cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
- ref2, ref3, ref4
+cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, ref2, ref3, \
+ ref4
+%endif
%endif
%else ; avg
%if ARCH_X86_64
+%if use_ref_offset
+cglobal sad%1x%2x4d_avg, 6, 11, 8, src, src_stride, ref1, ref_stride, \
+ second_pred, res, ref2, ref3, ref4, cnt, \
+ ref_offset
+%elif use_loop
cglobal sad%1x%2x4d_avg, 6, 10, 8, src, src_stride, ref1, ref_stride, \
- second_pred, res, ref2, ref3, ref4
+ second_pred, res, ref2, ref3, ref4, cnt
+%else
+cglobal sad%1x%2x4d_avg, 6, 9, 8, src, src_stride, ref1, ref_stride, \
+ second_pred, res, ref2, ref3, ref4
+%endif
%else
-cglobal sad%1x%2x4d_avg, 5, 7, 8, src, ref4, ref1, ref_stride, \
- second_pred, ref2, ref3
+%if use_ref_offset
+cglobal sad%1x%2x4d_avg, 5, 7, 8, src, ref4, ref1, ref_offset, second_pred, ref2, ref3
+ %define spill_src_stride 1
+ %define spill_ref_stride 1
+ %define spill_cnt 1
+%elif use_loop
+cglobal sad%1x%2x4d_avg, 5, 7, 8, src, ref4, ref1, ref_stride, second_pred, ref2, ref3
+ %define spill_src_stride 1
+ %define spill_cnt 1
+%else
+cglobal sad%1x%2x4d_avg, 5, 7, 8, src, ref4, ref1, ref_stride, second_pred, ref2, ref3
+ %define spill_src_stride 1
+%endif
+%endif
+%endif
+
+%if spill_src_stride
%define src_strideq r1mp
%define src_strided r1mp
%endif
+%if spill_ref_stride
+ %define ref_strideq r3mp
+ %define ref_strided r3mp
+%endif
+
+%if spill_cnt
+ SUB rsp, 4
+ %define cntd word [rsp]
%endif
- %define mflag ((1 - ARCH_X86_64) & %3)
%if %4 == 1
- lea src_strided, [2*src_strided]
- lea ref_strided, [2*ref_strided]
+ sal src_strided, 1
+ sal ref_strided, 1
%endif
movsxdifnidn src_strideq, src_strided
movsxdifnidn ref_strideq, ref_strided
@@ -359,18 +357,67 @@ cglobal sad%1x%2x4d_avg, 5, 7, 8, src, ref4, ref1, ref_stride, \
mov ref4q, [ref1q+gprsize*3]
mov ref1q, [ref1q+gprsize*0]
- PROCESS_%1x2x4 1, 0, 0, 0, ref_strideq, %3, 0, 1, 2
-%if %4 == 1 ; downsample number of rows by 2
-%define num_rep (%2-8)/4
+; Is the loop for this wxh in another function?
+; If so, we jump into that function for the loop and the return.
+%define external_loop (use_ref_offset && %1 > mmsize && %1 != %2)
+
+%if use_ref_offset
+ PROCESS_FIRST_MMSIZE %3
+%if %1 > mmsize
+ mov ref_offsetq, 0
+ mov cntd, %2 >> %4
+; Jump part way into the loop for the square version of this width
+%if %3 == 1
+ jmp mangle(private_prefix %+ _sad%1x%1x4d_avg %+ SUFFIX).midloop
+%elif %4 == 1
+ jmp mangle(private_prefix %+ _sad_skip_%1x%1x4d %+ SUFFIX).midloop
%else
-%define num_rep (%2-4)/2
+ jmp mangle(private_prefix %+ _sad%1x%1x4d %+ SUFFIX).midloop
+%endif
+%else
+ mov ref_offsetq, ref_strideq
+ add srcq, src_strideq
+ mov cntd, (%2 >> %4) - 1
+%endif
+%if external_loop == 0
+.loop:
+; Unrolled horizontal loop
+%assign h_offset 0
+%rep %1/mmsize
+ PROCESS_16x1x4 h_offset, %3
+%if h_offset == 0
+; The first row of the first column is done outside the loop and jumps here
+.midloop:
%endif
-%rep num_rep
- PROCESS_%1x2x4 0, 0, 0, 0, ref_strideq, %3, 0, 1, 2
+%assign h_offset h_offset+mmsize
%endrep
-%undef num_rep
- PROCESS_%1x2x4 0, 0, 0, 0, ref_strideq, %3, 0, 1, 2
+ add srcq, src_strideq
+ add ref_offsetq, ref_strideq
+ sub cntd, 1
+ jnz .loop
+%endif
+%else
+ PROCESS_%1x2x4 1, %3
+ ADVANCE_END_OF_TWO_LINES
+%if use_loop
+ mov cntd, (%2/2 >> %4) - 1
+.loop:
+%endif
+ PROCESS_%1x2x4 0, %3
+%if use_loop
+ ADVANCE_END_OF_TWO_LINES
+ sub cntd, 1
+ jnz .loop
+%endif
+%endif
+
+%if spill_cnt
+; Undo stack allocation for cnt
+ ADD rsp, 4
+%endif
+
+%if external_loop == 0
%if %3 == 0
%define resultq r4
%define resultmp r4mp
@@ -379,6 +426,16 @@ cglobal sad%1x%2x4d_avg, 5, 7, 8, src, ref4, ref1, ref_stride, \
%define resultmp r5mp
%endif
+; Undo the modifications made to parameters on the stack
+%if %4 == 1
+%if spill_src_stride
+ shr src_strided, 1
+%endif
+%if spill_ref_stride
+ shr ref_strided, 1
+%endif
+%endif
+
%if %1 > 4
pslldq m5, 4
pslldq m7, 4
@@ -407,6 +464,7 @@ cglobal sad%1x%2x4d_avg, 5, 7, 8, src, ref4, ref1, ref_stride, \
movq [resultq+8], m7
RET
%endif
+%endif ; external_loop == 0
%endmacro
INIT_XMM sse2
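
The macro above generates the variants of the function declared in the comment near its use; for reference, a scalar sketch of the plain (non-avg, non-skip) behaviour, with 'w' and 'h' standing in for the N parameters baked into each variant:

#include <stdint.h>
#include <stdlib.h>

// Scalar sketch of aom_sadNxNx4d_sse2(): one SAD per reference block.
static void sad_wxh_x4d_sketch(const uint8_t *src, int src_stride,
                               const uint8_t *const ref[4], int ref_stride,
                               int w, int h, uint32_t res[4]) {
  for (int r = 0; r < 4; ++r) {
    uint32_t sad = 0;
    for (int y = 0; y < h; ++y)
      for (int x = 0; x < w; ++x)
        sad += abs(src[y * src_stride + x] - ref[r][y * ref_stride + x]);
    res[r] = sad;
  }
}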
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/sad_avx2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/sad_avx2.c
index ef3fdc1d28c..24cea76b37c 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/sad_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/sad_avx2.c
@@ -17,7 +17,7 @@
static INLINE unsigned int sad64xh_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
int h) {
- int i, res;
+ int i;
__m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg;
__m256i sum_sad = _mm256_setzero_si256();
__m256i sum_sad_h;
@@ -37,7 +37,7 @@ static INLINE unsigned int sad64xh_avx2(const uint8_t *src_ptr, int src_stride,
sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h);
sum_sad128 = _mm256_extracti128_si256(sum_sad, 1);
sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128);
- res = _mm_cvtsi128_si32(sum_sad128);
+ unsigned int res = (unsigned int)_mm_cvtsi128_si32(sum_sad128);
_mm256_zeroupper();
return res;
}
@@ -45,7 +45,7 @@ static INLINE unsigned int sad64xh_avx2(const uint8_t *src_ptr, int src_stride,
static INLINE unsigned int sad32xh_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
int h) {
- int i, res;
+ int i;
__m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg;
__m256i sum_sad = _mm256_setzero_si256();
__m256i sum_sad_h;
@@ -68,7 +68,7 @@ static INLINE unsigned int sad32xh_avx2(const uint8_t *src_ptr, int src_stride,
sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h);
sum_sad128 = _mm256_extracti128_si256(sum_sad, 1);
sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128);
- res = _mm_cvtsi128_si32(sum_sad128);
+ unsigned int res = (unsigned int)_mm_cvtsi128_si32(sum_sad128);
_mm256_zeroupper();
return res;
}
@@ -129,7 +129,7 @@ FSAD32
unsigned int aom_sad64x##h##_avg_avx2( \
const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
int ref_stride, const uint8_t *second_pred) { \
- int i, res; \
+ int i; \
__m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg; \
__m256i sum_sad = _mm256_setzero_si256(); \
__m256i sum_sad_h; \
@@ -155,7 +155,7 @@ FSAD32
sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h); \
sum_sad128 = _mm256_extracti128_si256(sum_sad, 1); \
sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128); \
- res = _mm_cvtsi128_si32(sum_sad128); \
+ unsigned int res = (unsigned int)_mm_cvtsi128_si32(sum_sad128); \
_mm256_zeroupper(); \
return res; \
}
@@ -164,7 +164,7 @@ FSAD32
unsigned int aom_sad32x##h##_avg_avx2( \
const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \
int ref_stride, const uint8_t *second_pred) { \
- int i, res; \
+ int i; \
__m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg; \
__m256i sum_sad = _mm256_setzero_si256(); \
__m256i sum_sad_h; \
@@ -194,7 +194,7 @@ FSAD32
sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h); \
sum_sad128 = _mm256_extracti128_si256(sum_sad, 1); \
sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128); \
- res = _mm_cvtsi128_si32(sum_sad128); \
+ unsigned int res = (unsigned int)_mm_cvtsi128_si32(sum_sad128); \
_mm256_zeroupper(); \
return res; \
}
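
Each hunk above ends with the same 256-to-32-bit reduction, now returning through an explicitly unsigned temporary. A standalone sketch of that reduction, assuming (as the surrounding code does) that each 64-bit lane holds a partial total, as _mm256_sad_epu8() produces:

#include <immintrin.h>

// Hypothetical helper mirroring the sadNNxh_avx2() epilogue.
static unsigned int reduce_sad256(__m256i sum_sad) {
  // Fold each half's upper 64 bits onto its lower 64 bits...
  const __m256i sum_sad_h = _mm256_srli_si256(sum_sad, 8);
  sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h);
  // ...then fold the upper half onto the lower and read lane 0.
  __m128i sum_sad128 = _mm256_extracti128_si256(sum_sad, 1);
  sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128);
  return (unsigned int)_mm_cvtsi128_si32(sum_sad128);
}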
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/sad_impl_avx2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/sad_impl_avx2.c
index 2afae4bc648..c5da6e9ab36 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/sad_impl_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/sad_impl_avx2.c
@@ -34,7 +34,7 @@ static unsigned int sad32x32(const uint8_t *src_ptr, int src_stride,
sum = _mm256_add_epi32(sum, _mm256_srli_si256(sum, 8));
sum_i128 = _mm_add_epi32(_mm256_extracti128_si256(sum, 1),
_mm256_castsi256_si128(sum));
- return _mm_cvtsi128_si32(sum_i128);
+ return (unsigned int)_mm_cvtsi128_si32(sum_i128);
}
static unsigned int sad64x32(const uint8_t *src_ptr, int src_stride,
@@ -112,7 +112,7 @@ static unsigned int sad_w64_avg_avx2(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
const int h, const uint8_t *second_pred,
const int second_pred_stride) {
- int i, res;
+ int i;
__m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg;
__m256i sum_sad = _mm256_setzero_si256();
__m256i sum_sad_h;
@@ -137,9 +137,7 @@ static unsigned int sad_w64_avg_avx2(const uint8_t *src_ptr, int src_stride,
sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h);
sum_sad128 = _mm256_extracti128_si256(sum_sad, 1);
sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128);
- res = _mm_cvtsi128_si32(sum_sad128);
-
- return res;
+ return (unsigned int)_mm_cvtsi128_si32(sum_sad128);
}
unsigned int aom_sad64x128_avg_avx2(const uint8_t *src_ptr, int src_stride,
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/sum_squares_avx2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/sum_squares_avx2.c
index 0d63db288ee..89b9b824bf2 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/sum_squares_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/sum_squares_avx2.c
@@ -21,7 +21,7 @@ static uint64_t aom_sum_squares_2d_i16_nxn_avx2(const int16_t *src, int stride,
int width, int height) {
uint64_t result;
__m256i v_acc_q = _mm256_setzero_si256();
- const __m256i v_zext_mask_q = yy_set1_64_from_32i(0xffffffff);
+ const __m256i v_zext_mask_q = yy_set1_64_from_32i(~0);
for (int col = 0; col < height; col += 4) {
__m256i v_acc_d = _mm256_setzero_si256();
for (int row = 0; row < width; row += 16) {
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/sum_squares_sse2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/sum_squares_sse2.c
index 0bdeee9f270..25be8568a8b 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/sum_squares_sse2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/sum_squares_sse2.c
@@ -84,7 +84,7 @@ uint64_t aom_sum_squares_2d_i16_4xn_sse2(const int16_t *src, int stride,
src += stride << 2;
r += 4;
} while (r < height);
- const __m128i v_zext_mask_q = xx_set1_64_from_32i(0xffffffff);
+ const __m128i v_zext_mask_q = xx_set1_64_from_32i(~0);
__m128i v_acc_64 = _mm_add_epi64(_mm_srli_epi64(v_acc_q, 32),
_mm_and_si128(v_acc_q, v_zext_mask_q));
v_acc_64 = _mm_add_epi64(v_acc_64, _mm_srli_si128(v_acc_64, 8));
@@ -116,7 +116,7 @@ aom_sum_squares_2d_i16_nxn_sse2(const int16_t *src, int stride, int width,
int height) {
int r = 0;
- const __m128i v_zext_mask_q = xx_set1_64_from_32i(0xffffffff);
+ const __m128i v_zext_mask_q = xx_set1_64_from_32i(~0);
__m128i v_acc_q = _mm_setzero_si128();
do {
@@ -254,7 +254,7 @@ uint64_t aom_sum_sse_2d_i16_sse2(const int16_t *src, int src_stride, int width,
//////////////////////////////////////////////////////////////////////////////
static uint64_t aom_sum_squares_i16_64n_sse2(const int16_t *src, uint32_t n) {
- const __m128i v_zext_mask_q = xx_set1_64_from_32i(0xffffffff);
+ const __m128i v_zext_mask_q = xx_set1_64_from_32i(~0);
__m128i v_acc0_q = _mm_setzero_si128();
__m128i v_acc1_q = _mm_setzero_si128();
@@ -306,7 +306,7 @@ uint64_t aom_sum_squares_i16_sse2(const int16_t *src, uint32_t n) {
if (n % 64 == 0) {
return aom_sum_squares_i16_64n_sse2(src, n);
} else if (n > 64) {
- int k = n & ~(64 - 1);
+ const uint32_t k = n & ~63u;
return aom_sum_squares_i16_64n_sse2(src, k) +
aom_sum_squares_i16_c(src + k, n - k);
} else {
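
Two of the changes above are easy to check in isolation: passing ~0 builds the same 0x00000000ffffffff per-lane zero-extension mask without an unsigned-to-int conversion, and n & ~63u rounds n down to a multiple of 64. A small plain-C illustration:

#include <assert.h>
#include <stdint.h>

static void illustrate_masks(void) {
  // ~0 is the int -1; its low 32 bits zero-extended into a 64-bit lane
  // give the same mask the 0xffffffff literal produced.
  const uint64_t zext_mask = (uint32_t)~0;
  assert(zext_mask == 0x00000000ffffffffULL);

  // n & ~63u clears the low six bits: the largest multiple of 64 <= n.
  const uint32_t n = 200;
  const uint32_t k = n & ~63u;
  assert(k == 192 && n - k == 8);
}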
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/variance_avx2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/variance_avx2.c
index 7398a73b0eb..d5eb2531d35 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/variance_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/variance_avx2.c
@@ -234,19 +234,20 @@ unsigned int aom_mse16x16_avx2(const uint8_t *src, int src_stride,
return *sse;
}
-unsigned int aom_sub_pixel_variance32xh_avx2(const uint8_t *src, int src_stride,
- int x_offset, int y_offset,
- const uint8_t *dst, int dst_stride,
- int height, unsigned int *sse);
-unsigned int aom_sub_pixel_variance16xh_avx2(const uint8_t *src, int src_stride,
- int x_offset, int y_offset,
- const uint8_t *dst, int dst_stride,
- int height, unsigned int *sse);
-
-unsigned int aom_sub_pixel_avg_variance32xh_avx2(
- const uint8_t *src, int src_stride, int x_offset, int y_offset,
- const uint8_t *dst, int dst_stride, const uint8_t *sec, int sec_stride,
- int height, unsigned int *sseptr);
+int aom_sub_pixel_variance32xh_avx2(const uint8_t *src, int src_stride,
+ int x_offset, int y_offset,
+ const uint8_t *dst, int dst_stride,
+ int height, unsigned int *sse);
+int aom_sub_pixel_variance16xh_avx2(const uint8_t *src, int src_stride,
+ int x_offset, int y_offset,
+ const uint8_t *dst, int dst_stride,
+ int height, unsigned int *sse);
+
+int aom_sub_pixel_avg_variance32xh_avx2(const uint8_t *src, int src_stride,
+ int x_offset, int y_offset,
+ const uint8_t *dst, int dst_stride,
+ const uint8_t *sec, int sec_stride,
+ int height, unsigned int *sseptr);
#define AOM_SUB_PIXEL_VAR_AVX2(w, h, wf, wlog2, hlog2) \
unsigned int aom_sub_pixel_variance##w##x##h##_avx2( \
@@ -534,15 +535,15 @@ uint64_t aom_mse_4xh_16bit_avx2(uint8_t *dst, int dstride, uint16_t *src,
__m128i dst0_4x8, dst1_4x8, dst2_4x8, dst3_4x8, dst_16x8;
__m128i src0_4x16, src1_4x16, src2_4x16, src3_4x16;
__m256i src0_8x16, src1_8x16, dst_16x16, src_16x16;
- __m256i res0_4x64, res1_4x64, res2_4x64, res3_4x64;
+ __m256i res0_4x64, res1_4x64;
__m256i sub_result;
const __m256i zeros = _mm256_broadcastsi128_si256(_mm_setzero_si128());
__m256i square_result = _mm256_broadcastsi128_si256(_mm_setzero_si128());
for (int i = 0; i < h; i += 4) {
- dst0_4x8 = _mm_cvtsi32_si128(*(uint32_t const *)(&dst[(i + 0) * dstride]));
- dst1_4x8 = _mm_cvtsi32_si128(*(uint32_t const *)(&dst[(i + 1) * dstride]));
- dst2_4x8 = _mm_cvtsi32_si128(*(uint32_t const *)(&dst[(i + 2) * dstride]));
- dst3_4x8 = _mm_cvtsi32_si128(*(uint32_t const *)(&dst[(i + 3) * dstride]));
+ dst0_4x8 = _mm_cvtsi32_si128(*(int const *)(&dst[(i + 0) * dstride]));
+ dst1_4x8 = _mm_cvtsi32_si128(*(int const *)(&dst[(i + 1) * dstride]));
+ dst2_4x8 = _mm_cvtsi32_si128(*(int const *)(&dst[(i + 2) * dstride]));
+ dst3_4x8 = _mm_cvtsi32_si128(*(int const *)(&dst[(i + 3) * dstride]));
dst_16x8 = _mm_unpacklo_epi64(_mm_unpacklo_epi32(dst0_4x8, dst1_4x8),
_mm_unpacklo_epi32(dst2_4x8, dst3_4x8));
dst_16x16 = _mm256_cvtepu8_epi16(dst_16x8);
@@ -557,30 +558,121 @@ uint64_t aom_mse_4xh_16bit_avx2(uint8_t *dst, int dstride, uint16_t *src,
_mm256_castsi128_si256(_mm_unpacklo_epi64(src2_4x16, src3_4x16));
src_16x16 = _mm256_permute2x128_si256(src0_8x16, src1_8x16, 0x20);
+ // r15 r14 r13------------r1 r0 - 16 bit
sub_result = _mm256_abs_epi16(_mm256_sub_epi16(src_16x16, dst_16x16));
- src_16x16 = _mm256_unpacklo_epi16(sub_result, zeros);
- dst_16x16 = _mm256_unpackhi_epi16(sub_result, zeros);
+ // s7 s6 s5 s4 s3 s2 s1 s0 - 32bit
+ src_16x16 = _mm256_madd_epi16(sub_result, sub_result);
- src_16x16 = _mm256_madd_epi16(src_16x16, src_16x16); // 32bit store
- dst_16x16 = _mm256_madd_epi16(dst_16x16, dst_16x16); // 32bit store
+ // accumulation of result
+ square_result = _mm256_add_epi32(square_result, src_16x16);
+ }
+
+ // s5 s4 s1 s0 - 64bit
+ res0_4x64 = _mm256_unpacklo_epi32(square_result, zeros);
+ // s7 s6 s3 s2 - 64bit
+ res1_4x64 = _mm256_unpackhi_epi32(square_result, zeros);
+ // r3 r2 r1 r0 - 64bit
+ res0_4x64 = _mm256_add_epi64(res0_4x64, res1_4x64);
+ // r1+r3 r2+r0 - 64bit
+ const __m128i sum_1x64 =
+ _mm_add_epi64(_mm256_castsi256_si128(res0_4x64),
+ _mm256_extracti128_si256(res0_4x64, 1));
+ xx_storel_64(&sum, _mm_add_epi64(sum_1x64, _mm_srli_si128(sum_1x64, 8)));
+ return sum;
+}
- res0_4x64 = _mm256_unpacklo_epi32(src_16x16, zeros);
- res1_4x64 = _mm256_unpackhi_epi32(src_16x16, zeros);
- res2_4x64 = _mm256_unpacklo_epi32(dst_16x16, zeros);
- res3_4x64 = _mm256_unpackhi_epi32(dst_16x16, zeros);
+// Compute mse of four consecutive 4x4 blocks.
+// In the src buffer, each 4x4 block of a 32x32 filter block is stored
+// sequentially, so src_blk_stride is the same as the block width. The dst
+// buffer is a frame buffer, so dstride is a frame-level stride.
+uint64_t aom_mse_4xh_quad_16bit_avx2(uint8_t *dst, int dstride, uint16_t *src,
+ int src_blk_stride, int h) {
+ uint64_t sum = 0;
+ __m128i dst0_16x8, dst1_16x8, dst2_16x8, dst3_16x8;
+ __m256i dst0_16x16, dst1_16x16, dst2_16x16, dst3_16x16;
+ __m256i res0_4x64, res1_4x64;
+ __m256i sub_result_0, sub_result_1, sub_result_2, sub_result_3;
+ const __m256i zeros = _mm256_broadcastsi128_si256(_mm_setzero_si128());
+ __m256i square_result = zeros;
+ uint16_t *src_temp = src;
- square_result = _mm256_add_epi64(
- square_result,
- _mm256_add_epi64(
- _mm256_add_epi64(_mm256_add_epi64(res0_4x64, res1_4x64), res2_4x64),
- res3_4x64));
+ for (int i = 0; i < h; i += 4) {
+ dst0_16x8 = _mm_loadu_si128((__m128i *)(&dst[(i + 0) * dstride]));
+ dst1_16x8 = _mm_loadu_si128((__m128i *)(&dst[(i + 1) * dstride]));
+ dst2_16x8 = _mm_loadu_si128((__m128i *)(&dst[(i + 2) * dstride]));
+ dst3_16x8 = _mm_loadu_si128((__m128i *)(&dst[(i + 3) * dstride]));
+
+ // row0 of 1st,2nd, 3rd and 4th 4x4 blocks- d00 d10 d20 d30
+ dst0_16x16 = _mm256_cvtepu8_epi16(dst0_16x8);
+ // row1 of 1st,2nd, 3rd and 4th 4x4 blocks - d01 d11 d21 d31
+ dst1_16x16 = _mm256_cvtepu8_epi16(dst1_16x8);
+ // row2 of 1st,2nd, 3rd and 4th 4x4 blocks - d02 d12 d22 d32
+ dst2_16x16 = _mm256_cvtepu8_epi16(dst2_16x8);
+ // row3 of 1st,2nd, 3rd and 4th 4x4 blocks - d03 d13 d23 d33
+ dst3_16x16 = _mm256_cvtepu8_epi16(dst3_16x8);
+
+ // All rows of 1st 4x4 block - r00 r01 r02 r03
+ __m256i src0_16x16 = _mm256_loadu_si256((__m256i const *)(&src_temp[0]));
+ // All rows of 2nd 4x4 block - r10 r11 r12 r13
+ __m256i src1_16x16 =
+ _mm256_loadu_si256((__m256i const *)(&src_temp[src_blk_stride]));
+ // All rows of 3rd 4x4 block - r20 r21 r22 r23
+ __m256i src2_16x16 =
+ _mm256_loadu_si256((__m256i const *)(&src_temp[2 * src_blk_stride]));
+ // All rows of 4th 4x4 block - r30 r31 r32 r33
+ __m256i src3_16x16 =
+ _mm256_loadu_si256((__m256i const *)(&src_temp[3 * src_blk_stride]));
+
+ // r00 r10 r02 r12
+ __m256i tmp0_16x16 = _mm256_unpacklo_epi64(src0_16x16, src1_16x16);
+ // r01 r11 r03 r13
+ __m256i tmp1_16x16 = _mm256_unpackhi_epi64(src0_16x16, src1_16x16);
+ // r20 r30 r22 r32
+ __m256i tmp2_16x16 = _mm256_unpacklo_epi64(src2_16x16, src3_16x16);
+ // r21 r31 r23 r33
+ __m256i tmp3_16x16 = _mm256_unpackhi_epi64(src2_16x16, src3_16x16);
+
+ // r00 r10 r20 r30
+ src0_16x16 = _mm256_permute2f128_si256(tmp0_16x16, tmp2_16x16, 0x20);
+ // r01 r11 r21 r31
+ src1_16x16 = _mm256_permute2f128_si256(tmp1_16x16, tmp3_16x16, 0x20);
+ // r02 r12 r22 r32
+ src2_16x16 = _mm256_permute2f128_si256(tmp0_16x16, tmp2_16x16, 0x31);
+ // r03 r13 r23 r33
+ src3_16x16 = _mm256_permute2f128_si256(tmp1_16x16, tmp3_16x16, 0x31);
+
+ // r15 r14 r13------------r1 r0 - 16 bit
+ sub_result_0 = _mm256_abs_epi16(_mm256_sub_epi16(src0_16x16, dst0_16x16));
+ sub_result_1 = _mm256_abs_epi16(_mm256_sub_epi16(src1_16x16, dst1_16x16));
+ sub_result_2 = _mm256_abs_epi16(_mm256_sub_epi16(src2_16x16, dst2_16x16));
+ sub_result_3 = _mm256_abs_epi16(_mm256_sub_epi16(src3_16x16, dst3_16x16));
+
+ // s7 s6 s5 s4 s3 s2 s1 s0 - 32bit
+ src0_16x16 = _mm256_madd_epi16(sub_result_0, sub_result_0);
+ src1_16x16 = _mm256_madd_epi16(sub_result_1, sub_result_1);
+ src2_16x16 = _mm256_madd_epi16(sub_result_2, sub_result_2);
+ src3_16x16 = _mm256_madd_epi16(sub_result_3, sub_result_3);
+
+ // accumulation of result
+ src0_16x16 = _mm256_add_epi32(src0_16x16, src1_16x16);
+ src2_16x16 = _mm256_add_epi32(src2_16x16, src3_16x16);
+ const __m256i square_result_0 = _mm256_add_epi32(src0_16x16, src2_16x16);
+ square_result = _mm256_add_epi32(square_result, square_result_0);
+ src_temp += 16;
}
- const __m128i sum_2x64 =
- _mm_add_epi64(_mm256_castsi256_si128(square_result),
- _mm256_extracti128_si256(square_result, 1));
- const __m128i sum_1x64 = _mm_add_epi64(sum_2x64, _mm_srli_si128(sum_2x64, 8));
- xx_storel_64(&sum, sum_1x64);
+
+ // s5 s4 s1 s0 - 64bit
+ res0_4x64 = _mm256_unpacklo_epi32(square_result, zeros);
+ // s7 s6 s3 s2 - 64bit
+ res1_4x64 = _mm256_unpackhi_epi32(square_result, zeros);
+ // r3 r2 r1 r0 - 64bit
+ res0_4x64 = _mm256_add_epi64(res0_4x64, res1_4x64);
+ // r1+r3 r2+r0 - 64bit
+ const __m128i sum_1x64 =
+ _mm_add_epi64(_mm256_castsi256_si128(res0_4x64),
+ _mm256_extracti128_si256(res0_4x64, 1));
+ xx_storel_64(&sum, _mm_add_epi64(sum_1x64, _mm_srli_si128(sum_1x64, 8)));
return sum;
}
@@ -589,7 +681,7 @@ uint64_t aom_mse_8xh_16bit_avx2(uint8_t *dst, int dstride, uint16_t *src,
uint64_t sum = 0;
__m128i dst0_8x8, dst1_8x8, dst3_16x8;
__m256i src0_8x16, src1_8x16, src_16x16, dst_16x16;
- __m256i res0_4x64, res1_4x64, res2_4x64, res3_4x64;
+ __m256i res0_4x64, res1_4x64;
__m256i sub_result;
const __m256i zeros = _mm256_broadcastsi128_si256(_mm_setzero_si128());
__m256i square_result = _mm256_broadcastsi128_si256(_mm_setzero_si128());
@@ -606,38 +698,98 @@ uint64_t aom_mse_8xh_16bit_avx2(uint8_t *dst, int dstride, uint16_t *src,
_mm_loadu_si128((__m128i *)&src[(i + 1) * sstride]));
src_16x16 = _mm256_permute2x128_si256(src0_8x16, src1_8x16, 0x20);
+ // r15 r14 r13 - - - r1 r0 - 16 bit
sub_result = _mm256_abs_epi16(_mm256_sub_epi16(src_16x16, dst_16x16));
- src_16x16 = _mm256_unpacklo_epi16(sub_result, zeros);
- dst_16x16 = _mm256_unpackhi_epi16(sub_result, zeros);
+ // s7 s6 s5 s4 s3 s2 s1 s0 - 32bit
+ src_16x16 = _mm256_madd_epi16(sub_result, sub_result);
- src_16x16 = _mm256_madd_epi16(src_16x16, src_16x16);
- dst_16x16 = _mm256_madd_epi16(dst_16x16, dst_16x16);
+ // accumulation of result
+ square_result = _mm256_add_epi32(square_result, src_16x16);
+ }
- res0_4x64 = _mm256_unpacklo_epi32(src_16x16, zeros);
- res1_4x64 = _mm256_unpackhi_epi32(src_16x16, zeros);
- res2_4x64 = _mm256_unpacklo_epi32(dst_16x16, zeros);
- res3_4x64 = _mm256_unpackhi_epi32(dst_16x16, zeros);
+ // s5 s4 s1 s0 - 64bit
+ res0_4x64 = _mm256_unpacklo_epi32(square_result, zeros);
+ // s7 s6 s3 s2 - 64bit
+ res1_4x64 = _mm256_unpackhi_epi32(square_result, zeros);
+ // r3 r2 r1 r0 - 64bit
+ res0_4x64 = _mm256_add_epi64(res0_4x64, res1_4x64);
+ // r1+r3 r2+r0 - 64bit
+ const __m128i sum_1x64 =
+ _mm_add_epi64(_mm256_castsi256_si128(res0_4x64),
+ _mm256_extracti128_si256(res0_4x64, 1));
+ xx_storel_64(&sum, _mm_add_epi64(sum_1x64, _mm_srli_si128(sum_1x64, 8)));
+ return sum;
+}
- square_result = _mm256_add_epi64(
- square_result,
- _mm256_add_epi64(
- _mm256_add_epi64(_mm256_add_epi64(res0_4x64, res1_4x64), res2_4x64),
- res3_4x64));
+// Compute mse of two consecutive 8x8 blocks.
+// In the src buffer, each 8x8 block of a 64x64 filter block is stored
+// sequentially, so src_blk_stride is the same as the block width. The dst
+// buffer is a frame buffer, so dstride is a frame-level stride.
+uint64_t aom_mse_8xh_dual_16bit_avx2(uint8_t *dst, int dstride, uint16_t *src,
+ int src_blk_stride, int h) {
+ uint64_t sum = 0;
+ __m128i dst0_16x8, dst1_16x8;
+ __m256i dst0_16x16, dst1_16x16;
+ __m256i res0_4x64, res1_4x64;
+ __m256i sub_result_0, sub_result_1;
+ const __m256i zeros = _mm256_broadcastsi128_si256(_mm_setzero_si128());
+ __m256i square_result = zeros;
+ uint16_t *src_temp = src;
+
+ for (int i = 0; i < h; i += 2) {
+ dst0_16x8 = _mm_loadu_si128((__m128i *)(&dst[(i + 0) * dstride]));
+ dst1_16x8 = _mm_loadu_si128((__m128i *)(&dst[(i + 1) * dstride]));
+
+ // row0 of 1st and 2nd 8x8 block - d00 d10
+ dst0_16x16 = _mm256_cvtepu8_epi16(dst0_16x8);
+ // row1 of 1st and 2nd 8x8 block - d01 d11
+ dst1_16x16 = _mm256_cvtepu8_epi16(dst1_16x8);
+
+ // 2 rows of 1st 8x8 block - r00 r01
+ __m256i src0_16x16 = _mm256_loadu_si256((__m256i const *)(&src_temp[0]));
+ // 2 rows of 2nd 8x8 block - r10 r11
+ __m256i src1_16x16 =
+ _mm256_loadu_si256((__m256i const *)(&src_temp[src_blk_stride]));
+ // r00 r10 - 128bit
+ __m256i tmp0_16x16 =
+ _mm256_permute2f128_si256(src0_16x16, src1_16x16, 0x20);
+ // r01 r11 - 128bit
+ __m256i tmp1_16x16 =
+ _mm256_permute2f128_si256(src0_16x16, src1_16x16, 0x31);
+
+ // r15 r14 r13------------r1 r0 - 16 bit
+ sub_result_0 = _mm256_abs_epi16(_mm256_sub_epi16(tmp0_16x16, dst0_16x16));
+ sub_result_1 = _mm256_abs_epi16(_mm256_sub_epi16(tmp1_16x16, dst1_16x16));
+
+ // s7 s6 s5 s4 s3 s2 s1 s0 - 32bit each
+ src0_16x16 = _mm256_madd_epi16(sub_result_0, sub_result_0);
+ src1_16x16 = _mm256_madd_epi16(sub_result_1, sub_result_1);
+
+ // accumulation of result
+ src0_16x16 = _mm256_add_epi32(src0_16x16, src1_16x16);
+ square_result = _mm256_add_epi32(square_result, src0_16x16);
+ src_temp += 16;
}
- const __m128i sum_2x64 =
- _mm_add_epi64(_mm256_castsi256_si128(square_result),
- _mm256_extracti128_si256(square_result, 1));
- const __m128i sum_1x64 = _mm_add_epi64(sum_2x64, _mm_srli_si128(sum_2x64, 8));
- xx_storel_64(&sum, sum_1x64);
+ // s5 s4 s1 s0 - 64bit
+ res0_4x64 = _mm256_unpacklo_epi32(square_result, zeros);
+ // s7 s6 s3 s2 - 64bit
+ res1_4x64 = _mm256_unpackhi_epi32(square_result, zeros);
+ // r3 r2 r1 r0 - 64bit
+ res0_4x64 = _mm256_add_epi64(res0_4x64, res1_4x64);
+ // r1+r3 r2+r0 - 64bit
+ const __m128i sum_1x64 =
+ _mm_add_epi64(_mm256_castsi256_si128(res0_4x64),
+ _mm256_extracti128_si256(res0_4x64, 1));
+ xx_storel_64(&sum, _mm_add_epi64(sum_1x64, _mm_srli_si128(sum_1x64, 8)));
return sum;
}
uint64_t aom_mse_wxh_16bit_avx2(uint8_t *dst, int dstride, uint16_t *src,
int sstride, int w, int h) {
assert((w == 8 || w == 4) && (h == 8 || h == 4) &&
- "w=8/4 and h=8/4 must satisfy");
+ "w=8/4 and h=8/4 must be satisfied");
switch (w) {
case 4: return aom_mse_4xh_16bit_avx2(dst, dstride, src, sstride, h);
case 8: return aom_mse_8xh_16bit_avx2(dst, dstride, src, sstride, h);
@@ -645,6 +797,21 @@ uint64_t aom_mse_wxh_16bit_avx2(uint8_t *dst, int dstride, uint16_t *src,
}
}
+// Computes the mse of two consecutive 8x8 blocks or four consecutive 4x4
+// blocks. The luma plane uses 8x8 blocks and the chroma planes use 4x4 blocks.
+// In the src buffer, each block of a filter block is stored sequentially, so
+// src_blk_stride is the same as the block width, whereas the dst buffer is a
+// frame buffer, so dstride is a frame-level stride.
+uint64_t aom_mse_16xh_16bit_avx2(uint8_t *dst, int dstride, uint16_t *src,
+ int w, int h) {
+ assert((w == 8 || w == 4) && (h == 8 || h == 4) &&
+ "w=8/4 and h=8/4 must be satisfied");
+ switch (w) {
+ case 4: return aom_mse_4xh_quad_16bit_avx2(dst, dstride, src, w * h, h);
+ case 8: return aom_mse_8xh_dual_16bit_avx2(dst, dstride, src, w * h, h);
+ default: assert(0 && "unsupported width"); return -1;
+ }
+}
+
static INLINE void sum_final_256bit_avx2(__m256i sum_8x16[2], int *const sum) {
const __m256i sum_result_0 = _mm256_hadd_epi16(sum_8x16[0], sum_8x16[1]);
const __m256i sum_result_1 =
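
A scalar sketch, for orientation only, of what the new aom_mse_16xh_16bit kernels compute under the layout described in the comment above: the 16/w sub-blocks are packed back to back in src (per-block stride w * h), while dst uses the frame stride. The helper name is illustrative and not part of the patch.

#include <stdint.h>

static uint64_t mse_16xh_16bit_ref(const uint8_t *dst, int dstride,
                                   const uint16_t *src, int w, int h) {
  uint64_t sum = 0;
  for (int blk = 0; blk < 16 / w; ++blk) {  // two 8x8 or four 4x4 blocks
    for (int i = 0; i < h; ++i) {
      for (int j = 0; j < w; ++j) {
        const int d = (int)src[i * w + j] - (int)dst[i * dstride + j];
        sum += (uint64_t)((int64_t)d * d);
      }
    }
    dst += w;      // the next block sits to the right in the frame buffer
    src += w * h;  // blocks are stored sequentially in the src buffer
  }
  return sum;
}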
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/variance_impl_avx2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/variance_impl_avx2.c
index 163e4cc566f..8ea0443d5a2 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/variance_impl_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/variance_impl_avx2.c
@@ -163,17 +163,17 @@ DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = {
src_lo = _mm_srai_epi16(src_lo, 4); \
src_hi = _mm_srai_epi16(src_hi, 4);
-unsigned int aom_sub_pixel_variance32xh_avx2(const uint8_t *src, int src_stride,
- int x_offset, int y_offset,
- const uint8_t *dst, int dst_stride,
- int height, unsigned int *sse) {
+int aom_sub_pixel_variance32xh_avx2(const uint8_t *src, int src_stride,
+ int x_offset, int y_offset,
+ const uint8_t *dst, int dst_stride,
+ int height, unsigned int *sse) {
__m256i src_reg, dst_reg, exp_src_lo, exp_src_hi, exp_dst_lo, exp_dst_hi;
__m256i sse_reg, sum_reg, sse_reg_hi, res_cmp, sum_reg_lo, sum_reg_hi;
__m256i zero_reg;
int i, sum;
- sum_reg = _mm256_set1_epi16(0);
- sse_reg = _mm256_set1_epi16(0);
- zero_reg = _mm256_set1_epi16(0);
+ sum_reg = _mm256_setzero_si256();
+ sse_reg = _mm256_setzero_si256();
+ zero_reg = _mm256_setzero_si256();
// x_offset = 0 and y_offset = 0
if (x_offset == 0) {
@@ -351,17 +351,17 @@ unsigned int aom_sub_pixel_variance32xh_avx2(const uint8_t *src, int src_stride,
return sum;
}
-unsigned int aom_sub_pixel_variance16xh_avx2(const uint8_t *src, int src_stride,
- int x_offset, int y_offset,
- const uint8_t *dst, int dst_stride,
- int height, unsigned int *sse) {
+int aom_sub_pixel_variance16xh_avx2(const uint8_t *src, int src_stride,
+ int x_offset, int y_offset,
+ const uint8_t *dst, int dst_stride,
+ int height, unsigned int *sse) {
__m256i src_reg, dst_reg, exp_src_lo, exp_src_hi, exp_dst_lo, exp_dst_hi;
__m256i sse_reg, sum_reg, sse_reg_hi, res_cmp, sum_reg_lo, sum_reg_hi;
__m256i zero_reg;
int i, sum;
- sum_reg = _mm256_set1_epi16(0);
- sse_reg = _mm256_set1_epi16(0);
- zero_reg = _mm256_set1_epi16(0);
+ sum_reg = _mm256_setzero_si256();
+ sse_reg = _mm256_setzero_si256();
+ zero_reg = _mm256_setzero_si256();
// x_offset = 0 and y_offset = 0
if (x_offset == 0) {
@@ -589,18 +589,19 @@ unsigned int aom_sub_pixel_variance16xh_avx2(const uint8_t *src, int src_stride,
return sum;
}
-unsigned int aom_sub_pixel_avg_variance32xh_avx2(
- const uint8_t *src, int src_stride, int x_offset, int y_offset,
- const uint8_t *dst, int dst_stride, const uint8_t *sec, int sec_stride,
- int height, unsigned int *sse) {
+int aom_sub_pixel_avg_variance32xh_avx2(const uint8_t *src, int src_stride,
+ int x_offset, int y_offset,
+ const uint8_t *dst, int dst_stride,
+ const uint8_t *sec, int sec_stride,
+ int height, unsigned int *sse) {
__m256i sec_reg;
__m256i src_reg, dst_reg, exp_src_lo, exp_src_hi, exp_dst_lo, exp_dst_hi;
__m256i sse_reg, sum_reg, sse_reg_hi, res_cmp, sum_reg_lo, sum_reg_hi;
__m256i zero_reg;
int i, sum;
- sum_reg = _mm256_set1_epi16(0);
- sse_reg = _mm256_set1_epi16(0);
- zero_reg = _mm256_set1_epi16(0);
+ sum_reg = _mm256_setzero_si256();
+ sse_reg = _mm256_setzero_si256();
+ zero_reg = _mm256_setzero_si256();
// x_offset = 0 and y_offset = 0
if (x_offset == 0) {
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/variance_impl_ssse3.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/variance_impl_ssse3.c
index 66b0d7d845a..699002195bb 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/variance_impl_ssse3.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/variance_impl_ssse3.c
@@ -25,8 +25,8 @@ void aom_var_filter_block2d_bil_first_pass_ssse3(
// Change {128, 0} to {64, 0} and reduce FILTER_BITS by 1 to avoid overflow.
const int16_t round = (1 << (FILTER_BITS - 1)) >> 1;
const __m128i r = _mm_set1_epi16(round);
- const uint8_t f0 = filter[0] >> 1;
- const uint8_t f1 = filter[1] >> 1;
+ const int8_t f0 = (int8_t)(filter[0] >> 1);
+ const int8_t f1 = (int8_t)(filter[1] >> 1);
const __m128i filters = _mm_setr_epi8(f0, f1, f0, f1, f0, f1, f0, f1, f0, f1,
f0, f1, f0, f1, f0, f1);
unsigned int i, j;
diff --git a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/variance_sse2.c b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/variance_sse2.c
index c36eeeedde9..a4c3262fe8d 100644
--- a/chromium/third_party/libaom/source/libaom/aom_dsp/x86/variance_sse2.c
+++ b/chromium/third_party/libaom/source/libaom/aom_dsp/x86/variance_sse2.c
@@ -32,12 +32,12 @@ unsigned int aom_get_mb_ss_sse2(const int16_t *src) {
vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8));
vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4));
- return _mm_cvtsi128_si32(vsum);
+ return (unsigned int)_mm_cvtsi128_si32(vsum);
}
static INLINE __m128i load4x2_sse2(const uint8_t *const p, const int stride) {
- const __m128i p0 = _mm_cvtsi32_si128(loadu_uint32(p + 0 * stride));
- const __m128i p1 = _mm_cvtsi32_si128(loadu_uint32(p + 1 * stride));
+ const __m128i p0 = _mm_cvtsi32_si128(loadu_int32(p + 0 * stride));
+ const __m128i p1 = _mm_cvtsi32_si128(loadu_int32(p + 1 * stride));
return _mm_unpacklo_epi8(_mm_unpacklo_epi32(p0, p1), _mm_setzero_si128());
}
@@ -50,7 +50,7 @@ static INLINE __m128i load8_8to16_sse2(const uint8_t *const p) {
static INLINE unsigned int add32x4_sse2(__m128i val) {
val = _mm_add_epi32(val, _mm_srli_si128(val, 8));
val = _mm_add_epi32(val, _mm_srli_si128(val, 4));
- return _mm_cvtsi128_si32(val);
+ return (unsigned int)_mm_cvtsi128_si32(val);
}
// Accumulate 8 16bit in sum to 4 32bit number
@@ -103,7 +103,7 @@ static INLINE void variance_final_512_pel_sse2(__m128i vsse, __m128i vsum,
vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8));
vsum = _mm_unpacklo_epi16(vsum, vsum);
vsum = _mm_srai_epi32(vsum, 16);
- *sum = add32x4_sse2(vsum);
+ *sum = (int)add32x4_sse2(vsum);
}
// Can handle 1024 pixels' diff sum (such as 32x32)
@@ -113,7 +113,7 @@ static INLINE void variance_final_1024_pel_sse2(__m128i vsse, __m128i vsum,
*sse = add32x4_sse2(vsse);
vsum = sum_to_32bit_sse2(vsum);
- *sum = add32x4_sse2(vsum);
+ *sum = (int)add32x4_sse2(vsum);
}
static INLINE void variance4_sse2(const uint8_t *src, const int src_stride,
@@ -314,7 +314,7 @@ AOM_VAR_NO_LOOP_SSE2(16, 64, 10, 1024)
ref += (ref_stride * uh); \
} \
*sse = add32x4_sse2(vsse); \
- int sum = add32x4_sse2(vsum); \
+ int sum = (int)add32x4_sse2(vsum); \
assert(sum <= 255 * bw * bh); \
assert(sum >= -255 * bw * bh); \
return *sse - (uint32_t)(((int64_t)sum * sum) >> bits); \
@@ -678,8 +678,8 @@ uint64_t aom_mse_4xh_16bit_sse2(uint8_t *dst, int dstride, uint16_t *src,
const __m128i zeros = _mm_setzero_si128();
__m128i square_result = _mm_setzero_si128();
for (int i = 0; i < h; i += 2) {
- dst0_8x8 = _mm_cvtsi32_si128(*(uint32_t const *)(&dst[(i + 0) * dstride]));
- dst1_8x8 = _mm_cvtsi32_si128(*(uint32_t const *)(&dst[(i + 1) * dstride]));
+ dst0_8x8 = _mm_cvtsi32_si128(*(int const *)(&dst[(i + 0) * dstride]));
+ dst1_8x8 = _mm_cvtsi32_si128(*(int const *)(&dst[(i + 1) * dstride]));
dst_16x8 = _mm_unpacklo_epi8(_mm_unpacklo_epi32(dst0_8x8, dst1_8x8), zeros);
src0_16x4 = _mm_loadl_epi64((__m128i const *)(&src[(i + 0) * sstride]));
@@ -762,3 +762,17 @@ uint64_t aom_mse_wxh_16bit_sse2(uint8_t *dst, int dstride, uint16_t *src,
default: assert(0 && "unsupported width"); return -1;
}
}
+
+uint64_t aom_mse_16xh_16bit_sse2(uint8_t *dst, int dstride, uint16_t *src,
+ int w, int h) {
+ assert((w == 8 || w == 4) && (h == 8 || h == 4) &&
+ "w=8/4 and h=8/4 must be satisfied");
+ const int num_blks = 16 / w;
+ uint64_t sum = 0;
+ for (int i = 0; i < num_blks; i++) {
+ sum += aom_mse_wxh_16bit_sse2(dst, dstride, src, w, w, h);
+ dst += w;
+ src += (w * h);
+ }
+ return sum;
+}
diff --git a/chromium/third_party/libaom/source/libaom/aom_ports/mem.h b/chromium/third_party/libaom/source/libaom/aom_ports/mem.h
index e9bb8adbc53..e39684202ee 100644
--- a/chromium/third_party/libaom/source/libaom/aom_ports/mem.h
+++ b/chromium/third_party/libaom/source/libaom/aom_ports/mem.h
@@ -71,6 +71,8 @@
#define ALIGN_POWER_OF_TWO(value, n) \
(((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
+#define ALIGN_POWER_OF_TWO_UNSIGNED(value, n) \
+ (((value) + ((1u << (n)) - 1)) & ~((1u << (n)) - 1))
#define DIVIDE_AND_ROUND(x, y) (((x) + ((y) >> 1)) / (y))
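
The new ALIGN_POWER_OF_TWO_UNSIGNED keeps the mask arithmetic in unsigned types, so aligning an unsigned value no longer mixes it with the signed (1 << n) masks of the original macro. A quick sanity check of the rounding behavior, as an editorial sketch rather than patch content:

#include <assert.h>
#include <stdint.h>

#define ALIGN_POWER_OF_TWO_UNSIGNED(value, n) \
  (((value) + ((1u << (n)) - 1)) & ~((1u << (n)) - 1))

int main(void) {
  const uint32_t size = 100;
  assert(ALIGN_POWER_OF_TWO_UNSIGNED(size, 4) == 112);  // round up to 16
  assert(ALIGN_POWER_OF_TWO_UNSIGNED(size, 5) == 128);  // round up to 32
  assert(ALIGN_POWER_OF_TWO_UNSIGNED(128u, 5) == 128);  // already aligned
  return 0;
}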
diff --git a/chromium/third_party/libaom/source/libaom/aom_ports/x86.h b/chromium/third_party/libaom/source/libaom/aom_ports/x86.h
index 79cbd02bf2c..f390dfac6e4 100644
--- a/chromium/third_party/libaom/source/libaom/aom_ports/x86.h
+++ b/chromium/third_party/libaom/source/libaom/aom_ports/x86.h
@@ -387,7 +387,7 @@ static INLINE unsigned int x87_set_double_precision(void) {
// Reserved 01B
// Double Precision (53-Bits) 10B
// Extended Precision (64-Bits) 11B
- x87_set_control_word((mode & ~0x300) | 0x200);
+ x87_set_control_word((mode & ~0x300u) | 0x200u);
return mode;
}
diff --git a/chromium/third_party/libaom/source/libaom/aom_scale/aom_scale.cmake b/chromium/third_party/libaom/source/libaom/aom_scale/aom_scale.cmake
index e83299320fc..ea94dbc0635 100644
--- a/chromium/third_party/libaom/source/libaom/aom_scale/aom_scale.cmake
+++ b/chromium/third_party/libaom/source/libaom/aom_scale/aom_scale.cmake
@@ -20,20 +20,12 @@ list(APPEND AOM_SCALE_SOURCES "${AOM_ROOT}/aom_scale/aom_scale.h"
"${AOM_ROOT}/aom_scale/generic/yv12extend.c"
"${AOM_ROOT}/aom_scale/yv12config.h")
-list(APPEND AOM_SCALE_INTRIN_DSPR2
- "${AOM_ROOT}/aom_scale/mips/dspr2/yv12extend_dspr2.c")
-
# Creates the aom_scale build target and makes libaom depend on it. The libaom
# target must exist before this function is called.
function(setup_aom_scale_targets)
add_library(aom_scale OBJECT ${AOM_SCALE_SOURCES})
target_sources(aom PRIVATE $<TARGET_OBJECTS:aom_scale>)
- if(HAVE_DSPR2)
- add_intrinsics_object_library("" "dspr2" "aom_scale"
- "AOM_SCALE_INTRIN_DSPR2")
- endif()
-
target_sources(aom PRIVATE $<TARGET_OBJECTS:aom_scale>)
if(BUILD_SHARED_LIBS)
target_sources(aom_static PRIVATE $<TARGET_OBJECTS:aom_scale>)
diff --git a/chromium/third_party/libaom/source/libaom/aom_scale/aom_scale_rtcd.pl b/chromium/third_party/libaom/source/libaom/aom_scale/aom_scale_rtcd.pl
index eef6f16a74c..e84b6f95ec3 100644
--- a/chromium/third_party/libaom/source/libaom/aom_scale/aom_scale_rtcd.pl
+++ b/chromium/third_party/libaom/source/libaom/aom_scale/aom_scale_rtcd.pl
@@ -45,11 +45,11 @@ add_proto qw/void aom_yv12_partial_coloc_copy_u/, "const struct yv12_buffer_conf
add_proto qw/void aom_yv12_partial_copy_v/, "const struct yv12_buffer_config *src_bc, int hstart1, int hend1, int vstart1, int vend1, struct yv12_buffer_config *dst_bc, int hstart2, int vstart2";
add_proto qw/void aom_yv12_partial_coloc_copy_v/, "const struct yv12_buffer_config *src_bc, struct yv12_buffer_config *dst_bc, int hstart, int hend, int vstart, int vend";
+add_proto qw/void aom_extend_frame_borders_plane_row/, "const struct yv12_buffer_config *ybf, int plane, int v_start, int v_end";
+
add_proto qw/void aom_extend_frame_borders/, "struct yv12_buffer_config *ybf, const int num_planes";
-specialize qw/aom_extend_frame_borders dspr2/;
add_proto qw/void aom_extend_frame_inner_borders/, "struct yv12_buffer_config *ybf, const int num_planes";
-specialize qw/aom_extend_frame_inner_borders dspr2/;
add_proto qw/void aom_extend_frame_borders_y/, "struct yv12_buffer_config *ybf";
1;
diff --git a/chromium/third_party/libaom/source/libaom/aom_scale/generic/yv12config.c b/chromium/third_party/libaom/source/libaom/aom_scale/generic/yv12config.c
index dedfc0277c0..de56263fa4c 100644
--- a/chromium/third_party/libaom/source/libaom/aom_scale/generic/yv12config.c
+++ b/chromium/third_party/libaom/source/libaom/aom_scale/generic/yv12config.c
@@ -51,7 +51,7 @@ static int realloc_frame_buffer_aligned(
const uint64_t uvplane_size, const int aligned_width,
const int aligned_height, const int uv_width, const int uv_height,
const int uv_stride, const int uv_border_w, const int uv_border_h,
- int alloc_y_buffer_8bit) {
+ int alloc_y_buffer_8bit, int alloc_y_plane_only) {
if (ybf) {
const int aom_byte_align = (byte_alignment == 0) ? 1 : byte_alignment;
const uint64_t frame_size =
@@ -144,13 +144,18 @@ static int realloc_frame_buffer_aligned(
ybf->y_buffer = (uint8_t *)aom_align_addr(
buf + (border * y_stride) + border, aom_byte_align);
- ybf->u_buffer = (uint8_t *)aom_align_addr(
- buf + yplane_size + (uv_border_h * uv_stride) + uv_border_w,
- aom_byte_align);
- ybf->v_buffer =
- (uint8_t *)aom_align_addr(buf + yplane_size + uvplane_size +
- (uv_border_h * uv_stride) + uv_border_w,
- aom_byte_align);
+ if (!alloc_y_plane_only) {
+ ybf->u_buffer = (uint8_t *)aom_align_addr(
+ buf + yplane_size + (uv_border_h * uv_stride) + uv_border_w,
+ aom_byte_align);
+ ybf->v_buffer =
+ (uint8_t *)aom_align_addr(buf + yplane_size + uvplane_size +
+ (uv_border_h * uv_stride) + uv_border_w,
+ aom_byte_align);
+ } else {
+ ybf->u_buffer = NULL;
+ ybf->v_buffer = NULL;
+ }
ybf->use_external_reference_buffers = 0;
@@ -172,13 +177,11 @@ static int realloc_frame_buffer_aligned(
return AOM_CODEC_MEM_ERROR;
}
-static int calc_stride_and_planesize(const int ss_x, const int ss_y,
- const int aligned_width,
- const int aligned_height, const int border,
- const int byte_alignment, int *y_stride,
- int *uv_stride, uint64_t *yplane_size,
- uint64_t *uvplane_size,
- const int uv_height) {
+static int calc_stride_and_planesize(
+ const int ss_x, const int ss_y, const int aligned_width,
+ const int aligned_height, const int border, const int byte_alignment,
+ int alloc_y_plane_only, int *y_stride, int *uv_stride,
+ uint64_t *yplane_size, uint64_t *uvplane_size, const int uv_height) {
/* Only support allocating buffers that have a border that's a multiple
* of 32. The border restriction is required to get 16-byte alignment of
* the start of the chroma rows without introducing an arbitrary gap
@@ -189,9 +192,15 @@ static int calc_stride_and_planesize(const int ss_x, const int ss_y,
*yplane_size =
(aligned_height + 2 * border) * (uint64_t)(*y_stride) + byte_alignment;
- *uv_stride = *y_stride >> ss_x;
- *uvplane_size = (uv_height + 2 * (border >> ss_y)) * (uint64_t)(*uv_stride) +
- byte_alignment;
+ if (!alloc_y_plane_only) {
+ *uv_stride = *y_stride >> ss_x;
+ *uvplane_size =
+ (uv_height + 2 * (border >> ss_y)) * (uint64_t)(*uv_stride) +
+ byte_alignment;
+ } else {
+ *uv_stride = 0;
+ *uvplane_size = 0;
+ }
return 0;
}
@@ -200,7 +209,7 @@ int aom_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height,
int border, int byte_alignment,
aom_codec_frame_buffer_t *fb,
aom_get_frame_buffer_cb_fn_t cb, void *cb_priv,
- int alloc_y_buffer_8bit) {
+ int alloc_y_buffer_8bit, int alloc_y_plane_only) {
#if CONFIG_SIZE_LIMIT
if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT)
return AOM_CODEC_MEM_ERROR;
@@ -220,25 +229,26 @@ int aom_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height,
int error = calc_stride_and_planesize(
ss_x, ss_y, aligned_width, aligned_height, border, byte_alignment,
- &y_stride, &uv_stride, &yplane_size, &uvplane_size, uv_height);
+ alloc_y_plane_only, &y_stride, &uv_stride, &yplane_size, &uvplane_size,
+ uv_height);
if (error) return error;
return realloc_frame_buffer_aligned(
ybf, width, height, ss_x, ss_y, use_highbitdepth, border,
byte_alignment, fb, cb, cb_priv, y_stride, yplane_size, uvplane_size,
aligned_width, aligned_height, uv_width, uv_height, uv_stride,
- uv_border_w, uv_border_h, alloc_y_buffer_8bit);
+ uv_border_w, uv_border_h, alloc_y_buffer_8bit, alloc_y_plane_only);
}
return AOM_CODEC_MEM_ERROR;
}
int aom_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height,
int ss_x, int ss_y, int use_highbitdepth, int border,
- int byte_alignment) {
+ int byte_alignment, int alloc_y_plane_only) {
if (ybf) {
aom_free_frame_buffer(ybf);
return aom_realloc_frame_buffer(ybf, width, height, ss_x, ss_y,
use_highbitdepth, border, byte_alignment,
- NULL, NULL, NULL, 0);
+ NULL, NULL, NULL, 0, alloc_y_plane_only);
}
return AOM_CODEC_MEM_ERROR;
}
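
With alloc_y_plane_only threaded through, callers can request a luma-only allocation; per the hunks above, u_buffer and v_buffer come back NULL and the uv stride and plane size are computed as 0. A hedged usage sketch (the dimensions and flags are illustrative):

#include <assert.h>
#include <string.h>

#include "aom_scale/yv12config.h"

static void alloc_luma_only(void) {
  YV12_BUFFER_CONFIG buf;
  memset(&buf, 0, sizeof(buf));
  // The border must be a multiple of 32; byte_alignment 0 is legacy alignment.
  const int err = aom_alloc_frame_buffer(&buf, 1920, 1080, /*ss_x=*/1,
                                         /*ss_y=*/1, /*use_highbitdepth=*/0,
                                         /*border=*/32, /*byte_alignment=*/0,
                                         /*alloc_y_plane_only=*/1);
  if (!err) {
    assert(buf.u_buffer == NULL && buf.v_buffer == NULL);
    aom_free_frame_buffer(&buf);
  }
}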
diff --git a/chromium/third_party/libaom/source/libaom/aom_scale/generic/yv12extend.c b/chromium/third_party/libaom/source/libaom/aom_scale/generic/yv12extend.c
index a32aee56456..997ff5434f9 100644
--- a/chromium/third_party/libaom/source/libaom/aom_scale/generic/yv12extend.c
+++ b/chromium/third_party/libaom/source/libaom/aom_scale/generic/yv12extend.c
@@ -21,19 +21,20 @@
static void extend_plane(uint8_t *const src, int src_stride, int width,
int height, int extend_top, int extend_left,
- int extend_bottom, int extend_right) {
+ int extend_bottom, int extend_right, int v_start,
+ int v_end) {
assert(src != NULL);
int i;
const int linesize = extend_left + extend_right + width;
assert(linesize <= src_stride);
/* copy the left and right most columns out */
- uint8_t *src_ptr1 = src;
- uint8_t *src_ptr2 = src + width - 1;
- uint8_t *dst_ptr1 = src - extend_left;
+ uint8_t *src_ptr1 = src + v_start * src_stride;
+ uint8_t *src_ptr2 = src + v_start * src_stride + width - 1;
+ uint8_t *dst_ptr1 = src + v_start * src_stride - extend_left;
uint8_t *dst_ptr2 = src_ptr2 + 1;
- for (i = 0; i < height; ++i) {
+ for (i = v_start; i < v_end; ++i) {
memset(dst_ptr1, src_ptr1[0], extend_left);
memset(dst_ptr2, src_ptr2[0], extend_right);
src_ptr1 += src_stride;
@@ -65,19 +66,20 @@ static void extend_plane(uint8_t *const src, int src_stride, int width,
#if CONFIG_AV1_HIGHBITDEPTH
static void extend_plane_high(uint8_t *const src8, int src_stride, int width,
int height, int extend_top, int extend_left,
- int extend_bottom, int extend_right) {
+ int extend_bottom, int extend_right, int v_start,
+ int v_end) {
int i;
const int linesize = extend_left + extend_right + width;
assert(linesize <= src_stride);
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
/* copy the left and right most columns out */
- uint16_t *src_ptr1 = src;
- uint16_t *src_ptr2 = src + width - 1;
- uint16_t *dst_ptr1 = src - extend_left;
+ uint16_t *src_ptr1 = src + v_start * src_stride;
+ uint16_t *src_ptr2 = src + v_start * src_stride + width - 1;
+ uint16_t *dst_ptr1 = src + v_start * src_stride - extend_left;
uint16_t *dst_ptr2 = src_ptr2 + 1;
- for (i = 0; i < height; ++i) {
+ for (i = v_start; i < v_end; ++i) {
aom_memset16(dst_ptr1, src_ptr1[0], extend_left);
aom_memset16(dst_ptr2, src_ptr2[0], extend_right);
src_ptr1 += src_stride;
@@ -107,6 +109,41 @@ static void extend_plane_high(uint8_t *const src8, int src_stride, int width,
}
#endif // CONFIG_AV1_HIGHBITDEPTH
+void aom_extend_frame_borders_plane_row_c(const YV12_BUFFER_CONFIG *ybf,
+ int plane, int v_start, int v_end) {
+ const int ext_size = ybf->border;
+ const int ss_x = ybf->subsampling_x;
+ const int ss_y = ybf->subsampling_y;
+
+ assert(ybf->y_height - ybf->y_crop_height < 16);
+ assert(ybf->y_width - ybf->y_crop_width < 16);
+ assert(ybf->y_height - ybf->y_crop_height >= 0);
+ assert(ybf->y_width - ybf->y_crop_width >= 0);
+
+ const int is_uv = plane > 0;
+ const int top = ext_size >> (is_uv ? ss_y : 0);
+ const int left = ext_size >> (is_uv ? ss_x : 0);
+ const int bottom = top + ybf->heights[is_uv] - ybf->crop_heights[is_uv];
+ const int right = left + ybf->widths[is_uv] - ybf->crop_widths[is_uv];
+ const int extend_top_border = (v_start == 0);
+ const int extend_bottom_border = (v_end == ybf->crop_heights[is_uv]);
+
+#if CONFIG_AV1_HIGHBITDEPTH
+ if (ybf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ extend_plane_high(ybf->buffers[plane], ybf->strides[is_uv],
+ ybf->crop_widths[is_uv], ybf->crop_heights[is_uv],
+ extend_top_border ? top : 0, left,
+ extend_bottom_border ? bottom : 0, right, v_start, v_end);
+ return;
+ }
+#endif
+
+ extend_plane(ybf->buffers[plane], ybf->strides[is_uv],
+ ybf->crop_widths[is_uv], ybf->crop_heights[is_uv],
+ extend_top_border ? top : 0, left,
+ extend_bottom_border ? bottom : 0, right, v_start, v_end);
+}
+
void aom_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf,
const int num_planes) {
assert(ybf->border % 2 == 0);
@@ -124,7 +161,8 @@ void aom_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf,
ybf->buffers[plane], ybf->strides[is_uv], ybf->crop_widths[is_uv],
ybf->crop_heights[is_uv], plane_border, plane_border,
plane_border + ybf->heights[is_uv] - ybf->crop_heights[is_uv],
- plane_border + ybf->widths[is_uv] - ybf->crop_widths[is_uv]);
+ plane_border + ybf->widths[is_uv] - ybf->crop_widths[is_uv], 0,
+ ybf->crop_heights[is_uv]);
}
return;
}
@@ -137,7 +175,8 @@ void aom_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf,
ybf->crop_widths[is_uv], ybf->crop_heights[is_uv],
plane_border, plane_border,
plane_border + ybf->heights[is_uv] - ybf->crop_heights[is_uv],
- plane_border + ybf->widths[is_uv] - ybf->crop_widths[is_uv]);
+ plane_border + ybf->widths[is_uv] - ybf->crop_widths[is_uv], 0,
+ ybf->crop_heights[is_uv]);
}
}
@@ -161,7 +200,7 @@ static void extend_frame(YV12_BUFFER_CONFIG *const ybf, int ext_size,
const int right = left + ybf->widths[is_uv] - ybf->crop_widths[is_uv];
extend_plane_high(ybf->buffers[plane], ybf->strides[is_uv],
ybf->crop_widths[is_uv], ybf->crop_heights[is_uv], top,
- left, bottom, right);
+ left, bottom, right, 0, ybf->crop_heights[is_uv]);
}
return;
}
@@ -175,7 +214,7 @@ static void extend_frame(YV12_BUFFER_CONFIG *const ybf, int ext_size,
const int right = left + ybf->widths[is_uv] - ybf->crop_widths[is_uv];
extend_plane(ybf->buffers[plane], ybf->strides[is_uv],
ybf->crop_widths[is_uv], ybf->crop_heights[is_uv], top, left,
- bottom, right);
+ bottom, right, 0, ybf->crop_heights[is_uv]);
}
}
@@ -199,17 +238,17 @@ void aom_extend_frame_borders_y_c(YV12_BUFFER_CONFIG *ybf) {
assert(ybf->y_width - ybf->y_crop_width >= 0);
#if CONFIG_AV1_HIGHBITDEPTH
if (ybf->flags & YV12_FLAG_HIGHBITDEPTH) {
- extend_plane_high(ybf->y_buffer, ybf->y_stride, ybf->y_crop_width,
- ybf->y_crop_height, ext_size, ext_size,
- ext_size + ybf->y_height - ybf->y_crop_height,
- ext_size + ybf->y_width - ybf->y_crop_width);
+ extend_plane_high(
+ ybf->y_buffer, ybf->y_stride, ybf->y_crop_width, ybf->y_crop_height,
+ ext_size, ext_size, ext_size + ybf->y_height - ybf->y_crop_height,
+ ext_size + ybf->y_width - ybf->y_crop_width, 0, ybf->y_crop_height);
return;
}
#endif
- extend_plane(ybf->y_buffer, ybf->y_stride, ybf->y_crop_width,
- ybf->y_crop_height, ext_size, ext_size,
- ext_size + ybf->y_height - ybf->y_crop_height,
- ext_size + ybf->y_width - ybf->y_crop_width);
+ extend_plane(
+ ybf->y_buffer, ybf->y_stride, ybf->y_crop_width, ybf->y_crop_height,
+ ext_size, ext_size, ext_size + ybf->y_height - ybf->y_crop_height,
+ ext_size + ybf->y_width - ybf->y_crop_width, 0, ybf->y_crop_height);
}
#if CONFIG_AV1_HIGHBITDEPTH
@@ -460,7 +499,7 @@ int aom_yv12_realloc_with_new_border_c(YV12_BUFFER_CONFIG *ybf, int new_border,
const int error = aom_alloc_frame_buffer(
&new_buf, ybf->y_crop_width, ybf->y_crop_height, ybf->subsampling_x,
ybf->subsampling_y, ybf->flags & YV12_FLAG_HIGHBITDEPTH, new_border,
- byte_alignment);
+ byte_alignment, 0);
if (error) return error;
// Copy image buffer
aom_yv12_copy_frame(ybf, &new_buf, num_planes);
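
The new aom_extend_frame_borders_plane_row entry point extends one plane's borders for a limited row range, which is what lets row-based workers split the job: v_start == 0 takes the top border and v_end equal to the crop height takes the bottom one, per the implementation above. A sketch of covering plane 0 with two calls, assuming ybf points to an allocated YV12_BUFFER_CONFIG:

const int h = ybf->crop_heights[0];  // plane 0 is luma
aom_extend_frame_borders_plane_row(ybf, /*plane=*/0, /*v_start=*/0,
                                   /*v_end=*/h / 2);
aom_extend_frame_borders_plane_row(ybf, /*plane=*/0, /*v_start=*/h / 2,
                                   /*v_end=*/h);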
diff --git a/chromium/third_party/libaom/source/libaom/aom_scale/mips/dspr2/yv12extend_dspr2.c b/chromium/third_party/libaom/source/libaom/aom_scale/mips/dspr2/yv12extend_dspr2.c
deleted file mode 100644
index 8556e71a203..00000000000
--- a/chromium/third_party/libaom/source/libaom/aom_scale/mips/dspr2/yv12extend_dspr2.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "config/aom_config.h"
-
-#include "aom_scale/yv12config.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_scale/aom_scale.h"
-
-#if HAVE_DSPR2
-static void extend_plane(uint8_t *const src, int src_stride, int width,
- int height, int extend_top, int extend_left,
- int extend_bottom, int extend_right) {
- int i, j;
- uint8_t *left_src, *right_src;
- uint8_t *left_dst_start, *right_dst_start;
- uint8_t *left_dst, *right_dst;
- uint8_t *top_src, *bot_src;
- uint8_t *top_dst, *bot_dst;
- uint32_t left_pix;
- uint32_t right_pix;
- uint32_t linesize;
-
- /* copy the left and right most columns out */
- left_src = src;
- right_src = src + width - 1;
- left_dst_start = src - extend_left;
- right_dst_start = src + width;
-
- for (i = height; i--;) {
- left_dst = left_dst_start;
- right_dst = right_dst_start;
-
- __asm__ __volatile__(
- "lb %[left_pix], 0(%[left_src]) \n\t"
- "lb %[right_pix], 0(%[right_src]) \n\t"
- "replv.qb %[left_pix], %[left_pix] \n\t"
- "replv.qb %[right_pix], %[right_pix] \n\t"
-
- : [left_pix] "=&r"(left_pix), [right_pix] "=&r"(right_pix)
- : [left_src] "r"(left_src), [right_src] "r"(right_src));
-
- for (j = extend_left / 4; j--;) {
- __asm__ __volatile__(
- "sw %[left_pix], 0(%[left_dst]) \n\t"
- "sw %[right_pix], 0(%[right_dst]) \n\t"
-
- :
- : [left_dst] "r"(left_dst), [left_pix] "r"(left_pix),
- [right_dst] "r"(right_dst), [right_pix] "r"(right_pix));
-
- left_dst += 4;
- right_dst += 4;
- }
-
- for (j = extend_left % 4; j--;) {
- __asm__ __volatile__(
- "sb %[left_pix], 0(%[left_dst]) \n\t"
- "sb %[right_pix], 0(%[right_dst]) \n\t"
-
- :
- : [left_dst] "r"(left_dst), [left_pix] "r"(left_pix),
- [right_dst] "r"(right_dst), [right_pix] "r"(right_pix));
-
- left_dst += 1;
- right_dst += 1;
- }
-
- left_src += src_stride;
- right_src += src_stride;
- left_dst_start += src_stride;
- right_dst_start += src_stride;
- }
-
- /* Now copy the top and bottom lines into each line of the respective
- * borders
- */
- top_src = src - extend_left;
- bot_src = src + src_stride * (height - 1) - extend_left;
- top_dst = src + src_stride * (-extend_top) - extend_left;
- bot_dst = src + src_stride * (height)-extend_left;
- linesize = extend_left + extend_right + width;
- assert(linesize <= src_stride);
-
- for (i = 0; i < extend_top; i++) {
- memcpy(top_dst, top_src, linesize);
- top_dst += src_stride;
- }
-
- for (i = 0; i < extend_bottom; i++) {
- memcpy(bot_dst, bot_src, linesize);
- bot_dst += src_stride;
- }
-}
-
-static void extend_frame(YV12_BUFFER_CONFIG *const ybf, int ext_size) {
- const int c_w = ybf->uv_crop_width;
- const int c_h = ybf->uv_crop_height;
- const int ss_x = ybf->subsampling_x;
- const int ss_y = ybf->subsampling_y;
- const int c_et = ext_size >> ss_y;
- const int c_el = ext_size >> ss_x;
- const int c_eb = c_et + ybf->uv_height - ybf->uv_crop_height;
- const int c_er = c_el + ybf->uv_width - ybf->uv_crop_width;
-
- assert(ybf->y_height - ybf->y_crop_height < 16);
- assert(ybf->y_width - ybf->y_crop_width < 16);
- assert(ybf->y_height - ybf->y_crop_height >= 0);
- assert(ybf->y_width - ybf->y_crop_width >= 0);
-
- extend_plane(ybf->y_buffer, ybf->y_stride, ybf->y_crop_width,
- ybf->y_crop_height, ext_size, ext_size,
- ext_size + ybf->y_height - ybf->y_crop_height,
- ext_size + ybf->y_width - ybf->y_crop_width);
-
- extend_plane(ybf->u_buffer, ybf->uv_stride, c_w, c_h, c_et, c_el, c_eb, c_er);
-
- extend_plane(ybf->v_buffer, ybf->uv_stride, c_w, c_h, c_et, c_el, c_eb, c_er);
-}
-
-void aom_extend_frame_borders_dspr2(YV12_BUFFER_CONFIG *ybf,
- const int num_planes) {
- extend_frame(ybf, ybf->border, num_planes);
-}
-
-void aom_extend_frame_inner_borders_dspr2(YV12_BUFFER_CONFIG *ybf,
- const int num_planes) {
- const int inner_bw = (ybf->border > AOMINNERBORDERINPIXELS)
- ? AOMINNERBORDERINPIXELS
- : ybf->border;
- extend_frame(ybf, inner_bw, num_planes);
-}
-#endif
diff --git a/chromium/third_party/libaom/source/libaom/aom_scale/yv12config.h b/chromium/third_party/libaom/source/libaom/aom_scale/yv12config.h
index 31af69cc864..581e9233220 100644
--- a/chromium/third_party/libaom/source/libaom/aom_scale/yv12config.h
+++ b/chromium/third_party/libaom/source/libaom/aom_scale/yv12config.h
@@ -123,7 +123,7 @@ typedef struct yv12_buffer_config {
int aom_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height,
int ss_x, int ss_y, int use_highbitdepth, int border,
- int byte_alignment);
+ int byte_alignment, int alloc_y_plane_only);
// Updates the yv12 buffer config with the frame buffer. |byte_alignment| must
// be a power of 2, from 32 to 1024. 0 sets legacy alignment. If cb is not
@@ -137,7 +137,7 @@ int aom_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height,
int border, int byte_alignment,
aom_codec_frame_buffer_t *fb,
aom_get_frame_buffer_cb_fn_t cb, void *cb_priv,
- int alloc_y_buffer_8bit);
+ int alloc_y_buffer_8bit, int alloc_y_plane_only);
int aom_free_frame_buffer(YV12_BUFFER_CONFIG *ybf);
diff --git a/chromium/third_party/libaom/source/libaom/aom_util/aom_thread.c b/chromium/third_party/libaom/source/libaom/aom_util/aom_thread.c
index a749a22401b..391602151d1 100644
--- a/chromium/third_party/libaom/source/libaom/aom_util/aom_thread.c
+++ b/chromium/third_party/libaom/source/libaom/aom_util/aom_thread.c
@@ -52,7 +52,7 @@ static THREADFN thread_loop(void *ptr) {
thread_name[sizeof(thread_name) - 1] = '\0';
pthread_setname_np(thread_name);
}
-#elif defined(__GLIBC__) || defined(__BIONIC__)
+#elif (defined(__GLIBC__) && !defined(__GNU__)) || defined(__BIONIC__)
if (worker->thread_name != NULL) {
// Linux and Android require names (with nul) fit in 16 chars, otherwise
// pthread_setname_np() returns ERANGE (34).
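
The added !defined(__GNU__) test skips this branch on GNU/Hurd, which defines __GLIBC__ but, to the best of my knowledge, lacks this pthread_setname_np variant. On glibc and Android the 16-char limit quoted in the comment includes the terminating nul, so callers have to truncate; a small illustrative sketch (the name format is hypothetical):

#define _GNU_SOURCE
#include <pthread.h>
#include <stdio.h>

static void set_worker_name(const char *base, int idx) {
  char name[16];  // glibc/Bionic limit: 15 characters plus the nul
  snprintf(name, sizeof(name), "%.10s-%d", base, idx);  // truncates base
  pthread_setname_np(pthread_self(), name);
}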
diff --git a/chromium/third_party/libaom/source/libaom/aom_util/endian_inl.h b/chromium/third_party/libaom/source/libaom/aom_util/endian_inl.h
index f536ec5b8a4..b69102a7f51 100644
--- a/chromium/third_party/libaom/source/libaom/aom_util/endian_inl.h
+++ b/chromium/third_party/libaom/source/libaom/aom_util/endian_inl.h
@@ -64,11 +64,6 @@
#define HAVE_BUILTIN_BSWAP64
#endif
-#if HAVE_MIPS32 && defined(__mips__) && !defined(__mips64) && \
- defined(__mips_isa_rev) && (__mips_isa_rev >= 2) && (__mips_isa_rev < 6)
-#define AOM_USE_MIPS32_R2
-#endif
-
static INLINE uint16_t BSwap16(uint16_t x) {
#if defined(HAVE_BUILTIN_BSWAP16)
return __builtin_bswap16(x);
@@ -81,15 +76,7 @@ static INLINE uint16_t BSwap16(uint16_t x) {
}
static INLINE uint32_t BSwap32(uint32_t x) {
-#if defined(AOM_USE_MIPS32_R2)
- uint32_t ret;
- __asm__ volatile(
- "wsbh %[ret], %[x] \n\t"
- "rotr %[ret], %[ret], 16 \n\t"
- : [ret] "=r"(ret)
- : [x] "r"(x));
- return ret;
-#elif defined(HAVE_BUILTIN_BSWAP32)
+#if defined(HAVE_BUILTIN_BSWAP32)
return __builtin_bswap32(x);
#elif defined(__i386__) || defined(__x86_64__)
uint32_t swapped_bytes;
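
With the MIPS32 rotate path gone, BSwap32 reduces to the compiler builtin where available, with the x86 inline-asm fallback below otherwise. The expected semantics, as a quick editorial check:

assert(BSwap16(0xABCDu) == 0xCDABu);  // bytes reversed: AB CD -> CD AB
assert(BSwap32(0x12345678u) == 0x78563412u);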
diff --git a/chromium/third_party/libaom/source/libaom/apps/aomdec.c b/chromium/third_party/libaom/source/libaom/apps/aomdec.c
index 9a052cefa05..2c74dd36cab 100644
--- a/chromium/third_party/libaom/source/libaom/apps/aomdec.c
+++ b/chromium/third_party/libaom/source/libaom/apps/aomdec.c
@@ -493,6 +493,10 @@ static int main_loop(int argc, const char **argv_) {
/* Parse command line */
exec_name = argv_[0];
argv = argv_dup(argc - 1, argv_ + 1);
+ if (!argv) {
+ fprintf(stderr, "Error allocating argument list\n");
+ return EXIT_FAILURE;
+ }
aom_codec_iface_t *interface = NULL;
for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
@@ -1040,6 +1044,10 @@ int main(int argc, const char **argv_) {
int error = 0;
argv = argv_dup(argc - 1, argv_ + 1);
+ if (!argv) {
+ fprintf(stderr, "Error allocating argument list\n");
+ return EXIT_FAILURE;
+ }
for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
memset(&arg, 0, sizeof(arg));
arg.argv_step = 1;
diff --git a/chromium/third_party/libaom/source/libaom/apps/aomenc.c b/chromium/third_party/libaom/source/libaom/apps/aomenc.c
index 182606fc664..60bc01ab1dc 100644
--- a/chromium/third_party/libaom/source/libaom/apps/aomenc.c
+++ b/chromium/third_party/libaom/source/libaom/apps/aomenc.c
@@ -130,9 +130,7 @@ static const int av1_arg_ctrl_map[] = { AOME_SET_CPUUSED,
AOME_SET_SHARPNESS,
AOME_SET_STATIC_THRESHOLD,
AV1E_SET_ROW_MT,
-#if CONFIG_FRAME_PARALLEL_ENCODE
AV1E_SET_FP_MT,
-#endif
AV1E_SET_TILE_COLUMNS,
AV1E_SET_TILE_ROWS,
AV1E_SET_ENABLE_TPL_MODEL,
@@ -336,9 +334,7 @@ const arg_def_t *av1_ctrl_args[] = {
&g_av1_codec_arg_defs.sharpness,
&g_av1_codec_arg_defs.static_thresh,
&g_av1_codec_arg_defs.rowmtarg,
-#if CONFIG_FRAME_PARALLEL_ENCODE
&g_av1_codec_arg_defs.fpmtarg,
-#endif
&g_av1_codec_arg_defs.tile_cols,
&g_av1_codec_arg_defs.tile_rows,
&g_av1_codec_arg_defs.enable_tpl_model,
@@ -455,6 +451,7 @@ const arg_def_t *av1_key_val_args[] = {
&g_av1_codec_arg_defs.fwd_kf_dist,
&g_av1_codec_arg_defs.strict_level_conformance,
&g_av1_codec_arg_defs.dist_metric,
+ &g_av1_codec_arg_defs.kf_max_pyr_height,
NULL,
};
@@ -1993,6 +1990,10 @@ int main(int argc, const char **argv_) {
* codec.
*/
argv = argv_dup(argc - 1, argv_ + 1);
+ if (!argv) {
+ fprintf(stderr, "Error allocating argument list\n");
+ return EXIT_FAILURE;
+ }
parse_global_config(&global, &argv);
if (argc < 2) usage_exit();
@@ -2572,13 +2573,16 @@ int main(int argc, const char **argv_) {
if (pass == global.passes - 1) {
FOREACH_STREAM(stream, streams) {
- int levels[32] = { 0 };
- int target_levels[32] = { 0 };
+ int num_operating_points;
+ int levels[32];
+ int target_levels[32];
+ aom_codec_control(&stream->encoder, AV1E_GET_NUM_OPERATING_POINTS,
+ &num_operating_points);
aom_codec_control(&stream->encoder, AV1E_GET_SEQ_LEVEL_IDX, levels);
aom_codec_control(&stream->encoder, AV1E_GET_TARGET_SEQ_LEVEL_IDX,
target_levels);
- for (int i = 0; i < 32; i++) {
+ for (int i = 0; i < num_operating_points; i++) {
if (levels[i] > target_levels[i]) {
aom_tools_warn(
"Failed to encode to target level %d.%d for operating point "
diff --git a/chromium/third_party/libaom/source/libaom/av1/arg_defs.c b/chromium/third_party/libaom/source/libaom/av1/arg_defs.c
index b5f43035026..acda4f81aa8 100644
--- a/chromium/third_party/libaom/source/libaom/av1/arg_defs.c
+++ b/chromium/third_party/libaom/source/libaom/av1/arg_defs.c
@@ -164,9 +164,9 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
.verbosearg = ARG_DEF("v", "verbose", 0, "Show encoder parameters"),
.psnrarg = ARG_DEF(
NULL, "psnr", -1,
- "Show PSNR in status line"
+ "Show PSNR in status line "
"(0: Disable PSNR status line display, 1: PSNR calculated using input "
- "bit-depth (default), 2: PSNR calculated using stream bit-depth), "
+ "bit-depth (default), 2: PSNR calculated using stream bit-depth); "
"takes default option when arguments are not specified"),
.use_cfg = ARG_DEF("c", "cfg", 1, "Config file to use"),
.recontest = ARG_DEF_ENUM(NULL, "test-decode", 1,
@@ -182,20 +182,18 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
ARG_DEF(NULL, "rate-hist", 1, "Show rate histogram (n-buckets)"),
.disable_warnings =
ARG_DEF(NULL, "disable-warnings", 0,
- "Disable warnings about potentially incorrect encode settings."),
+ "Disable warnings about potentially incorrect encode settings"),
.disable_warning_prompt =
ARG_DEF("y", "disable-warning-prompt", 0,
- "Display warnings, but do not prompt user to continue."),
- .bitdeptharg = ARG_DEF_ENUM(
- "b", "bit-depth", 1,
- "Bit depth for codec (8 for version <=1, 10 or 12 for version 2)",
- bitdepth_enum),
+ "Display warnings, but do not prompt user to continue"),
+ .bitdeptharg =
+ ARG_DEF_ENUM("b", "bit-depth", 1, "Bit depth for codec", bitdepth_enum),
.inbitdeptharg = ARG_DEF(NULL, "input-bit-depth", 1, "Bit depth of input"),
.input_chroma_subsampling_x = ARG_DEF(NULL, "input-chroma-subsampling-x", 1,
- "chroma subsampling x value."),
+ "Chroma subsampling x value"),
.input_chroma_subsampling_y = ARG_DEF(NULL, "input-chroma-subsampling-y", 1,
- "chroma subsampling y value."),
+ "Chroma subsampling y value"),
.usage = ARG_DEF("u", "usage", 1,
"Usage profile number to use (0: good, 1: rt, 2: allintra)"),
@@ -305,25 +303,23 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
.rowmtarg =
ARG_DEF(NULL, "row-mt", 1,
"Enable row based multi-threading (0: off, 1: on (default))"),
-#if CONFIG_FRAME_PARALLEL_ENCODE
.fpmtarg = ARG_DEF(
NULL, "fp-mt", 1,
"Enable frame parallel multi-threading (0: off (default), 1: on)"),
-#endif
.tile_cols =
ARG_DEF(NULL, "tile-columns", 1, "Number of tile columns to use, log2"),
.tile_rows =
ARG_DEF(NULL, "tile-rows", 1, "Number of tile rows to use, log2"),
.enable_tpl_model = ARG_DEF(NULL, "enable-tpl-model", 1,
"RDO based on frame temporal dependency "
- "(0: off, 1: backward source based). "
- "This is required for deltaq mode."),
+ "(0: off, 1: backward source based); "
+ "required for deltaq mode"),
.enable_keyframe_filtering = ARG_DEF(
NULL, "enable-keyframe-filtering", 1,
- "Apply temporal filtering on key frame"
+ "Apply temporal filtering on key frame "
"(0: no filter, 1: filter without overlay (default), "
"2: filter with overlay - experimental, may break random access in "
- "players.)"),
+ "players)"),
.tile_width = ARG_DEF(NULL, "tile-width", 1, "Tile widths (comma separated)"),
.tile_height =
ARG_DEF(NULL, "tile-height", 1, "Tile heights (command separated)"),
@@ -335,8 +331,8 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
"1: true (default), 2: disable for non-reference frames)"),
.enable_restoration = ARG_DEF(NULL, "enable-restoration", 1,
"Enable the loop restoration filter (0: false "
- "(default in Realtime mode), "
- "1: true (default in Non-realtime mode))"),
+ "(default in realtime mode), "
+ "1: true (default in non-realtime mode))"),
.enable_rect_partitions = ARG_DEF(NULL, "enable-rect-partitions", 1,
"Enable rectangular partitions "
"(0: false, 1: true (default))"),
@@ -349,9 +345,9 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
.min_partition_size =
ARG_DEF(NULL, "min-partition-size", 1,
"Set min partition size "
- "(4:4x4, 8:8x8, 16:16x16, 32:32x32, 64:64x64, 128:128x128). "
- "On frame with 4k+ resolutions or higher speed settings, the min "
- "partition size will have a minimum of 8."),
+ "(4:4x4, 8:8x8, 16:16x16, 32:32x32, 64:64x64, 128:128x128); "
+ "with 4k+ resolutions or higher speed settings, min "
+ "partition size will have a minimum of 8"),
.max_partition_size =
ARG_DEF(NULL, "max-partition-size", 1,
"Set max partition size "
@@ -429,10 +425,11 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
.enable_diagonal_intra =
ARG_DEF(NULL, "enable-diagonal-intra", 1,
"Enable diagonal (D45 to D203) intra prediction modes, which are "
- "a subset of directional modes. Has no effect if "
+ "a subset of directional modes; has no effect if "
"enable-directional-intra is 0 (0: false, 1: true (default))"),
- .force_video_mode = ARG_DEF(NULL, "force-video-mode", 1,
- "Force video mode (0: false, 1: true (default))"),
+ .force_video_mode = ARG_DEF(
+ NULL, "force-video-mode", 1,
+ "Force video mode even for a single frame (0: false (default), 1: true)"),
.enable_obmc = ARG_DEF(NULL, "enable-obmc", 1,
"Enable OBMC (0: false, 1: true (default))"),
.enable_overlay =
@@ -450,7 +447,7 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
.disable_trellis_quant = ARG_DEF(
NULL, "disable-trellis-quant", 1,
"Disable trellis optimization of quantized coefficients (0: false "
- "1: true 2: true for rd search 3: true for estimate yrd serch "
+ "1: true 2: true for rd search 3: true for estimate yrd search "
"(default))"),
.enable_qm =
ARG_DEF(NULL, "enable-qm", 1,
@@ -489,7 +486,7 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
"overrides maximum number of tile groups"),
.timing_info = ARG_DEF_ENUM(
NULL, "timing-info", 1,
- "Signal timing info in the bitstream (model unly works for no "
+ "Signal timing info in the bitstream (model only works for no "
"hidden frames, no super-res yet):",
timing_info_enum),
#if CONFIG_TUNE_VMAF
@@ -533,8 +530,8 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
ARG_DEF(NULL, "deltaq-mode", 1,
"Delta qindex mode (0: off, 1: deltaq objective (default), "
"2: deltaq placeholder, 3: key frame visual quality, 4: user "
- "rating based visual quality optimization). "
- "Currently this requires enable-tpl-model as a prerequisite."),
+ "rating based visual quality optimization); "
+ "requires --enable-tpl-model=1"),
.deltaq_strength = ARG_DEF(NULL, "deltaq-strength", 1,
"Deltaq strength for"
" --deltaq-mode=4 (%)"),
@@ -549,32 +546,31 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
ARG_DEF(NULL, "max-inter-rate", 1, "Max P-frame bitrate (pct)"),
.min_gf_interval = ARG_DEF(
NULL, "min-gf-interval", 1,
- "min gf/arf frame interval (default 0, indicating in-built behavior)"),
+ "Min gf/arf frame interval (default 0, indicating in-built behavior)"),
.max_gf_interval = ARG_DEF(
NULL, "max-gf-interval", 1,
- "max gf/arf frame interval (default 0, indicating in-built behavior)"),
+ "Max gf/arf frame interval (default 0, indicating in-built behavior)"),
.gf_min_pyr_height =
ARG_DEF(NULL, "gf-min-pyr-height", 1,
"Min height for GF group pyramid structure (0 (default) to 5)"),
.gf_max_pyr_height = ARG_DEF(
NULL, "gf-max-pyr-height", 1,
- "maximum height for GF group pyramid structure (0 to 5 (default))"),
+ "Maximum height for GF group pyramid structure (0 to 5 (default))"),
.max_reference_frames = ARG_DEF(NULL, "max-reference-frames", 1,
- "maximum number of reference frames allowed "
+ "Maximum number of reference frames allowed "
"per frame (3 to 7 (default))"),
.reduced_reference_set =
ARG_DEF(NULL, "reduced-reference-set", 1,
"Use reduced set of single and compound references (0: off "
"(default), 1: on)"),
- .target_seq_level_idx = ARG_DEF(
- NULL, "target-seq-level-idx", 1,
- "Target sequence level index. "
- "Possible values are in the form of \"ABxy\"(pad leading zeros if "
- "less than 4 digits). "
- "AB: Operating point(OP) index, "
- "xy: Target level index for the OP. "
- "E.g. \"0\" means target level index 0 for the 0th OP, "
- "\"1021\" means target level index 21 for the 10th OP."),
+ .target_seq_level_idx =
+ ARG_DEF(NULL, "target-seq-level-idx", 1,
+ "Target sequence level index. "
+ "Possible values are in the form of \"ABxy\". "
+ "AB: Operating point (OP) index, "
+ "xy: Target level index for the OP. "
+ "E.g. \"0\" means target level index 0 (2.0) for the 0th OP, "
+ "\"1019\" means target level index 19 (6.3) for the 10th OP."),
.set_min_cr = ARG_DEF(
NULL, "min-cr", 1,
"Set minimum compression ratio. Take integer values. Default is 0. "
@@ -605,7 +601,7 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
.cdf_update_mode =
ARG_DEF(NULL, "cdf-update-mode", 1,
"CDF update mode for entropy coding "
- "(0: no CDF update, 1: update CDF on all frames(default), "
+ "(0: no CDF update, 1: update CDF on all frames (default), "
"2: selectively update CDF on some frames)"),
.superblock_size = ARG_DEF_ENUM(NULL, "sb-size", 1, "Superblock size to use",
@@ -615,7 +611,7 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
ARG_DEF(NULL, "set-tier-mask", 1,
"Set bit mask to specify which tier each of the 32 possible "
"operating points conforms to. "
- "Bit value 0(defualt): Main Tier, 1: High Tier."),
+ "Bit value 0 (default): Main Tier, 1: High Tier."),
.use_fixed_qp_offsets =
ARG_DEF(NULL, "use-fixed-qp-offsets", 1,
@@ -638,44 +634,50 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
"Set average corpus complexity per mb for single pass VBR using lap. "
"(0..10000), default is 0"),
- .fwd_kf_dist =
- ARG_DEF(NULL, "fwd-kf-dist", -1,
- "Set distance between forward keyframes. A value of -1 means no "
- "repetitive forward keyframes. Default is -1."),
+ .fwd_kf_dist = ARG_DEF(NULL, "fwd-kf-dist", -1,
+ "Set distance between forward keyframes. A value of "
+ "-1 (default) means no repetitive forward keyframes."),
.enable_tx_size_search = ARG_DEF(
NULL, "enable-tx-size-search", 1,
"Enable transform size search to find the best size for each block. "
"If false, transforms always have the largest possible size "
- "(0: false, 1: true (default))"),
+ "(0: false, 1: true (default)). Ignored in non rd pick mode in "
+ "real-time coding."),
.loopfilter_control = ARG_DEF(
NULL, "loopfilter-control", 1,
"Control loop filtering "
- "(0: Loopfilter disabled for all frames, 1: Enable "
- "loopfilter for all frames (default), 2: Disable loopfilter for "
- "non-reference frames, 3: Disable loopfilter for frames with low motion"),
+ "(0: Loopfilter disabled for all frames, 1: Enable loopfilter for all "
+ "frames (default), 2: Disable loopfilter for non-reference frames, 3: "
+ "Disable loopfilter for frames with low motion)"),
.auto_intra_tools_off = ARG_DEF(
NULL, "auto-intra-tools-off", 1,
- "Automatically turn off several intra coding tools for allintra mode. "
- "Only in effect if --deltaq-mode=3."),
+ "Automatically turn off several intra coding tools for allintra mode; "
+ "only in effect if --deltaq-mode=3"),
.two_pass_input =
ARG_DEF(NULL, "two-pass-input", 1,
- "The input file for the second pass for three-pass encoding."),
+ "The input file for the second pass for three-pass encoding"),
.two_pass_output = ARG_DEF(
NULL, "two-pass-output", 1,
- "The output file for the first two passes for three-pass encoding."),
+ "The output file for the first two passes for three-pass encoding"),
.two_pass_width =
- ARG_DEF(NULL, "two-pass-width", 1, "The width of two-pass-input."),
+ ARG_DEF(NULL, "two-pass-width", 1, "The width of two-pass-input"),
.two_pass_height =
- ARG_DEF(NULL, "two-pass-height", 1, "The height of two-pass-input."),
+ ARG_DEF(NULL, "two-pass-height", 1, "The height of two-pass-input"),
.second_pass_log =
- ARG_DEF("spf", "second-pass-log", 1, "Log file from second pass."),
+ ARG_DEF("spf", "second-pass-log", 1, "Log file from second pass"),
.strict_level_conformance =
ARG_DEF(NULL, "strict-level-conformance", 1,
"When set to 1, exit the encoder when it fails to encode "
"to a given target level"),
+ .kf_max_pyr_height = ARG_DEF(
+ NULL, "kf-max-pyr-height", 1,
+ "Maximum height of pyramid structure used for the GOP starting with a "
+ "key frame (-1 to 5). When set to -1 (default), it does not have any "
+ "effect. The actual maximum pyramid height will be the minimum of this "
+ "value and the value of gf_max_pyr_height."),
#endif // CONFIG_AV1_ENCODER
};
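
For the "ABxy" encoding in the target-seq-level-idx help text above, the level index maps to an AV1 level number as 2 + (idx >> 2) for the major digit and (idx & 3) for the minor one, which is how index 19 reads as level 6.3. An editorial sketch of that convention:

// AV1 sequence level index -> "major.minor", e.g. 0 -> 2.0, 19 -> 6.3.
static void level_from_idx(int idx, int *major, int *minor) {
  *major = 2 + (idx >> 2);
  *minor = idx & 3;
}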
diff --git a/chromium/third_party/libaom/source/libaom/av1/arg_defs.h b/chromium/third_party/libaom/source/libaom/av1/arg_defs.h
index cfd269e8dd2..812df2745cd 100644
--- a/chromium/third_party/libaom/source/libaom/av1/arg_defs.h
+++ b/chromium/third_party/libaom/source/libaom/av1/arg_defs.h
@@ -121,9 +121,7 @@ typedef struct av1_codec_arg_definitions {
#if CONFIG_AV1_ENCODER
arg_def_t cpu_used_av1;
arg_def_t rowmtarg;
-#if CONFIG_FRAME_PARALLEL_ENCODE
arg_def_t fpmtarg;
-#endif
arg_def_t tile_cols;
arg_def_t tile_rows;
arg_def_t enable_tpl_model;
@@ -233,6 +231,7 @@ typedef struct av1_codec_arg_definitions {
arg_def_t second_pass_log;
arg_def_t auto_intra_tools_off;
arg_def_t strict_level_conformance;
+ arg_def_t kf_max_pyr_height;
#endif // CONFIG_AV1_ENCODER
} av1_codec_arg_definitions_t;
diff --git a/chromium/third_party/libaom/source/libaom/av1/av1.cmake b/chromium/third_party/libaom/source/libaom/av1/av1.cmake
index a87f31eb2ea..d75bf5ef3f9 100644
--- a/chromium/third_party/libaom/source/libaom/av1/av1.cmake
+++ b/chromium/third_party/libaom/source/libaom/av1/av1.cmake
@@ -110,6 +110,7 @@ list(APPEND AOM_AV1_DECODER_SOURCES
list(APPEND AOM_AV1_ENCODER_SOURCES
"${AOM_ROOT}/av1/av1_cx_iface.c"
+ "${AOM_ROOT}/av1/av1_cx_iface.h"
"${AOM_ROOT}/av1/encoder/aq_complexity.c"
"${AOM_ROOT}/av1/encoder/aq_complexity.h"
"${AOM_ROOT}/av1/encoder/aq_cyclicrefresh.c"
@@ -186,6 +187,7 @@ list(APPEND AOM_AV1_ENCODER_SOURCES
"${AOM_ROOT}/av1/encoder/lookahead.h"
"${AOM_ROOT}/av1/encoder/mcomp.c"
"${AOM_ROOT}/av1/encoder/mcomp.h"
+ "${AOM_ROOT}/av1/encoder/mcomp_structs.h"
"${AOM_ROOT}/av1/encoder/ml.c"
"${AOM_ROOT}/av1/encoder/ml.h"
"${AOM_ROOT}/av1/encoder/model_rd.h"
@@ -354,6 +356,7 @@ list(APPEND AOM_AV1_ENCODER_INTRIN_AVX2
list(APPEND AOM_AV1_ENCODER_INTRIN_NEON
"${AOM_ROOT}/av1/encoder/arm/neon/quantize_neon.c"
+ "${AOM_ROOT}/av1/encoder/arm/neon/av1_highbd_quantize_neon.c"
"${AOM_ROOT}/av1/encoder/arm/neon/ml_neon.c"
"${AOM_ROOT}/av1/encoder/arm/neon/picksrt_neon.c"
"${AOM_ROOT}/av1/encoder/arm/neon/rdopt_neon.c"
@@ -361,12 +364,12 @@ list(APPEND AOM_AV1_ENCODER_INTRIN_NEON
"${AOM_ROOT}/av1/encoder/arm/neon/encodetxb_neon.c"
"${AOM_ROOT}/av1/encoder/arm/neon/hybrid_fwd_txfm_neon.c"
"${AOM_ROOT}/av1/encoder/arm/neon/av1_fwd_txfm2d_neon.c"
- "${AOM_ROOT}/av1/encoder/arm/neon/highbd_fwd_txfm_neon.c")
+ "${AOM_ROOT}/av1/encoder/arm/neon/highbd_fwd_txfm_neon.c"
+ "${AOM_ROOT}/av1/encoder/arm/neon/wedge_utils_neon.c"
+ "${AOM_ROOT}/av1/encoder/arm/neon/temporal_filter_neon.c")
-list(APPEND AOM_AV1_ENCODER_INTRIN_MSA
- "${AOM_ROOT}/av1/encoder/mips/msa/error_msa.c"
- "${AOM_ROOT}/av1/encoder/mips/msa/fdct4x4_msa.c"
- "${AOM_ROOT}/av1/encoder/mips/msa/temporal_filter_msa.c")
+list(APPEND AOM_AV1_ENCODER_INTRIN_ARM_CRC32
+ "${AOM_ROOT}/av1/encoder/arm/crc32/hash_crc32.c")
list(APPEND AOM_AV1_COMMON_INTRIN_NEON
"${AOM_ROOT}/av1/common/arm/av1_inv_txfm_neon.c"
@@ -468,24 +471,6 @@ if(CONFIG_INTERNAL_STATS)
endif()
if(CONFIG_REALTIME_ONLY)
- list(REMOVE_ITEM AOM_AV1_COMMON_SOURCES "${AOM_ROOT}/av1/common/restoration.c"
- "${AOM_ROOT}/av1/common/restoration.h"
- "${AOM_ROOT}/av1/common/warped_motion.c"
- "${AOM_ROOT}/av1/common/warped_motion.h")
-
- list(REMOVE_ITEM AOM_AV1_COMMON_INTRIN_SSE2
- "${AOM_ROOT}/av1/common/x86/warp_plane_sse2.c")
-
- list(REMOVE_ITEM AOM_AV1_COMMON_INTRIN_SSE4_1
- "${AOM_ROOT}/av1/common/x86/highbd_warp_plane_sse4.c"
- "${AOM_ROOT}/av1/common/x86/selfguided_sse4.c"
- "${AOM_ROOT}/av1/common/x86/warp_plane_sse4.c")
-
- list(REMOVE_ITEM AOM_AV1_COMMON_INTRIN_AVX2
- "${AOM_ROOT}/av1/common/x86/highbd_warp_affine_avx2.c"
- "${AOM_ROOT}/av1/common/x86/selfguided_avx2.c"
- "${AOM_ROOT}/av1/common/x86/warp_plane_avx2.c")
-
list(REMOVE_ITEM AOM_AV1_ENCODER_INTRIN_SSE4_1
"${AOM_ROOT}/av1/encoder/x86/pickrst_sse4.c")
@@ -493,10 +478,6 @@ if(CONFIG_REALTIME_ONLY)
"${AOM_ROOT}/av1/encoder/x86/pickrst_avx2.c"
"${AOM_ROOT}/av1/encoder/x86/cnn_avx2.c")
- list(REMOVE_ITEM AOM_AV1_COMMON_INTRIN_NEON
- "${AOM_ROOT}/av1/common/arm/selfguided_neon.c"
- "${AOM_ROOT}/av1/common/arm/warp_plane_neon.c")
-
list(REMOVE_ITEM AOM_AV1_ENCODER_SOURCES
"${AOM_ROOT}/av1/encoder/cnn.c"
"${AOM_ROOT}/av1/encoder/cnn.h"
@@ -650,6 +631,16 @@ function(setup_av1_targets)
"AOM_AV1_ENCODER_INTRIN_NEON")
endif()
endif()
+
+ if(HAVE_ARM_CRC32)
+ if(CONFIG_AV1_ENCODER)
+ if(AOM_AV1_ENCODER_INTRIN_ARM_CRC32)
+ add_intrinsics_object_library("${AOM_ARM_CRC32_FLAG}" "crc32"
+ "aom_av1_encoder"
+ "AOM_AV1_ENCODER_INTRIN_ARM_CRC32")
+ endif()
+ endif()
+ endif()
endif()
if(HAVE_VSX)
@@ -659,11 +650,6 @@ function(setup_av1_targets)
endif()
endif()
- if(HAVE_MSA)
- add_intrinsics_object_library("" "msa" "aom_av1_encoder"
- "AOM_AV1_ENCODER_INTRIN_MSA")
- endif()
-
# Pass the new lib targets up to the parent scope instance of
# $AOM_LIB_TARGETS.
set(AOM_LIB_TARGETS ${AOM_LIB_TARGETS} PARENT_SCOPE)
diff --git a/chromium/third_party/libaom/source/libaom/av1/av1_cx_iface.c b/chromium/third_party/libaom/source/libaom/av1/av1_cx_iface.c
index 4a319816293..a44ff5bddd5 100644
--- a/chromium/third_party/libaom/source/libaom/av1/av1_cx_iface.c
+++ b/chromium/third_party/libaom/source/libaom/av1/av1_cx_iface.c
@@ -23,6 +23,7 @@
#include "av1/av1_iface_common.h"
#include "av1/encoder/bitstream.h"
#include "av1/encoder/encoder.h"
+#include "av1/encoder/encoder_utils.h"
#include "av1/encoder/ethread.h"
#include "av1/encoder/external_partition.h"
#include "av1/encoder/firstpass.h"
@@ -38,9 +39,7 @@ struct av1_extracfg {
unsigned int sharpness;
unsigned int static_thresh;
unsigned int row_mt;
-#if CONFIG_FRAME_PARALLEL_ENCODE
unsigned int fp_mt;
-#endif
unsigned int tile_columns; // log2 number of tile columns
unsigned int tile_rows; // log2 number of tile rows
unsigned int enable_tpl_model;
@@ -100,7 +99,7 @@ struct av1_extracfg {
int film_grain_test_vector;
const char *film_grain_table_filename;
unsigned int motion_vector_unit_test;
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
unsigned int fpmt_unit_test;
#endif
unsigned int cdf_update_mode;
@@ -173,6 +172,9 @@ struct av1_extracfg {
int fwd_kf_dist;
LOOPFILTER_CONTROL loopfilter_control;
+  // Indicates whether the application of post-processing filters should be
+  // skipped on the reconstructed frame.
+ unsigned int skip_postproc_filtering;
// the name of the second pass output file when passes > 2
const char *two_pass_output;
const char *second_pass_log;
@@ -189,6 +191,7 @@ struct av1_extracfg {
// "--enable_diagonal_intra".
int auto_intra_tools_off;
int strict_level_conformance;
+ int kf_max_pyr_height;
};
#if CONFIG_REALTIME_ONLY
@@ -206,16 +209,14 @@ struct av1_extracfg {
// mv_cost_upd_freq: COST_UPD_OFF
// dv_cost_upd_freq: COST_UPD_OFF
static const struct av1_extracfg default_extra_cfg = {
- 7, // cpu_used
- 1, // enable_auto_alt_ref
- 0, // enable_auto_bwd_ref
- 0, // noise_sensitivity
- 0, // sharpness
- 0, // static_thresh
- 1, // row_mt
-#if CONFIG_FRAME_PARALLEL_ENCODE
- 0, // fp_mt
-#endif
+ 7, // cpu_used
+ 1, // enable_auto_alt_ref
+ 0, // enable_auto_bwd_ref
+ 0, // noise_sensitivity
+ 0, // sharpness
+ 0, // static_thresh
+ 1, // row_mt
+ 0, // fp_mt
0, // tile_columns
0, // tile_rows
0, // enable_tpl_model
@@ -273,7 +274,7 @@ static const struct av1_extracfg default_extra_cfg = {
0, // film_grain_test_vector
NULL, // film_grain_table_filename
0, // motion_vector_unit_test
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
0, // fpmt_unit_test
#endif
1, // CDF update mode
@@ -347,10 +348,12 @@ static const struct av1_extracfg default_extra_cfg = {
-1, // passes
-1, // fwd_kf_dist
LOOPFILTER_ALL, // loopfilter_control
+ 0, // skip_postproc_filtering
NULL, // two_pass_output
NULL, // second_pass_log
0, // auto_intra_tools_off
0, // strict_level_conformance
+ -1, // kf_max_pyr_height
};
#else
static const struct av1_extracfg default_extra_cfg = {
@@ -361,9 +364,7 @@ static const struct av1_extracfg default_extra_cfg = {
0, // sharpness
0, // static_thresh
1, // row_mt
-#if CONFIG_FRAME_PARALLEL_ENCODE
0, // fp_mt
-#endif
0, // tile_columns
0, // tile_rows
1, // enable_tpl_model
@@ -421,7 +422,7 @@ static const struct av1_extracfg default_extra_cfg = {
0, // film_grain_test_vector
NULL, // film_grain_table_filename
0, // motion_vector_unit_test
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
0, // fpmt_unit_test
#endif
1, // CDF update mode
@@ -495,10 +496,12 @@ static const struct av1_extracfg default_extra_cfg = {
-1, // passes
-1, // fwd_kf_dist
LOOPFILTER_ALL, // loopfilter_control
+ 0, // skip_postproc_filtering
NULL, // two_pass_output
NULL, // second_pass_log
0, // auto_intra_tools_off
0, // strict_level_conformance
+ -1, // kf_max_pyr_height
};
#endif
@@ -540,7 +543,7 @@ static INLINE int gcd(int64_t a, int b) {
return (int)a;
}
-static INLINE void reduce_ratio(aom_rational64_t *ratio) {
+static void reduce_ratio(aom_rational64_t *ratio) {
const int denom = gcd(ratio->num, ratio->den);
ratio->num /= denom;
ratio->den /= denom;
@@ -617,8 +620,16 @@ static aom_codec_err_t allocate_and_set_string(const char *src,
static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx,
const aom_codec_enc_cfg_t *cfg,
const struct av1_extracfg *extra_cfg) {
- RANGE_CHECK(cfg, g_w, 1, 65535); // 16 bits available
- RANGE_CHECK(cfg, g_h, 1, 65535); // 16 bits available
+ RANGE_CHECK(cfg, g_w, 1, 65536); // 16 bits available
+ RANGE_CHECK(cfg, g_h, 1, 65536); // 16 bits available
+ RANGE_CHECK_HI(cfg, g_forced_max_frame_width, 65536); // 16 bits available
+ RANGE_CHECK_HI(cfg, g_forced_max_frame_height, 65536); // 16 bits available
+ if (cfg->g_forced_max_frame_width) {
+ RANGE_CHECK_HI(cfg, g_w, cfg->g_forced_max_frame_width);
+ }
+ if (cfg->g_forced_max_frame_height) {
+ RANGE_CHECK_HI(cfg, g_h, cfg->g_forced_max_frame_height);
+ }
RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000);
RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den);
RANGE_CHECK_HI(cfg, g_profile, MAX_PROFILES - 1);
@@ -681,7 +692,7 @@ static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx,
RANGE_CHECK_HI(extra_cfg, cdf_update_mode, 2);
RANGE_CHECK_HI(extra_cfg, motion_vector_unit_test, 2);
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
RANGE_CHECK_HI(extra_cfg, fpmt_unit_test, 1);
#endif
RANGE_CHECK_HI(extra_cfg, sb_multipass_unit_test, 1);
@@ -697,9 +708,7 @@ static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx,
RANGE_CHECK_HI(extra_cfg, single_tile_decoding, 1);
RANGE_CHECK_HI(extra_cfg, row_mt, 1);
-#if CONFIG_FRAME_PARALLEL_ENCODE
RANGE_CHECK_HI(extra_cfg, fp_mt, 1);
-#endif
RANGE_CHECK_HI(extra_cfg, tile_columns, 6);
RANGE_CHECK_HI(extra_cfg, tile_rows, 6);
@@ -843,9 +852,19 @@ static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx,
RANGE_CHECK(extra_cfg, deltaq_strength, 0, 1000);
RANGE_CHECK_HI(extra_cfg, loopfilter_control, 3);
+ RANGE_CHECK_BOOL(extra_cfg, skip_postproc_filtering);
RANGE_CHECK_HI(extra_cfg, enable_cdef, 2);
+ RANGE_CHECK_BOOL(extra_cfg, auto_intra_tools_off);
RANGE_CHECK_BOOL(extra_cfg, strict_level_conformance);
+ RANGE_CHECK(extra_cfg, kf_max_pyr_height, -1, 5);
+ if (extra_cfg->kf_max_pyr_height != -1 &&
+ extra_cfg->kf_max_pyr_height < (int)extra_cfg->gf_min_pyr_height) {
+ ERROR(
+ "The value of kf-max-pyr-height should not be smaller than "
+ "gf-min-pyr-height");
+ }
+
return AOM_CODEC_OK;
}
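/* Editor's note: per the checks above, kf_max_pyr_height uses -1 as an
 * "unrestricted" sentinel; any explicit value must lie in [0, 5] and, to keep
 * the keyframe group structure consistent with the golden-frame group
 * settings, must not fall below gf_min_pyr_height. */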
@@ -897,7 +916,7 @@ static aom_codec_err_t validate_img(aom_codec_alg_priv_t *ctx,
return AOM_CODEC_OK;
}
-static int get_image_bps(const aom_image_t *img) {
+int av1_get_image_bps(const aom_image_t *img) {
switch (img->fmt) {
case AOM_IMG_FMT_YV12:
case AOM_IMG_FMT_NV12:
@@ -926,11 +945,10 @@ static void update_default_encoder_config(const cfg_options_t *cfg,
struct av1_extracfg *extra_cfg) {
extra_cfg->enable_cdef = (cfg->disable_cdef == 0) ? 1 : 0;
extra_cfg->enable_restoration = (cfg->disable_lr == 0);
- extra_cfg->superblock_size = (cfg->super_block_size == 64)
- ? AOM_SUPERBLOCK_SIZE_64X64
- : (cfg->super_block_size == 128)
- ? AOM_SUPERBLOCK_SIZE_128X128
- : AOM_SUPERBLOCK_SIZE_DYNAMIC;
+ extra_cfg->superblock_size =
+ (cfg->super_block_size == 64) ? AOM_SUPERBLOCK_SIZE_64X64
+ : (cfg->super_block_size == 128) ? AOM_SUPERBLOCK_SIZE_128X128
+ : AOM_SUPERBLOCK_SIZE_DYNAMIC;
extra_cfg->enable_warped_motion = (cfg->disable_warp_motion == 0);
extra_cfg->enable_dist_wtd_comp = (cfg->disable_dist_wtd_comp == 0);
extra_cfg->enable_diff_wtd_comp = (cfg->disable_diff_wtd_comp == 0);
@@ -1127,7 +1145,7 @@ static aom_codec_err_t set_encoder_config(AV1EncoderConfig *oxcf,
extra_cfg->allow_ref_frame_mvs && !cfg->large_scale_tile;
tool_cfg->superblock_size = extra_cfg->superblock_size;
tool_cfg->enable_monochrome = cfg->monochrome;
- tool_cfg->full_still_picture_hdr = cfg->full_still_picture_hdr;
+ tool_cfg->full_still_picture_hdr = cfg->full_still_picture_hdr != 0;
tool_cfg->enable_dual_filter = extra_cfg->enable_dual_filter;
tool_cfg->enable_order_hint = extra_cfg->enable_order_hint;
tool_cfg->enable_interintra_comp = extra_cfg->enable_interintra_comp;
@@ -1188,6 +1206,7 @@ static aom_codec_err_t set_encoder_config(AV1EncoderConfig *oxcf,
algo_cfg->enable_tpl_model =
resize_cfg->resize_mode ? 0 : extra_cfg->enable_tpl_model;
algo_cfg->loopfilter_control = extra_cfg->loopfilter_control;
+ algo_cfg->skip_postproc_filtering = extra_cfg->skip_postproc_filtering;
// Set two-pass stats configuration.
oxcf->twopass_stats_in = cfg->rc_twopass_stats_in;
@@ -1215,6 +1234,12 @@ static aom_codec_err_t set_encoder_config(AV1EncoderConfig *oxcf,
kf_cfg->enable_intrabc = extra_cfg->enable_intrabc;
oxcf->speed = extra_cfg->cpu_used;
+  // TODO(yunqingwang, any): In REALTIME mode, 1080p performance at speeds 5
+  // and 6 is quite bad. Force speed 7 for now. Will revisit during later
+  // rd-path optimization work.
+ if (oxcf->mode == REALTIME && AOMMIN(cfg->g_w, cfg->g_h) >= 1080 &&
+ oxcf->speed < 7)
+ oxcf->speed = 7;
// Set Color related configuration.
color_cfg->color_primaries = extra_cfg->color_primaries;
@@ -1276,10 +1301,10 @@ static aom_codec_err_t set_encoder_config(AV1EncoderConfig *oxcf,
tile_cfg->tile_width_count = AOMMIN(cfg->tile_width_count, MAX_TILE_COLS);
tile_cfg->tile_height_count = AOMMIN(cfg->tile_height_count, MAX_TILE_ROWS);
for (int i = 0; i < tile_cfg->tile_width_count; i++) {
- tile_cfg->tile_widths[i] = AOMMAX(cfg->tile_widths[i], 1);
+ tile_cfg->tile_widths[i] = cfg->tile_widths[i];
}
for (int i = 0; i < tile_cfg->tile_height_count; i++) {
- tile_cfg->tile_heights[i] = AOMMAX(cfg->tile_heights[i], 1);
+ tile_cfg->tile_heights[i] = cfg->tile_heights[i];
}
tile_cfg->enable_ext_tile_debug = extra_cfg->ext_tile_debug;
@@ -1300,16 +1325,23 @@ static aom_codec_err_t set_encoder_config(AV1EncoderConfig *oxcf,
oxcf->ref_frm_cfg.enable_onesided_comp = extra_cfg->enable_onesided_comp;
oxcf->row_mt = extra_cfg->row_mt;
-#if CONFIG_FRAME_PARALLEL_ENCODE
oxcf->fp_mt = extra_cfg->fp_mt;
-#endif
// Set motion mode related configuration.
oxcf->motion_mode_cfg.enable_obmc = extra_cfg->enable_obmc;
oxcf->motion_mode_cfg.enable_warped_motion = extra_cfg->enable_warped_motion;
#if !CONFIG_REALTIME_ONLY
- oxcf->motion_mode_cfg.allow_warped_motion =
- (extra_cfg->allow_warped_motion & extra_cfg->enable_warped_motion);
+ if (cfg->g_usage == AOM_USAGE_REALTIME && oxcf->speed >= 7 &&
+ oxcf->tune_cfg.content == AOM_CONTENT_SCREEN) {
+    // TODO(marpan): Warped motion causes a crash in RT mode with screen
+    // content in the nonrd path (speed >= 7) in builds without
+    // CONFIG_REALTIME_ONLY. Re-enable/allow it once the issue is fixed.
+ oxcf->motion_mode_cfg.enable_warped_motion = 0;
+ oxcf->motion_mode_cfg.allow_warped_motion = 0;
+ } else {
+ oxcf->motion_mode_cfg.allow_warped_motion =
+ (extra_cfg->allow_warped_motion & extra_cfg->enable_warped_motion);
+ }
#else
oxcf->motion_mode_cfg.allow_warped_motion =
(cfg->g_usage == AOM_USAGE_REALTIME && oxcf->speed >= 7)
@@ -1406,14 +1438,9 @@ static aom_codec_err_t set_encoder_config(AV1EncoderConfig *oxcf,
oxcf->unit_test_cfg.sb_multipass_unit_test =
extra_cfg->sb_multipass_unit_test;
- // For allintra encoding mode, inter-frame motion search is not applicable and
- // the intraBC motion vectors are restricted within the tile boundaries. Hence
- // a smaller frame border size (AOM_ENC_ALLINTRA_BORDER) is used in this case.
oxcf->border_in_pixels =
- (resize_cfg->resize_mode || superres_cfg->superres_mode)
- ? AOM_BORDER_IN_PIXELS
- : (oxcf->kf_cfg.key_freq_max == 0) ? AOM_ENC_ALLINTRA_BORDER
- : AOM_ENC_NO_SCALE_BORDER;
+ av1_get_enc_border_size(av1_is_resize_needed(oxcf),
+ (oxcf->kf_cfg.key_freq_max == 0), BLOCK_128X128);
memcpy(oxcf->target_seq_level_idx, extra_cfg->target_seq_level_idx,
sizeof(oxcf->target_seq_level_idx));
oxcf->tier_mask = extra_cfg->tier_mask;
@@ -1422,9 +1449,18 @@ static aom_codec_err_t set_encoder_config(AV1EncoderConfig *oxcf,
oxcf->strict_level_conformance = extra_cfg->strict_level_conformance;
+ oxcf->kf_max_pyr_height = extra_cfg->kf_max_pyr_height;
+
return AOM_CODEC_OK;
}
+AV1EncoderConfig av1_get_encoder_config(const aom_codec_enc_cfg_t *cfg) {
+ AV1EncoderConfig oxcf;
+ struct av1_extracfg extra_cfg = default_extra_cfg;
+ set_encoder_config(&oxcf, cfg, &extra_cfg);
+ return oxcf;
+}
+
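/* Editor's note: av1_get_encoder_config() above is a convenience wrapper that
 * fills an AV1EncoderConfig from a public aom_codec_enc_cfg_t plus the default
 * extra-config table. A hedged usage sketch, assuming the caller is built
 * inside the libaom tree (the prototype lives in the new av1/av1_cx_iface.h
 * added later in this patch): */
#include "aom/aom_encoder.h"
#include "aom/aomcx.h"
#include "av1/av1_cx_iface.h"

static AV1EncoderConfig make_default_oxcf(void) {
  aom_codec_enc_cfg_t cfg;
  /* Start from the library defaults for the good-quality usage profile. */
  aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg,
                               AOM_USAGE_GOOD_QUALITY);
  return av1_get_encoder_config(&cfg);
}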
static aom_codec_err_t encoder_set_config(aom_codec_alg_priv_t *ctx,
const aom_codec_enc_cfg_t *cfg) {
InitialDimensions *const initial_dimensions =
@@ -1463,15 +1499,10 @@ static aom_codec_err_t encoder_set_config(aom_codec_alg_priv_t *ctx,
force_key |= ctx->ppi->seq_params.profile != ctx->oxcf.profile;
bool is_sb_size_changed = false;
av1_change_config_seq(ctx->ppi, &ctx->oxcf, &is_sb_size_changed);
-#if CONFIG_FRAME_PARALLEL_ENCODE
- int i;
- for (i = 0; i < ctx->ppi->num_fp_contexts; i++) {
+ for (int i = 0; i < ctx->ppi->num_fp_contexts; i++) {
av1_change_config(ctx->ppi->parallel_cpi[i], &ctx->oxcf,
is_sb_size_changed);
}
-#else
- av1_change_config(ctx->ppi->cpi, &ctx->oxcf, is_sb_size_changed);
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
if (ctx->ppi->cpi_lap != NULL) {
av1_change_config(ctx->ppi->cpi_lap, &ctx->oxcf, is_sb_size_changed);
}
@@ -1524,20 +1555,13 @@ static aom_codec_err_t update_extra_cfg(aom_codec_alg_priv_t *ctx,
if (res == AOM_CODEC_OK) {
ctx->extra_cfg = *extra_cfg;
set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg);
-#if CONFIG_FRAME_PARALLEL_ENCODE
av1_check_fpmt_config(ctx->ppi, &ctx->oxcf);
-#endif
bool is_sb_size_changed = false;
av1_change_config_seq(ctx->ppi, &ctx->oxcf, &is_sb_size_changed);
-#if CONFIG_FRAME_PARALLEL_ENCODE
- int i;
- for (i = 0; i < ctx->ppi->num_fp_contexts; i++) {
+ for (int i = 0; i < ctx->ppi->num_fp_contexts; i++) {
av1_change_config(ctx->ppi->parallel_cpi[i], &ctx->oxcf,
is_sb_size_changed);
}
-#else
- av1_change_config(ctx->ppi->cpi, &ctx->oxcf, is_sb_size_changed);
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
if (ctx->ppi->cpi_lap != NULL) {
av1_change_config(ctx->ppi->cpi_lap, &ctx->oxcf, is_sb_size_changed);
}
@@ -2149,9 +2173,15 @@ static aom_codec_err_t ctrl_set_enable_tx_size_search(aom_codec_alg_priv_t *ctx,
static aom_codec_err_t ctrl_set_quant_b_adapt(aom_codec_alg_priv_t *ctx,
va_list args) {
+#if CONFIG_REALTIME_ONLY
+ (void)ctx;
+ (void)args;
+ return AOM_CODEC_INCAPABLE;
+#else
struct av1_extracfg extra_cfg = ctx->extra_cfg;
extra_cfg.quant_b_adapt = CAST(AV1E_SET_QUANT_B_ADAPT, args);
return update_extra_cfg(ctx, &extra_cfg);
+#endif
}
static aom_codec_err_t ctrl_set_vbr_corpus_complexity_lap(
@@ -2347,7 +2377,7 @@ static aom_codec_err_t ctrl_enable_motion_vector_unit_test(
static aom_codec_err_t ctrl_enable_fpmt_unit_test(aom_codec_alg_priv_t *ctx,
va_list args) {
-#if !(CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST)
+#if !CONFIG_FPMT_TEST
(void)args;
(void)ctx;
return AOM_CODEC_INCAPABLE;
@@ -2422,6 +2452,17 @@ static aom_codec_err_t ctrl_set_loopfilter_control(aom_codec_alg_priv_t *ctx,
return update_extra_cfg(ctx, &extra_cfg);
}
+static aom_codec_err_t ctrl_set_skip_postproc_filtering(
+ aom_codec_alg_priv_t *ctx, va_list args) {
+ // Skipping the application of post-processing filters is allowed only
+ // for ALLINTRA mode.
+ if (ctx->cfg.g_usage != AOM_USAGE_ALL_INTRA) return AOM_CODEC_INCAPABLE;
+ struct av1_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.skip_postproc_filtering =
+ CAST(AV1E_SET_SKIP_POSTPROC_FILTERING, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
+
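/* Editor's note: a hedged usage sketch for the new control handled above; it
 * returns AOM_CODEC_INCAPABLE unless the encoder was configured for all-intra
 * usage (function and variable names here are illustrative): */
#include "aom/aom_encoder.h"
#include "aom/aomcx.h"

static int try_skip_postproc(aom_codec_ctx_t *codec) {
  /* Accepted only when cfg.g_usage == AOM_USAGE_ALL_INTRA. */
  return aom_codec_control(codec, AV1E_SET_SKIP_POSTPROC_FILTERING, 1) ==
         AOM_CODEC_OK;
}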
static aom_codec_err_t ctrl_set_rtc_external_rc(aom_codec_alg_priv_t *ctx,
va_list args) {
ctx->ppi->cpi->rc.rtc_external_ratectrl =
@@ -2430,9 +2471,9 @@ static aom_codec_err_t ctrl_set_rtc_external_rc(aom_codec_alg_priv_t *ctx,
}
#if !CONFIG_REALTIME_ONLY
-static aom_codec_err_t create_stats_buffer(FIRSTPASS_STATS **frame_stats_buffer,
- STATS_BUFFER_CTX *stats_buf_context,
- int num_lap_buffers) {
+aom_codec_err_t av1_create_stats_buffer(FIRSTPASS_STATS **frame_stats_buffer,
+ STATS_BUFFER_CTX *stats_buf_context,
+ int num_lap_buffers) {
aom_codec_err_t res = AOM_CODEC_OK;
int size = get_stats_buf_size(num_lap_buffers, MAX_LAG_BUFFERS);
@@ -2455,9 +2496,12 @@ static aom_codec_err_t create_stats_buffer(FIRSTPASS_STATS **frame_stats_buffer,
}
#endif
-static aom_codec_err_t create_context_and_bufferpool(
- AV1_PRIMARY *ppi, AV1_COMP **p_cpi, BufferPool **p_buffer_pool,
- AV1EncoderConfig *oxcf, COMPRESSOR_STAGE stage, int lap_lag_in_frames) {
+aom_codec_err_t av1_create_context_and_bufferpool(AV1_PRIMARY *ppi,
+ AV1_COMP **p_cpi,
+ BufferPool **p_buffer_pool,
+ const AV1EncoderConfig *oxcf,
+ COMPRESSOR_STAGE stage,
+ int lap_lag_in_frames) {
aom_codec_err_t res = AOM_CODEC_OK;
if (*p_buffer_pool == NULL) {
@@ -2478,11 +2522,6 @@ static aom_codec_err_t create_context_and_bufferpool(
}
static aom_codec_err_t ctrl_set_fp_mt(aom_codec_alg_priv_t *ctx, va_list args) {
-#if !CONFIG_FRAME_PARALLEL_ENCODE
- (void)args;
- (void)ctx;
- return AOM_CODEC_INCAPABLE;
-#else
struct av1_extracfg extra_cfg = ctx->extra_cfg;
extra_cfg.fp_mt = CAST(AV1E_SET_FP_MT, args);
const aom_codec_err_t result = update_extra_cfg(ctx, &extra_cfg);
@@ -2493,7 +2532,7 @@ static aom_codec_err_t ctrl_set_fp_mt(aom_codec_alg_priv_t *ctx, va_list args) {
if (num_fp_contexts > 1) {
int i;
for (i = 1; i < num_fp_contexts; i++) {
- int res = create_context_and_bufferpool(
+ int res = av1_create_context_and_bufferpool(
ctx->ppi, &ctx->ppi->parallel_cpi[i], &ctx->buffer_pool, &ctx->oxcf,
ENCODE_STAGE, -1);
if (res != AOM_CODEC_OK) {
@@ -2508,7 +2547,6 @@ static aom_codec_err_t ctrl_set_fp_mt(aom_codec_alg_priv_t *ctx, va_list args) {
}
ctx->ppi->num_fp_contexts = num_fp_contexts;
return result;
-#endif
}
static aom_codec_err_t ctrl_set_auto_intra_tools_off(aom_codec_alg_priv_t *ctx,
@@ -2542,7 +2580,7 @@ static aom_codec_err_t encoder_init(aom_codec_ctx_t *ctx) {
if (priv->cfg.g_usage == ALLINTRA) {
priv->extra_cfg.enable_cdef = 0;
}
- av1_initialize_enc();
+ av1_initialize_enc(priv->cfg.g_usage, priv->cfg.rc_end_usage);
res = validate_config(priv, &priv->cfg, &priv->extra_cfg);
@@ -2576,8 +2614,8 @@ static aom_codec_err_t encoder_init(aom_codec_ctx_t *ctx) {
if (!priv->ppi) return AOM_CODEC_MEM_ERROR;
#if !CONFIG_REALTIME_ONLY
- res = create_stats_buffer(&priv->frame_stats_buffer,
- &priv->stats_buf_context, *num_lap_buffers);
+ res = av1_create_stats_buffer(&priv->frame_stats_buffer,
+ &priv->stats_buf_context, *num_lap_buffers);
if (res != AOM_CODEC_OK) return AOM_CODEC_MEM_ERROR;
assert(MAX_LAP_BUFFERS >= MAX_LAG_BUFFERS);
@@ -2588,9 +2626,8 @@ static aom_codec_err_t encoder_init(aom_codec_ctx_t *ctx) {
priv->ppi->twopass.stats_buf_ctx = &priv->stats_buf_context;
#endif
-#if CONFIG_FRAME_PARALLEL_ENCODE
assert(priv->ppi->num_fp_contexts >= 1);
- res = create_context_and_bufferpool(
+ res = av1_create_context_and_bufferpool(
priv->ppi, &priv->ppi->parallel_cpi[0], &priv->buffer_pool,
&priv->oxcf, ENCODE_STAGE, -1);
if (res != AOM_CODEC_OK) {
@@ -2601,19 +2638,10 @@ static aom_codec_err_t encoder_init(aom_codec_ctx_t *ctx) {
priv->ppi->twopass.stats_buf_ctx->stats_in_start;
#endif
priv->ppi->cpi = priv->ppi->parallel_cpi[0];
-#else
- res = create_context_and_bufferpool(priv->ppi, &priv->ppi->cpi,
- &priv->buffer_pool, &priv->oxcf,
- ENCODE_STAGE, -1);
-#if !CONFIG_REALTIME_ONLY
- priv->ppi->cpi->twopass_frame.stats_in =
- priv->ppi->twopass.stats_buf_ctx->stats_in_start;
-#endif
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
// Create another compressor if look ahead is enabled
if (res == AOM_CODEC_OK && *num_lap_buffers) {
- res = create_context_and_bufferpool(
+ res = av1_create_context_and_bufferpool(
priv->ppi, &priv->ppi->cpi_lap, &priv->buffer_pool_lap, &priv->oxcf,
LAP_STAGE, clamp(lap_lag_in_frames, 0, MAX_LAG_BUFFERS));
}
@@ -2623,8 +2651,8 @@ static aom_codec_err_t encoder_init(aom_codec_ctx_t *ctx) {
return res;
}
-static void destroy_context_and_bufferpool(AV1_COMP *cpi,
- BufferPool **p_buffer_pool) {
+void av1_destroy_context_and_bufferpool(AV1_COMP *cpi,
+ BufferPool **p_buffer_pool) {
av1_remove_compressor(cpi);
if (*p_buffer_pool) {
av1_free_ref_frame_buffers(*p_buffer_pool);
@@ -2636,8 +2664,8 @@ static void destroy_context_and_bufferpool(AV1_COMP *cpi,
}
}
-static void destroy_stats_buffer(STATS_BUFFER_CTX *stats_buf_context,
- FIRSTPASS_STATS *frame_stats_buffer) {
+void av1_destroy_stats_buffer(STATS_BUFFER_CTX *stats_buf_context,
+ FIRSTPASS_STATS *frame_stats_buffer) {
aom_free(stats_buf_context->total_left_stats);
aom_free(stats_buf_context->total_stats);
aom_free(frame_stats_buffer);
@@ -2673,34 +2701,30 @@ static aom_codec_err_t encoder_destroy(aom_codec_alg_priv_t *ctx) {
if (ctx->ppi) {
AV1_PRIMARY *ppi = ctx->ppi;
-#if CONFIG_FRAME_PARALLEL_ENCODE
for (int i = 0; i < MAX_PARALLEL_FRAMES - 1; i++) {
if (ppi->parallel_frames_data[i].cx_data) {
free(ppi->parallel_frames_data[i].cx_data);
}
}
-#endif
#if CONFIG_ENTROPY_STATS
print_entropy_stats(ppi);
#endif
#if CONFIG_INTERNAL_STATS
print_internal_stats(ppi);
#endif
-#if CONFIG_FRAME_PARALLEL_ENCODE
- int i;
- for (i = 0; i < MAX_PARALLEL_FRAMES; i++) {
- destroy_context_and_bufferpool(ppi->parallel_cpi[i], &ctx->buffer_pool);
+
+ for (int i = 0; i < MAX_PARALLEL_FRAMES; i++) {
+ av1_destroy_context_and_bufferpool(ppi->parallel_cpi[i],
+ &ctx->buffer_pool);
}
ppi->cpi = NULL;
-#else
- destroy_context_and_bufferpool(ppi->cpi, &ctx->buffer_pool);
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
+
if (ppi->cpi_lap) {
- destroy_context_and_bufferpool(ppi->cpi_lap, &ctx->buffer_pool_lap);
+ av1_destroy_context_and_bufferpool(ppi->cpi_lap, &ctx->buffer_pool_lap);
}
av1_remove_primary_compressor(ppi);
}
- destroy_stats_buffer(&ctx->stats_buf_context, ctx->frame_stats_buffer);
+ av1_destroy_stats_buffer(&ctx->stats_buf_context, ctx->frame_stats_buffer);
aom_free(ctx);
return AOM_CODEC_OK;
}
@@ -2718,6 +2742,25 @@ static aom_codec_frame_flags_t get_frame_pkt_flags(const AV1_COMP *cpi,
return flags;
}
+static INLINE int get_src_border_in_pixels(AV1_COMP *cpi, BLOCK_SIZE sb_size) {
+ if (cpi->oxcf.mode != REALTIME || av1_is_resize_needed(&cpi->oxcf))
+ return cpi->oxcf.border_in_pixels;
+
+ const int sb_size_in_pixels_log2 = mi_size_wide_log2[sb_size] + MI_SIZE_LOG2;
+ const int sb_aligned_width =
+ ALIGN_POWER_OF_TWO(cpi->oxcf.frm_dim_cfg.width, sb_size_in_pixels_log2);
+ const int sb_aligned_height =
+ ALIGN_POWER_OF_TWO(cpi->oxcf.frm_dim_cfg.height, sb_size_in_pixels_log2);
+ // Align the border pixels to a multiple of 32.
+ const int border_pixels_width =
+ ALIGN_POWER_OF_TWO(sb_aligned_width - cpi->oxcf.frm_dim_cfg.width, 5);
+ const int border_pixels_height =
+ ALIGN_POWER_OF_TWO(sb_aligned_height - cpi->oxcf.frm_dim_cfg.height, 5);
+ const int border_in_pixels =
+ AOMMAX(AOMMAX(border_pixels_width, border_pixels_height), 32);
+ return border_in_pixels;
+}
+
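/* Editor's note: a worked example of the realtime source-border computation
 * above, assuming ALIGN_POWER_OF_TWO(v, n) rounds v up to a multiple of 2^n
 * (as the libaom macro does). For a 1920x1080 frame with 64x64 superblocks
 * (sb_size_in_pixels_log2 == 6): */
#include <stdio.h>

#define ALIGN_POW2(v, n) (((v) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int main(void) {
  const int w = 1920, h = 1080, sb_log2 = 6;
  const int sb_w = ALIGN_POW2(w, sb_log2);       /* 1920 (already aligned) */
  const int sb_h = ALIGN_POW2(h, sb_log2);       /* 1088 */
  const int border_w = ALIGN_POW2(sb_w - w, 5);  /* 0 */
  const int border_h = ALIGN_POW2(sb_h - h, 5);  /* 8 rounds up to 32 */
  const int border = MAX2(MAX2(border_w, border_h), 32); /* 32 */
  printf("border_in_pixels = %d\n", border);
  return 0;
}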
// TODO(Mufaddal): Check feasibility of abstracting functions related to LAP
// into a separate function.
static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
@@ -2744,9 +2787,10 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
if (img != NULL) {
res = validate_img(ctx, img);
if (res == AOM_CODEC_OK) {
- const size_t uncompressed_frame_sz = ALIGN_POWER_OF_TWO(ctx->cfg.g_w, 5) *
- ALIGN_POWER_OF_TWO(ctx->cfg.g_h, 5) *
- get_image_bps(img) / 8;
+ const size_t uncompressed_frame_sz =
+ ALIGN_POWER_OF_TWO_UNSIGNED(ctx->cfg.g_w, 5) *
+ ALIGN_POWER_OF_TWO_UNSIGNED(ctx->cfg.g_h, 5) *
+ av1_get_image_bps(img) / 8;
// Due to the presence of no-show frames, the ctx->cx_data buffer holds
// compressed data corresponding to multiple frames. As no-show frames are
@@ -2772,7 +2816,6 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
return AOM_CODEC_MEM_ERROR;
}
}
-#if CONFIG_FRAME_PARALLEL_ENCODE
for (int i = 0; i < ppi->num_fp_contexts - 1; i++) {
if (ppi->parallel_frames_data[i].cx_data == NULL) {
ppi->parallel_frames_data[i].cx_data_sz = uncompressed_frame_sz;
@@ -2786,7 +2829,6 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
}
}
}
-#endif
}
}
@@ -2836,11 +2878,11 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
}
if (res == AOM_CODEC_OK) {
-#if CONFIG_FRAME_PARALLEL_ENCODE
AV1_COMP *cpi = ppi->cpi;
-#else
- AV1_COMP *const cpi = ppi->cpi;
-#endif
+
+ const int num_layers =
+ cpi->svc.number_spatial_layers * cpi->svc.number_temporal_layers;
+ av1_alloc_layer_context(cpi, num_layers);
// Set up internal flags
if (ctx->base.init_flags & AOM_CODEC_USE_PSNR) ppi->b_calculate_psnr = 1;
@@ -2871,27 +2913,32 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
if (!ppi->lookahead) {
int lag_in_frames = cpi_lap != NULL ? cpi_lap->oxcf.gf_cfg.lag_in_frames
: cpi->oxcf.gf_cfg.lag_in_frames;
+ AV1EncoderConfig *oxcf = &cpi->oxcf;
+ const BLOCK_SIZE sb_size = av1_select_sb_size(
+ oxcf, oxcf->frm_dim_cfg.width, oxcf->frm_dim_cfg.height,
+ cpi->svc.number_spatial_layers);
+ oxcf->border_in_pixels =
+ av1_get_enc_border_size(av1_is_resize_needed(oxcf),
+ oxcf->kf_cfg.key_freq_max == 0, sb_size);
+ for (int i = 0; i < ppi->num_fp_contexts; i++) {
+ ppi->parallel_cpi[i]->oxcf.border_in_pixels = oxcf->border_in_pixels;
+ }
+ const int src_border_in_pixels = get_src_border_in_pixels(cpi, sb_size);
ppi->lookahead = av1_lookahead_init(
cpi->oxcf.frm_dim_cfg.width, cpi->oxcf.frm_dim_cfg.height,
subsampling_x, subsampling_y, use_highbitdepth, lag_in_frames,
- cpi->oxcf.border_in_pixels, cpi->common.features.byte_alignment,
+ src_border_in_pixels, cpi->common.features.byte_alignment,
ctx->num_lap_buffers, (cpi->oxcf.kf_cfg.key_freq_max == 0),
cpi->oxcf.tool_cfg.enable_global_motion);
}
if (!ppi->lookahead)
aom_internal_error(&ppi->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate lag buffers");
-#if CONFIG_FRAME_PARALLEL_ENCODE
- int i;
- for (i = 0; i < ppi->num_fp_contexts; i++) {
+ for (int i = 0; i < ppi->num_fp_contexts; i++) {
av1_check_initial_width(ppi->parallel_cpi[i], use_highbitdepth,
subsampling_x, subsampling_y);
}
-#else
- av1_check_initial_width(cpi, use_highbitdepth, subsampling_x,
- subsampling_y);
-#endif
if (cpi_lap != NULL) {
av1_check_initial_width(cpi_lap, use_highbitdepth, subsampling_x,
subsampling_y);
@@ -2901,7 +2948,7 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
// key frame flag when we actually encode this frame.
if (av1_receive_raw_frame(cpi, flags | ctx->next_frame_flags, &sd,
src_time_stamp, src_end_time_stamp)) {
- res = update_error_state(ctx, &ppi->error);
+ res = update_error_state(ctx, cpi->common.error);
}
ctx->next_frame_flags = 0;
}
@@ -2937,36 +2984,26 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
num_workers = av1_get_max_num_workers(cpi);
}
if ((num_workers > 1) && (ppi->p_mt_info.num_workers == 0)) {
-#if CONFIG_FRAME_PARALLEL_ENCODE
     // Obtain the maximum number of frames that can be supported in a
     // parallel encode set.
if (is_stat_consumption_stage(cpi)) {
ppi->num_fp_contexts = av1_compute_num_fp_contexts(ppi, &cpi->oxcf);
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
av1_create_workers(ppi, num_workers);
av1_init_tile_thread_data(ppi, cpi->oxcf.pass == AOM_RC_FIRST_PASS);
#if CONFIG_MULTITHREAD
-#if CONFIG_FRAME_PARALLEL_ENCODE
for (int i = 0; i < ppi->num_fp_contexts; i++) {
av1_init_mt_sync(ppi->parallel_cpi[i],
ppi->parallel_cpi[i]->oxcf.pass == AOM_RC_FIRST_PASS);
}
-#else
- av1_init_mt_sync(cpi, cpi->oxcf.pass == 1);
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
if (cpi_lap != NULL) {
av1_init_mt_sync(cpi_lap, 1);
}
#endif // CONFIG_MULTITHREAD
}
-#if CONFIG_FRAME_PARALLEL_ENCODE
for (int i = 0; i < ppi->num_fp_contexts; i++) {
av1_init_frame_mt(ppi, ppi->parallel_cpi[i]);
}
-#else
- av1_init_frame_mt(ppi, cpi);
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
if (cpi_lap != NULL) {
av1_init_frame_mt(ppi, cpi_lap);
}
@@ -2985,27 +3022,28 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
av1_post_encode_updates(cpi_lap, &cpi_lap_data);
}
-#if CONFIG_FRAME_PARALLEL_ENCODE
// Recalculate the maximum number of frames that can be encoded in
   // parallel at the beginning of a sub-GOP.
if (is_stat_consumption_stage(cpi) && ppi->gf_group.size > 0 &&
cpi->gf_frame_index == ppi->gf_group.size) {
ppi->num_fp_contexts = av1_compute_num_fp_contexts(ppi, &cpi->oxcf);
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
+
+  // Reset gf_frame_index in case it reaches MAX_STATIC_GF_GROUP_LENGTH
+  // during real-time encoding.
+ if (is_one_pass_rt_params(cpi) &&
+ cpi->gf_frame_index == MAX_STATIC_GF_GROUP_LENGTH)
+ cpi->gf_frame_index = 0;
// Get the next visible frame. Invisible frames get packed with the next
// visible frame.
while (cpi_data.cx_data_sz >= ctx->cx_data_sz / 2 && !is_frame_visible) {
-#if CONFIG_FRAME_PARALLEL_ENCODE
int simulate_parallel_frame = 0;
int status = -1;
cpi->do_frame_data_update = true;
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
cpi->ref_idx_to_skip = INVALID_IDX;
cpi->ref_refresh_index = INVALID_IDX;
cpi->refresh_idx_available = false;
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
#if CONFIG_FPMT_TEST
simulate_parallel_frame =
@@ -3020,7 +3058,7 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
status = av1_get_compressed_data(cpi, &cpi_data);
}
-#endif
+#endif // CONFIG_FPMT_TEST
if (!simulate_parallel_frame) {
if (ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
status = av1_get_compressed_data(cpi, &cpi_data);
@@ -3032,18 +3070,13 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
status = AOM_CODEC_OK;
}
}
-#else
- const int status = av1_get_compressed_data(cpi, &cpi_data);
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
if (status == -1) break;
if (status != AOM_CODEC_OK) {
aom_internal_error(&ppi->error, AOM_CODEC_ERROR, NULL);
}
-#if CONFIG_FRAME_PARALLEL_ENCODE
if (ppi->num_fp_contexts > 0 && frame_is_intra_only(&cpi->common)) {
av1_init_sc_decisions(ppi);
}
-#endif
ppi->seq_params_locked = 1;
av1_post_encode_updates(cpi, &cpi_data);
@@ -3191,6 +3224,8 @@ static aom_codec_err_t ctrl_set_reference(aom_codec_alg_priv_t *ctx,
static aom_codec_err_t ctrl_copy_reference(aom_codec_alg_priv_t *ctx,
va_list args) {
+ if (ctx->ppi->cpi->oxcf.algo_cfg.skip_postproc_filtering)
+ return AOM_CODEC_INCAPABLE;
av1_ref_frame_t *const frame = va_arg(args, av1_ref_frame_t *);
if (frame != NULL) {
@@ -3206,6 +3241,8 @@ static aom_codec_err_t ctrl_copy_reference(aom_codec_alg_priv_t *ctx,
static aom_codec_err_t ctrl_get_reference(aom_codec_alg_priv_t *ctx,
va_list args) {
+ if (ctx->ppi->cpi->oxcf.algo_cfg.skip_postproc_filtering)
+ return AOM_CODEC_INCAPABLE;
av1_ref_frame_t *const frame = va_arg(args, av1_ref_frame_t *);
if (frame != NULL) {
@@ -3322,10 +3359,8 @@ static aom_codec_err_t ctrl_set_scale_mode(aom_codec_alg_priv_t *ctx,
if (mode) {
const int res = av1_set_internal_size(
&ctx->ppi->cpi->oxcf, &ctx->ppi->cpi->resize_pending_params,
- (AOM_SCALING)mode->h_scaling_mode, (AOM_SCALING)mode->v_scaling_mode);
-#if CONFIG_FRAME_PARALLEL_ENCODE
+ mode->h_scaling_mode, mode->v_scaling_mode);
av1_check_fpmt_config(ctx->ppi, &ctx->ppi->cpi->oxcf);
-#endif
return (res == 0) ? AOM_CODEC_OK : AOM_CODEC_INVALID_PARAM;
} else {
return AOM_CODEC_INVALID_PARAM;
@@ -3374,6 +3409,10 @@ static aom_codec_err_t ctrl_set_svc_params(aom_codec_alg_priv_t *ctx,
if (ppi->number_spatial_layers > 1 || ppi->number_temporal_layers > 1) {
unsigned int sl, tl;
ctx->ppi->use_svc = 1;
+ const int num_layers =
+ ppi->number_spatial_layers * ppi->number_temporal_layers;
+ av1_alloc_layer_context(cpi, num_layers);
+
for (sl = 0; sl < ppi->number_spatial_layers; ++sl) {
for (tl = 0; tl < ppi->number_temporal_layers; ++tl) {
const int layer = LAYER_IDS_TO_IDX(sl, tl, ppi->number_temporal_layers);
@@ -3399,9 +3438,7 @@ static aom_codec_err_t ctrl_set_svc_params(aom_codec_alg_priv_t *ctx,
}
av1_update_layer_context_change_config(cpi, target_bandwidth);
}
-#if CONFIG_FRAME_PARALLEL_ENCODE
av1_check_fpmt_config(ctx->ppi, &ctx->ppi->cpi->oxcf);
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
return AOM_CODEC_OK;
}
@@ -3410,13 +3447,13 @@ static aom_codec_err_t ctrl_set_svc_ref_frame_config(aom_codec_alg_priv_t *ctx,
AV1_COMP *const cpi = ctx->ppi->cpi;
aom_svc_ref_frame_config_t *const data =
va_arg(args, aom_svc_ref_frame_config_t *);
- cpi->svc.set_ref_frame_config = 1;
+ cpi->rtc_ref.set_ref_frame_config = 1;
for (unsigned int i = 0; i < INTER_REFS_PER_FRAME; ++i) {
- cpi->svc.reference[i] = data->reference[i];
- cpi->svc.ref_idx[i] = data->ref_idx[i];
+ cpi->rtc_ref.reference[i] = data->reference[i];
+ cpi->rtc_ref.ref_idx[i] = data->ref_idx[i];
}
for (unsigned int i = 0; i < REF_FRAMES; ++i)
- cpi->svc.refresh[i] = data->refresh[i];
+ cpi->rtc_ref.refresh[i] = data->refresh[i];
cpi->svc.use_flexible_mode = 1;
cpi->svc.ksvc_fixed_mode = 0;
return AOM_CODEC_OK;
@@ -3427,9 +3464,9 @@ static aom_codec_err_t ctrl_set_svc_ref_frame_comp_pred(
AV1_COMP *const cpi = ctx->ppi->cpi;
aom_svc_ref_frame_comp_pred_t *const data =
va_arg(args, aom_svc_ref_frame_comp_pred_t *);
- cpi->svc.ref_frame_comp[0] = data->use_comp_pred[0];
- cpi->svc.ref_frame_comp[1] = data->use_comp_pred[1];
- cpi->svc.ref_frame_comp[2] = data->use_comp_pred[2];
+ cpi->rtc_ref.ref_frame_comp[0] = data->use_comp_pred[0];
+ cpi->rtc_ref.ref_frame_comp[1] = data->use_comp_pred[1];
+ cpi->rtc_ref.ref_frame_comp[2] = data->use_comp_pred[2];
return AOM_CODEC_OK;
}
@@ -3536,6 +3573,7 @@ static aom_codec_err_t encoder_set_option(aom_codec_alg_priv_t *ctx,
#endif
argv[0] = aom_malloc(len * sizeof(argv[1][0]));
+ if (!argv[0]) return AOM_CODEC_MEM_ERROR;
snprintf(argv[0], len, "--%s=%s", name, value);
struct arg arg;
aom_codec_err_t err = AOM_CODEC_OK;
@@ -3575,15 +3613,11 @@ static aom_codec_err_t encoder_set_option(aom_codec_alg_priv_t *ctx,
} else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.rowmtarg, argv,
err_string)) {
extra_cfg.row_mt = arg_parse_uint_helper(&arg, err_string);
- }
-#if CONFIG_FRAME_PARALLEL_ENCODE
- else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.fpmtarg, argv,
- err_string)) {
+ } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.fpmtarg, argv,
+ err_string)) {
extra_cfg.fp_mt = arg_parse_uint_helper(&arg, err_string);
- }
-#endif
- else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.tile_cols, argv,
- err_string)) {
+ } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.tile_cols, argv,
+ err_string)) {
extra_cfg.tile_columns = arg_parse_uint_helper(&arg, err_string);
} else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.tile_rows, argv,
err_string)) {
@@ -3932,10 +3966,24 @@ static aom_codec_err_t encoder_set_option(aom_codec_alg_priv_t *ctx,
} else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.loopfilter_control,
argv, err_string)) {
extra_cfg.loopfilter_control = arg_parse_int_helper(&arg, err_string);
+ } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.auto_intra_tools_off,
+ argv, err_string)) {
+ extra_cfg.auto_intra_tools_off = arg_parse_uint_helper(&arg, err_string);
} else if (arg_match_helper(&arg,
&g_av1_codec_arg_defs.strict_level_conformance,
argv, err_string)) {
extra_cfg.strict_level_conformance = arg_parse_int_helper(&arg, err_string);
+ } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.kf_max_pyr_height,
+ argv, err_string)) {
+ extra_cfg.kf_max_pyr_height = arg_parse_int_helper(&arg, err_string);
+ } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.tile_width, argv,
+ err_string)) {
+ ctx->cfg.tile_width_count = arg_parse_list_helper(
+ &arg, ctx->cfg.tile_widths, MAX_TILE_WIDTHS, err_string);
+ } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.tile_height, argv,
+ err_string)) {
+ ctx->cfg.tile_height_count = arg_parse_list_helper(
+ &arg, ctx->cfg.tile_heights, MAX_TILE_HEIGHTS, err_string);
} else {
match = 0;
snprintf(err_string, ARG_ERR_MSG_MAX_LEN, "Cannot find aom option %s",
@@ -3977,6 +4025,14 @@ static aom_codec_err_t ctrl_get_target_seq_level_idx(aom_codec_alg_priv_t *ctx,
&ctx->ppi->level_params, arg);
}
+static aom_codec_err_t ctrl_get_num_operating_points(aom_codec_alg_priv_t *ctx,
+ va_list args) {
+ int *const arg = va_arg(args, int *);
+ if (arg == NULL) return AOM_CODEC_INVALID_PARAM;
+ *arg = ctx->ppi->seq_params.operating_points_cnt_minus_1 + 1;
+ return AOM_CODEC_OK;
+}
+
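/* Editor's note: a matching getter sketch for the new control above
 * (illustrative names, not part of this patch): */
#include "aom/aom_codec.h"
#include "aom/aomcx.h"

static int query_num_operating_points(aom_codec_ctx_t *codec) {
  int num_ops = 0;
  if (aom_codec_control(codec, AV1E_GET_NUM_OPERATING_POINTS, &num_ops) !=
      AOM_CODEC_OK) {
    return -1;  /* control not supported by this context */
  }
  return num_ops;  /* operating_points_cnt_minus_1 + 1 */
}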
static aom_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ AV1_COPY_REFERENCE, ctrl_copy_reference },
{ AOME_USE_REFERENCE, ctrl_use_reference },
@@ -4113,6 +4169,7 @@ static aom_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ AV1E_SET_EXTERNAL_PARTITION, ctrl_set_external_partition },
{ AV1E_SET_ENABLE_TX_SIZE_SEARCH, ctrl_set_enable_tx_size_search },
{ AV1E_SET_LOOPFILTER_CONTROL, ctrl_set_loopfilter_control },
+ { AV1E_SET_SKIP_POSTPROC_FILTERING, ctrl_set_skip_postproc_filtering },
{ AV1E_SET_AUTO_INTRA_TOOLS_OFF, ctrl_set_auto_intra_tools_off },
{ AV1E_SET_RTC_EXTERNAL_RC, ctrl_set_rtc_external_rc },
@@ -4129,6 +4186,7 @@ static aom_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ AV1E_GET_SEQ_LEVEL_IDX, ctrl_get_seq_level_idx },
{ AV1E_GET_BASELINE_GF_INTERVAL, ctrl_get_baseline_gf_interval },
{ AV1E_GET_TARGET_SEQ_LEVEL_IDX, ctrl_get_target_seq_level_idx },
+ { AV1E_GET_NUM_OPERATING_POINTS, ctrl_get_num_operating_points },
CTRL_MAP_END,
};
@@ -4186,21 +4244,22 @@ static const aom_codec_enc_cfg_t encoder_usage_cfg[] = {
2000, // rc_two_pass_vbrmax_section
// keyframing settings (kf)
- 0, // fwd_kf_enabled
- AOM_KF_AUTO, // kf_mode
- 0, // kf_min_dist
- 9999, // kf_max_dist
- 0, // sframe_dist
- 1, // sframe_mode
- 0, // large_scale_tile
- 0, // monochrome
- 0, // full_still_picture_hdr
- 0, // save_as_annexb
- 0, // tile_width_count
- 0, // tile_height_count
- { 0 }, // tile_widths
- { 0 }, // tile_heights
- 0, // use_fixed_qp_offsets
+ 0, // fwd_kf_enabled
+ AOM_KF_AUTO, // kf_mode
+ 0, // kf_min_dist
+ 9999, // kf_max_dist
+ 0, // sframe_dist
+ 1, // sframe_mode
+ 0, // large_scale_tile
+ 0, // monochrome
+ 0, // full_still_picture_hdr
+ 0, // save_as_annexb
+ 0, // tile_width_count
+ 0, // tile_height_count
+ { 0 }, // tile_widths
+ { 0 }, // tile_heights
+ 0, // use_fixed_qp_offsets
+ { -1, -1, -1, -1, -1 }, // fixed_qp_offsets
{ 0, 128, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // cfg
},
@@ -4256,21 +4315,22 @@ static const aom_codec_enc_cfg_t encoder_usage_cfg[] = {
2000, // rc_two_pass_vbrmax_section
// keyframing settings (kf)
- 0, // fwd_kf_enabled
- AOM_KF_AUTO, // kf_mode
- 0, // kf_min_dist
- 9999, // kf_max_dist
- 0, // sframe_dist
- 1, // sframe_mode
- 0, // large_scale_tile
- 0, // monochrome
- 0, // full_still_picture_hdr
- 0, // save_as_annexb
- 0, // tile_width_count
- 0, // tile_height_count
- { 0 }, // tile_widths
- { 0 }, // tile_heights
- 0, // use_fixed_qp_offsets
+ 0, // fwd_kf_enabled
+ AOM_KF_AUTO, // kf_mode
+ 0, // kf_min_dist
+ 9999, // kf_max_dist
+ 0, // sframe_dist
+ 1, // sframe_mode
+ 0, // large_scale_tile
+ 0, // monochrome
+ 0, // full_still_picture_hdr
+ 0, // save_as_annexb
+ 0, // tile_width_count
+ 0, // tile_height_count
+ { 0 }, // tile_widths
+ { 0 }, // tile_heights
+ 0, // use_fixed_qp_offsets
+ { -1, -1, -1, -1, -1 }, // fixed_qp_offsets
{ 0, 128, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // cfg
},
@@ -4326,21 +4386,22 @@ static const aom_codec_enc_cfg_t encoder_usage_cfg[] = {
2000, // rc_two_pass_vbrmax_section
// keyframing settings (kf)
- 0, // fwd_kf_enabled
- AOM_KF_DISABLED, // kf_mode
- 0, // kf_min_dist
- 0, // kf_max_dist
- 0, // sframe_dist
- 1, // sframe_mode
- 0, // large_scale_tile
- 0, // monochrome
- 0, // full_still_picture_hdr
- 0, // save_as_annexb
- 0, // tile_width_count
- 0, // tile_height_count
- { 0 }, // tile_widths
- { 0 }, // tile_heights
- 0, // use_fixed_qp_offsets
+ 0, // fwd_kf_enabled
+ AOM_KF_DISABLED, // kf_mode
+ 0, // kf_min_dist
+ 0, // kf_max_dist
+ 0, // sframe_dist
+ 1, // sframe_mode
+ 0, // large_scale_tile
+ 0, // monochrome
+ 0, // full_still_picture_hdr
+ 0, // save_as_annexb
+ 0, // tile_width_count
+ 0, // tile_height_count
+ { 0 }, // tile_widths
+ { 0 }, // tile_heights
+ 0, // use_fixed_qp_offsets
+ { -1, -1, -1, -1, -1 }, // fixed_qp_offsets
{ 0, 128, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // cfg
},
diff --git a/chromium/third_party/libaom/source/libaom/av1/av1_cx_iface.h b/chromium/third_party/libaom/source/libaom/av1/av1_cx_iface.h
new file mode 100644
index 00000000000..05f4901af9c
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/av1/av1_cx_iface.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_AV1_AV1_CX_IFACE_H_
+#define AOM_AV1_AV1_CX_IFACE_H_
+#include "av1/encoder/encoder.h"
+#include "aom/aom_encoder.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+AV1EncoderConfig av1_get_encoder_config(const aom_codec_enc_cfg_t *cfg);
+
+aom_codec_err_t av1_create_stats_buffer(FIRSTPASS_STATS **frame_stats_buffer,
+ STATS_BUFFER_CTX *stats_buf_context,
+ int num_lap_buffers);
+
+void av1_destroy_stats_buffer(STATS_BUFFER_CTX *stats_buf_context,
+ FIRSTPASS_STATS *frame_stats_buffer);
+
+aom_codec_err_t av1_create_context_and_bufferpool(AV1_PRIMARY *ppi,
+ AV1_COMP **p_cpi,
+ BufferPool **p_buffer_pool,
+ const AV1EncoderConfig *oxcf,
+ COMPRESSOR_STAGE stage,
+ int lap_lag_in_frames);
+
+void av1_destroy_context_and_bufferpool(AV1_COMP *cpi,
+ BufferPool **p_buffer_pool);
+
+int av1_get_image_bps(const aom_image_t *img);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // AOM_AV1_AV1_CX_IFACE_H_
diff --git a/chromium/third_party/libaom/source/libaom/av1/av1_dx_iface.c b/chromium/third_party/libaom/source/libaom/av1/av1_dx_iface.c
index 45e8cb991a8..256747524d0 100644
--- a/chromium/third_party/libaom/source/libaom/av1/av1_dx_iface.c
+++ b/chromium/third_party/libaom/source/libaom/av1/av1_dx_iface.c
@@ -121,12 +121,9 @@ static aom_codec_err_t decoder_destroy(aom_codec_alg_priv_t *ctx) {
aom_free(pbi->common.tpl_mvs);
pbi->common.tpl_mvs = NULL;
av1_remove_common(&frame_worker_data->pbi->common);
- av1_free_cdef_buffers(&pbi->common, &pbi->cdef_worker, &pbi->cdef_sync,
- pbi->num_workers);
+ av1_free_cdef_buffers(&pbi->common, &pbi->cdef_worker, &pbi->cdef_sync);
av1_free_cdef_sync(&pbi->cdef_sync);
-#if !CONFIG_REALTIME_ONLY
av1_free_restoration_buffers(&pbi->common);
-#endif
av1_decoder_remove(pbi);
aom_free(frame_worker_data);
#if CONFIG_MULTITHREAD
@@ -748,8 +745,8 @@ static aom_image_t *add_grain_if_needed(aom_codec_alg_priv_t *ctx,
aom_film_grain_t *grain_params) {
if (!grain_params->apply_grain) return img;
- const int w_even = ALIGN_POWER_OF_TWO(img->d_w, 1);
- const int h_even = ALIGN_POWER_OF_TWO(img->d_h, 1);
+ const int w_even = ALIGN_POWER_OF_TWO_UNSIGNED(img->d_w, 1);
+ const int h_even = ALIGN_POWER_OF_TWO_UNSIGNED(img->d_h, 1);
BufferPool *const pool = ctx->buffer_pool;
aom_codec_frame_buffer_t *fb =
diff --git a/chromium/third_party/libaom/source/libaom/av1/av1_iface_common.h b/chromium/third_party/libaom/source/libaom/av1/av1_iface_common.h
index 57dd1b8edac..b923c3dcfff 100644
--- a/chromium/third_party/libaom/source/libaom/av1/av1_iface_common.h
+++ b/chromium/third_party/libaom/source/libaom/av1/av1_iface_common.h
@@ -137,7 +137,7 @@ static AOM_INLINE aom_codec_err_t image2yuvconfig(const aom_image_t *img,
   // Note(yunqing): if img is allocated the same way as the frame buffer,
   // y_stride is 32-byte aligned. Also handle the cases where img is allocated
   // without a border or with a stride_align of less than 32.
- int border = (yv12->y_stride - (int)((img->w + 31) & ~31)) / 2;
+ int border = (yv12->y_stride - (int)((img->w + 31) & ~31u)) / 2;
yv12->border = (border < 0) ? 0 : border;
yv12->subsampling_x = img->x_chroma_shift;
yv12->subsampling_y = img->y_chroma_shift;
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/alloccommon.c b/chromium/third_party/libaom/source/libaom/av1/common/alloccommon.c
index aeda111712d..e373dc12144 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/alloccommon.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/alloccommon.c
@@ -28,8 +28,8 @@ int av1_get_MBs(int width, int height) {
const int mi_cols = aligned_width >> MI_SIZE_LOG2;
const int mi_rows = aligned_height >> MI_SIZE_LOG2;
- const int mb_cols = (mi_cols + 2) >> 2;
- const int mb_rows = (mi_rows + 2) >> 2;
+ const int mb_cols = ROUND_POWER_OF_TWO(mi_cols, 2);
+ const int mb_rows = ROUND_POWER_OF_TWO(mi_rows, 2);
return mb_rows * mb_cols;
}
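/* Editor's note: for n == 2, ROUND_POWER_OF_TWO(v, 2) expands to
 * (v + 2) >> 2, so the rewrite above is behavior-preserving and simply names
 * the rounding intent. A quick standalone check: */
#include <assert.h>

#define ROUND_POW2(v, n) (((v) + (1 << ((n) - 1))) >> (n))

int main(void) {
  /* Compare the macro against the old hand-written expression. */
  for (int v = 0; v < 1024; ++v) assert(ROUND_POW2(v, 2) == (v + 2) >> 2);
  return 0;
}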
@@ -110,7 +110,7 @@ static INLINE void free_cdef_row_sync(AV1CdefRowSync **cdef_row_mt,
void av1_free_cdef_buffers(AV1_COMMON *const cm,
AV1CdefWorkerData **cdef_worker,
- AV1CdefSync *cdef_sync, int num_workers) {
+ AV1CdefSync *cdef_sync) {
CdefInfo *cdef_info = &cm->cdef_info;
const int num_mi_rows = cdef_info->allocated_mi_rows;
@@ -121,16 +121,17 @@ void av1_free_cdef_buffers(AV1_COMMON *const cm,
// De-allocation of column buffer & source buffer (worker_0).
free_cdef_bufs(cdef_info->colbuf, &cdef_info->srcbuf);
- if (num_workers < 2) return;
+ free_cdef_row_sync(&cdef_sync->cdef_row_mt, num_mi_rows);
+
+ if (cdef_info->allocated_num_workers < 2) return;
if (*cdef_worker != NULL) {
- for (int idx = num_workers - 1; idx >= 1; idx--) {
+ for (int idx = cdef_info->allocated_num_workers - 1; idx >= 1; idx--) {
// De-allocation of column buffer & source buffer for remaining workers.
free_cdef_bufs((*cdef_worker)[idx].colbuf, &(*cdef_worker)[idx].srcbuf);
}
aom_free(*cdef_worker);
*cdef_worker = NULL;
}
- free_cdef_row_sync(&cdef_sync->cdef_row_mt, num_mi_rows);
}
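/* Editor's note: two fixes land in this hunk. free_cdef_row_sync() is hoisted
 * above the worker-count early return, so the row-sync data is released even
 * in the single-worker case (previously it could be leaked there). And the
 * free path now keys off cdef_info->allocated_num_workers rather than a
 * caller-supplied count, so teardown always matches what was actually
 * allocated. */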
static INLINE void alloc_cdef_linebuf(AV1_COMMON *const cm, uint16_t **linebuf,
@@ -237,6 +238,9 @@ void av1_alloc_cdef_buffers(AV1_COMMON *const cm,
// num_workers
for (int idx = cdef_info->allocated_num_workers - 1; idx >= 1; idx--)
free_cdef_bufs((*cdef_worker)[idx].colbuf, &(*cdef_worker)[idx].srcbuf);
+
+ aom_free(*cdef_worker);
+ *cdef_worker = NULL;
} else if (num_workers > 1) {
// Free src and column buffers for remaining workers in case of
// reallocation
@@ -281,7 +285,6 @@ void av1_alloc_cdef_buffers(AV1_COMMON *const cm,
cdef_info->allocated_mi_rows);
}
-#if !CONFIG_REALTIME_ONLY
// Assumes cm->rst_info[p].restoration_unit_size is already initialized
void av1_alloc_restoration_buffers(AV1_COMMON *cm) {
const int num_planes = av1_num_planes(cm);
@@ -362,7 +365,6 @@ void av1_free_restoration_buffers(AV1_COMMON *cm) {
aom_free_frame_buffer(&cm->rst_frame);
}
-#endif // !CONFIG_REALTIME_ONLY
void av1_free_above_context_buffers(CommonContexts *above_contexts) {
int i;
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/alloccommon.h b/chromium/third_party/libaom/source/libaom/av1/common/alloccommon.h
index c7022fd0722..fc4a8ba1872 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/alloccommon.h
+++ b/chromium/third_party/libaom/source/libaom/av1/common/alloccommon.h
@@ -47,11 +47,9 @@ void av1_alloc_cdef_buffers(struct AV1Common *const cm,
int init_worker);
void av1_free_cdef_buffers(struct AV1Common *const cm,
struct AV1CdefWorker **cdef_worker,
- struct AV1CdefSyncData *cdef_sync, int num_workers);
-#if !CONFIG_REALTIME_ONLY
+ struct AV1CdefSyncData *cdef_sync);
void av1_alloc_restoration_buffers(struct AV1Common *cm);
void av1_free_restoration_buffers(struct AV1Common *cm);
-#endif
int av1_alloc_state_buffers(struct AV1Common *cm, int width, int height);
void av1_free_state_buffers(struct AV1Common *cm);
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/arm/av1_inv_txfm_neon.c b/chromium/third_party/libaom/source/libaom/av1/common/arm/av1_inv_txfm_neon.c
index bee496a4918..1628cbf23e7 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/arm/av1_inv_txfm_neon.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/arm/av1_inv_txfm_neon.c
@@ -250,8 +250,7 @@ static INLINE void btf_16_half_neon(int16x8_t *const x, const int16x4_t c) {
static INLINE int16x4_t set_s16x4_neon(const int16_t c0, const int16_t c1,
const int16_t c2, const int16_t c3) {
- int16x4_t val = vdup_n_s16((int16_t)0);
- val = vset_lane_s16(c0, val, 0);
+ int16x4_t val = vdup_n_s16(c0);
val = vset_lane_s16(c1, val, 1);
val = vset_lane_s16(c2, val, 2);
val = vset_lane_s16(c3, val, 3);
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/arm/blend_a64_hmask_neon.c b/chromium/third_party/libaom/source/libaom/av1/common/arm/blend_a64_hmask_neon.c
index 4639d4c4169..89252ef3cb9 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/arm/blend_a64_hmask_neon.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/arm/blend_a64_hmask_neon.c
@@ -83,6 +83,7 @@ void aom_blend_a64_hmask_neon(uint8_t *dst, uint32_t dst_stride,
dst += dst_stride;
}
} else if (w == 4) {
+ assert(((uintptr_t)mask & 3) == 0);
const uint8x8_t m = vreinterpret_u8_u32(vld1_dup_u32((uint32_t *)mask));
const uint8x8_t max_minus_m = vsub_u8(vdup_64, m);
for (int i = 0; i < h; i += 2) {
@@ -96,17 +97,15 @@ void aom_blend_a64_hmask_neon(uint8_t *dst, uint32_t dst_stride,
tmp1 = vreinterpret_u8_u32(tmp1_32);
res = vmull_u8(m, tmp0);
res = vmlal_u8(res, max_minus_m, tmp1);
- vst1_lane_u32(
- (uint32_t *)(dst + (0 * dst_stride)),
- vreinterpret_u32_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 0);
- vst1_lane_u32(
- (uint32_t *)(dst + (1 * dst_stride)),
- vreinterpret_u32_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 1);
+ const uint8x8_t result = vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS);
+ store_unaligned_u8_4x1(dst + 0 * dst_stride, result, 0);
+ store_unaligned_u8_4x1(dst + 1 * dst_stride, result, 1);
src0 += (2 * src0_stride);
src1 += (2 * src1_stride);
dst += (2 * dst_stride);
}
} else if (w == 2) {
+ assert(((uintptr_t)mask & 1) == 0);
const uint8x8_t m = vreinterpret_u8_u16(vld1_dup_u16((uint16_t *)mask));
const uint8x8_t max_minus_m = vsub_u8(vdup_64, m);
for (int i = 0; i < h; i += 2) {
@@ -120,12 +119,9 @@ void aom_blend_a64_hmask_neon(uint8_t *dst, uint32_t dst_stride,
tmp1 = vreinterpret_u8_u16(tmp1_16);
res = vmull_u8(m, tmp0);
res = vmlal_u8(res, max_minus_m, tmp1);
- vst1_lane_u16(
- (uint16_t *)(dst + (0 * dst_stride)),
- vreinterpret_u16_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 0);
- vst1_lane_u16(
- (uint16_t *)(dst + (1 * dst_stride)),
- vreinterpret_u16_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 1);
+ const uint8x8_t result = vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS);
+ store_unaligned_u8_2x1(dst + 0 * dst_stride, result, 0);
+ store_unaligned_u8_2x1(dst + 1 * dst_stride, result, 1);
src0 += (2 * src0_stride);
src1 += (2 * src1_stride);
dst += (2 * dst_stride);
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/arm/blend_a64_vmask_neon.c b/chromium/third_party/libaom/source/libaom/av1/common/arm/blend_a64_vmask_neon.c
index 061af74055a..2132fbdbc5d 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/arm/blend_a64_vmask_neon.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/arm/blend_a64_vmask_neon.c
@@ -95,12 +95,9 @@ void aom_blend_a64_vmask_neon(uint8_t *dst, uint32_t dst_stride,
tmp1 = vreinterpret_u8_u32(tmp1_32);
res = vmull_u8(m, tmp0);
res = vmlal_u8(res, max_minus_m, tmp1);
- vst1_lane_u32(
- (uint32_t *)(dst + (0 * dst_stride)),
- vreinterpret_u32_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 0);
- vst1_lane_u32(
- (uint32_t *)(dst + (1 * dst_stride)),
- vreinterpret_u32_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 1);
+ const uint8x8_t result = vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS);
+ store_unaligned_u8_4x1(dst + 0 * dst_stride, result, 0);
+ store_unaligned_u8_4x1(dst + 1 * dst_stride, result, 1);
src0 += (2 * src0_stride);
src1 += (2 * src1_stride);
dst += (2 * dst_stride);
@@ -127,12 +124,9 @@ void aom_blend_a64_vmask_neon(uint8_t *dst, uint32_t dst_stride,
tmp1 = vreinterpret_u8_u16(tmp1_16);
res = vmull_u8(m, tmp0);
res = vmlal_u8(res, max_minus_m, tmp1);
- vst1_lane_u16(
- (uint16_t *)(dst + (0 * dst_stride)),
- vreinterpret_u16_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 0);
- vst1_lane_u16(
- (uint16_t *)(dst + (1 * dst_stride)),
- vreinterpret_u16_u8(vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS)), 1);
+ const uint8x8_t result = vrshrn_n_u16(res, AOM_BLEND_A64_ROUND_BITS);
+ store_unaligned_u8_2x1(dst + 0 * dst_stride, result, 0);
+ store_unaligned_u8_2x1(dst + 1 * dst_stride, result, 1);
src0 += (2 * src0_stride);
src1 += (2 * src1_stride);
dst += (2 * dst_stride);
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/arm/cdef_block_neon.c b/chromium/third_party/libaom/source/libaom/av1/common/arm/cdef_block_neon.c
index b694329c38d..7a8fed50f15 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/arm/cdef_block_neon.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/arm/cdef_block_neon.c
@@ -27,3 +27,14 @@ void cdef_copy_rect8_8bit_to_16bit_neon(uint16_t *dst, int dstride,
}
}
}
+
+void cdef_find_dir_dual_neon(const uint16_t *img1, const uint16_t *img2,
+ int stride, int32_t *var_out_1st,
+ int32_t *var_out_2nd, int coeff_shift,
+ int *out_dir_1st_8x8, int *out_dir_2nd_8x8) {
+ // Process first 8x8.
+ *out_dir_1st_8x8 = cdef_find_dir(img1, stride, var_out_1st, coeff_shift);
+
+ // Process second 8x8.
+ *out_dir_2nd_8x8 = cdef_find_dir(img2, stride, var_out_2nd, coeff_shift);
+}
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/arm/convolve_neon.c b/chromium/third_party/libaom/source/libaom/av1/common/arm/convolve_neon.c
index f0e4bedccb9..7abcbd3d96c 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/arm/convolve_neon.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/arm/convolve_neon.c
@@ -27,68 +27,41 @@ static INLINE int16x4_t convolve8_4x4(const int16x4_t s0, const int16x4_t s1,
const int16x4_t s2, const int16x4_t s3,
const int16x4_t s4, const int16x4_t s5,
const int16x4_t s6, const int16x4_t s7,
- const int16_t *filter) {
+ const int16x8_t filter) {
+ const int16x4_t filter_lo = vget_low_s16(filter);
+ const int16x4_t filter_hi = vget_high_s16(filter);
int16x4_t sum;
- sum = vmul_n_s16(s0, filter[0]);
- sum = vmla_n_s16(sum, s1, filter[1]);
- sum = vmla_n_s16(sum, s2, filter[2]);
- sum = vmla_n_s16(sum, s5, filter[5]);
- sum = vmla_n_s16(sum, s6, filter[6]);
- sum = vmla_n_s16(sum, s7, filter[7]);
- /* filter[3] can take a max value of 128. So the max value of the result :
- * 128*255 + sum > 16 bits
- */
- sum = vqadd_s16(sum, vmul_n_s16(s3, filter[3]));
- sum = vqadd_s16(sum, vmul_n_s16(s4, filter[4]));
+ sum = vmul_lane_s16(s0, filter_lo, 0);
+ sum = vmla_lane_s16(sum, s1, filter_lo, 1);
+ sum = vmla_lane_s16(sum, s2, filter_lo, 2);
+ sum = vmla_lane_s16(sum, s3, filter_lo, 3);
+ sum = vmla_lane_s16(sum, s4, filter_hi, 0);
+ sum = vmla_lane_s16(sum, s5, filter_hi, 1);
+ sum = vmla_lane_s16(sum, s6, filter_hi, 2);
+ sum = vmla_lane_s16(sum, s7, filter_hi, 3);
return sum;
}
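/* Editor's note: the rewrite above keeps all eight filter taps in one
 * int16x8_t so each multiply-accumulate can use the lane form
 * (vmla_lane_s16) instead of re-materializing a scalar tap per step
 * (vmla_n_s16). A minimal sketch of the pattern, assuming an AArch32/AArch64
 * target with NEON: */
#include <arm_neon.h>

static inline int16x4_t dot8_lanes(const int16x4_t s[8], const int16x8_t f) {
  const int16x4_t f_lo = vget_low_s16(f);   /* taps 0..3 */
  const int16x4_t f_hi = vget_high_s16(f);  /* taps 4..7 */
  int16x4_t sum = vmul_lane_s16(s[0], f_lo, 0);
  sum = vmla_lane_s16(sum, s[1], f_lo, 1);
  sum = vmla_lane_s16(sum, s[2], f_lo, 2);
  sum = vmla_lane_s16(sum, s[3], f_lo, 3);
  sum = vmla_lane_s16(sum, s[4], f_hi, 0);
  sum = vmla_lane_s16(sum, s[5], f_hi, 1);
  sum = vmla_lane_s16(sum, s[6], f_hi, 2);
  sum = vmla_lane_s16(sum, s[7], f_hi, 3);
  return sum;
}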
-static INLINE uint8x8_t convolve8_horiz_8x8(
- const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
- const int16x8_t s3, const int16x8_t s4, const int16x8_t s5,
- const int16x8_t s6, const int16x8_t s7, const int16_t *filter,
- const int16x8_t shift_round_0, const int16x8_t shift_by_bits) {
- int16x8_t sum;
-
- sum = vmulq_n_s16(s0, filter[0]);
- sum = vmlaq_n_s16(sum, s1, filter[1]);
- sum = vmlaq_n_s16(sum, s2, filter[2]);
- sum = vmlaq_n_s16(sum, s5, filter[5]);
- sum = vmlaq_n_s16(sum, s6, filter[6]);
- sum = vmlaq_n_s16(sum, s7, filter[7]);
- /* filter[3] can take a max value of 128. So the max value of the result :
- * 128*255 + sum > 16 bits
- */
- sum = vqaddq_s16(sum, vmulq_n_s16(s3, filter[3]));
- sum = vqaddq_s16(sum, vmulq_n_s16(s4, filter[4]));
-
- sum = vqrshlq_s16(sum, shift_round_0);
- sum = vqrshlq_s16(sum, shift_by_bits);
-
- return vqmovun_s16(sum);
-}
-
#if !defined(__aarch64__)
static INLINE uint8x8_t convolve8_horiz_4x1(
const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
const int16x4_t s3, const int16x4_t s4, const int16x4_t s5,
- const int16x4_t s6, const int16x4_t s7, const int16_t *filter,
+ const int16x4_t s6, const int16x4_t s7, const int16x8_t filter,
const int16x4_t shift_round_0, const int16x4_t shift_by_bits) {
+ const int16x4_t filter_lo = vget_low_s16(filter);
+ const int16x4_t filter_hi = vget_high_s16(filter);
int16x4_t sum;
- sum = vmul_n_s16(s0, filter[0]);
- sum = vmla_n_s16(sum, s1, filter[1]);
- sum = vmla_n_s16(sum, s2, filter[2]);
- sum = vmla_n_s16(sum, s5, filter[5]);
- sum = vmla_n_s16(sum, s6, filter[6]);
- sum = vmla_n_s16(sum, s7, filter[7]);
- /* filter[3] can take a max value of 128. So the max value of the result :
- * 128*255 + sum > 16 bits
- */
- sum = vqadd_s16(sum, vmul_n_s16(s3, filter[3]));
- sum = vqadd_s16(sum, vmul_n_s16(s4, filter[4]));
+ sum = vmul_lane_s16(s0, filter_lo, 0);
+ sum = vmla_lane_s16(sum, s1, filter_lo, 1);
+ sum = vmla_lane_s16(sum, s2, filter_lo, 2);
+ sum = vmla_lane_s16(sum, s3, filter_lo, 3);
+ sum = vmla_lane_s16(sum, s4, filter_hi, 0);
+ sum = vmla_lane_s16(sum, s5, filter_hi, 1);
+ sum = vmla_lane_s16(sum, s6, filter_hi, 2);
+ sum = vmla_lane_s16(sum, s7, filter_hi, 3);
sum = vqrshl_s16(sum, shift_round_0);
sum = vqrshl_s16(sum, shift_by_bits);
@@ -100,80 +73,161 @@ static INLINE uint8x8_t convolve8_horiz_4x1(
static INLINE uint8x8_t convolve8_vert_8x4(
const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
const int16x8_t s3, const int16x8_t s4, const int16x8_t s5,
- const int16x8_t s6, const int16x8_t s7, const int16_t *filter) {
+ const int16x8_t s6, const int16x8_t s7, const int16x8_t filter) {
+ const int16x4_t filter_lo = vget_low_s16(filter);
+ const int16x4_t filter_hi = vget_high_s16(filter);
int16x8_t sum;
- sum = vmulq_n_s16(s0, filter[0]);
- sum = vmlaq_n_s16(sum, s1, filter[1]);
- sum = vmlaq_n_s16(sum, s2, filter[2]);
- sum = vmlaq_n_s16(sum, s5, filter[5]);
- sum = vmlaq_n_s16(sum, s6, filter[6]);
- sum = vmlaq_n_s16(sum, s7, filter[7]);
- /* filter[3] can take a max value of 128. So the max value of the result :
- * 128*255 + sum > 16 bits
- */
- sum = vqaddq_s16(sum, vmulq_n_s16(s3, filter[3]));
- sum = vqaddq_s16(sum, vmulq_n_s16(s4, filter[4]));
-
- return vqrshrun_n_s16(sum, FILTER_BITS);
+ sum = vmulq_lane_s16(s0, filter_lo, 0);
+ sum = vmlaq_lane_s16(sum, s1, filter_lo, 1);
+ sum = vmlaq_lane_s16(sum, s2, filter_lo, 2);
+ sum = vmlaq_lane_s16(sum, s3, filter_lo, 3);
+ sum = vmlaq_lane_s16(sum, s4, filter_hi, 0);
+ sum = vmlaq_lane_s16(sum, s5, filter_hi, 1);
+ sum = vmlaq_lane_s16(sum, s6, filter_hi, 2);
+ sum = vmlaq_lane_s16(sum, s7, filter_hi, 3);
+
+ return vqrshrun_n_s16(sum, FILTER_BITS - 1);
}
-static INLINE uint16x4_t convolve8_vert_4x4_s32(
+static INLINE int16x4_t convolve8_vert_4x4_s32(
const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
const int16x4_t s3, const int16x4_t s4, const int16x4_t s5,
- const int16x4_t s6, const int16x4_t s7, const int16_t *y_filter,
+ const int16x4_t s6, const int16x4_t s7, const int16x8_t y_filter,
const int32x4_t round_shift_vec, const int32x4_t offset_const,
const int32x4_t sub_const_vec) {
- int32x4_t sum0;
- uint16x4_t res;
- const int32x4_t zero = vdupq_n_s32(0);
-
- sum0 = vmull_n_s16(s0, y_filter[0]);
- sum0 = vmlal_n_s16(sum0, s1, y_filter[1]);
- sum0 = vmlal_n_s16(sum0, s2, y_filter[2]);
- sum0 = vmlal_n_s16(sum0, s3, y_filter[3]);
- sum0 = vmlal_n_s16(sum0, s4, y_filter[4]);
- sum0 = vmlal_n_s16(sum0, s5, y_filter[5]);
- sum0 = vmlal_n_s16(sum0, s6, y_filter[6]);
- sum0 = vmlal_n_s16(sum0, s7, y_filter[7]);
+ const int16x4_t y_filter_lo = vget_low_s16(y_filter);
+ const int16x4_t y_filter_hi = vget_high_s16(y_filter);
+ int32x4_t sum;
+
+ sum = vmull_lane_s16(s0, y_filter_lo, 0);
+ sum = vmlal_lane_s16(sum, s1, y_filter_lo, 1);
+ sum = vmlal_lane_s16(sum, s2, y_filter_lo, 2);
+ sum = vmlal_lane_s16(sum, s3, y_filter_lo, 3);
+ sum = vmlal_lane_s16(sum, s4, y_filter_hi, 0);
+ sum = vmlal_lane_s16(sum, s5, y_filter_hi, 1);
+ sum = vmlal_lane_s16(sum, s6, y_filter_hi, 2);
+ sum = vmlal_lane_s16(sum, s7, y_filter_hi, 3);
+
+ sum = vaddq_s32(sum, offset_const);
+ sum = vqrshlq_s32(sum, round_shift_vec);
+ sum = vsubq_s32(sum, sub_const_vec);
+
+ return vmovn_s32(sum);
+}
+
+static INLINE uint8x8_t convolve8_vert_8x4_s32(
+ const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
+ const int16x8_t s3, const int16x8_t s4, const int16x8_t s5,
+ const int16x8_t s6, const int16x8_t s7, const int16x8_t y_filter,
+ const int32x4_t round_shift_vec, const int32x4_t offset_const,
+ const int32x4_t sub_const_vec, const int16x8_t vec_round_bits) {
+ const int16x4_t y_filter_lo = vget_low_s16(y_filter);
+ const int16x4_t y_filter_hi = vget_high_s16(y_filter);
+ int32x4_t sum0, sum1;
+ int16x8_t res;
+
+ sum0 = vmull_lane_s16(vget_low_s16(s0), y_filter_lo, 0);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s1), y_filter_lo, 1);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s2), y_filter_lo, 2);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s3), y_filter_lo, 3);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s4), y_filter_hi, 0);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s5), y_filter_hi, 1);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s6), y_filter_hi, 2);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s7), y_filter_hi, 3);
+
+ sum1 = vmull_lane_s16(vget_high_s16(s0), y_filter_lo, 0);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s1), y_filter_lo, 1);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s2), y_filter_lo, 2);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s3), y_filter_lo, 3);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s4), y_filter_hi, 0);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s5), y_filter_hi, 1);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s6), y_filter_hi, 2);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s7), y_filter_hi, 3);
sum0 = vaddq_s32(sum0, offset_const);
+ sum1 = vaddq_s32(sum1, offset_const);
sum0 = vqrshlq_s32(sum0, round_shift_vec);
+ sum1 = vqrshlq_s32(sum1, round_shift_vec);
sum0 = vsubq_s32(sum0, sub_const_vec);
- sum0 = vmaxq_s32(sum0, zero);
+ sum1 = vsubq_s32(sum1, sub_const_vec);
- res = vmovn_u32(vreinterpretq_u32_s32(sum0));
+ res = vcombine_s16(vmovn_s32(sum0), vmovn_s32(sum1));
+ res = vqrshlq_s16(res, vec_round_bits);
- return res;
+ return vqmovun_s16(res);
}
-static INLINE uint8x8_t convolve8_vert_8x4_s32(
+static INLINE int16x4_t convolve12_vert_4x4_s32(
+ const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
+ const int16x4_t s3, const int16x4_t s4, const int16x4_t s5,
+ const int16x4_t s6, const int16x4_t s7, const int16x4_t s8,
+ const int16x4_t s9, const int16x4_t s10, const int16x4_t s11,
+ const int16x8_t y_filter_0_7, const int16x4_t y_filter_8_11,
+ const int32x4_t round_shift_vec, const int32x4_t offset_const,
+ const int32x4_t sub_const_vec) {
+ const int16x4_t y_filter_0_3 = vget_low_s16(y_filter_0_7);
+ const int16x4_t y_filter_4_7 = vget_high_s16(y_filter_0_7);
+ int32x4_t sum;
+
+ sum = vmull_lane_s16(s0, y_filter_0_3, 0);
+ sum = vmlal_lane_s16(sum, s1, y_filter_0_3, 1);
+ sum = vmlal_lane_s16(sum, s2, y_filter_0_3, 2);
+ sum = vmlal_lane_s16(sum, s3, y_filter_0_3, 3);
+ sum = vmlal_lane_s16(sum, s4, y_filter_4_7, 0);
+ sum = vmlal_lane_s16(sum, s5, y_filter_4_7, 1);
+ sum = vmlal_lane_s16(sum, s6, y_filter_4_7, 2);
+ sum = vmlal_lane_s16(sum, s7, y_filter_4_7, 3);
+ sum = vmlal_lane_s16(sum, s8, y_filter_8_11, 0);
+ sum = vmlal_lane_s16(sum, s9, y_filter_8_11, 1);
+ sum = vmlal_lane_s16(sum, s10, y_filter_8_11, 2);
+ sum = vmlal_lane_s16(sum, s11, y_filter_8_11, 3);
+
+ sum = vaddq_s32(sum, offset_const);
+ sum = vqrshlq_s32(sum, round_shift_vec);
+ sum = vsubq_s32(sum, sub_const_vec);
+
+ return vmovn_s32(sum);
+}
+
+static INLINE uint8x8_t convolve12_vert_8x4_s32(
const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
const int16x8_t s3, const int16x8_t s4, const int16x8_t s5,
- const int16x8_t s6, const int16x8_t s7, const int16_t *y_filter,
+ const int16x8_t s6, const int16x8_t s7, const int16x8_t s8,
+ const int16x8_t s9, const int16x8_t s10, const int16x8_t s11,
+ const int16x8_t y_filter_0_7, const int16x4_t y_filter_8_11,
const int32x4_t round_shift_vec, const int32x4_t offset_const,
const int32x4_t sub_const_vec, const int16x8_t vec_round_bits) {
+ const int16x4_t y_filter_0_3 = vget_low_s16(y_filter_0_7);
+ const int16x4_t y_filter_4_7 = vget_high_s16(y_filter_0_7);
int32x4_t sum0, sum1;
- uint16x8_t res;
- const int32x4_t zero = vdupq_n_s32(0);
-
- sum0 = vmull_n_s16(vget_low_s16(s0), y_filter[0]);
- sum0 = vmlal_n_s16(sum0, vget_low_s16(s1), y_filter[1]);
- sum0 = vmlal_n_s16(sum0, vget_low_s16(s2), y_filter[2]);
- sum0 = vmlal_n_s16(sum0, vget_low_s16(s3), y_filter[3]);
- sum0 = vmlal_n_s16(sum0, vget_low_s16(s4), y_filter[4]);
- sum0 = vmlal_n_s16(sum0, vget_low_s16(s5), y_filter[5]);
- sum0 = vmlal_n_s16(sum0, vget_low_s16(s6), y_filter[6]);
- sum0 = vmlal_n_s16(sum0, vget_low_s16(s7), y_filter[7]);
-
- sum1 = vmull_n_s16(vget_high_s16(s0), y_filter[0]);
- sum1 = vmlal_n_s16(sum1, vget_high_s16(s1), y_filter[1]);
- sum1 = vmlal_n_s16(sum1, vget_high_s16(s2), y_filter[2]);
- sum1 = vmlal_n_s16(sum1, vget_high_s16(s3), y_filter[3]);
- sum1 = vmlal_n_s16(sum1, vget_high_s16(s4), y_filter[4]);
- sum1 = vmlal_n_s16(sum1, vget_high_s16(s5), y_filter[5]);
- sum1 = vmlal_n_s16(sum1, vget_high_s16(s6), y_filter[6]);
- sum1 = vmlal_n_s16(sum1, vget_high_s16(s7), y_filter[7]);
+ int16x8_t res;
+
+ sum0 = vmull_lane_s16(vget_low_s16(s0), y_filter_0_3, 0);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s1), y_filter_0_3, 1);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s2), y_filter_0_3, 2);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s3), y_filter_0_3, 3);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s4), y_filter_4_7, 0);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s5), y_filter_4_7, 1);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s6), y_filter_4_7, 2);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s7), y_filter_4_7, 3);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s8), y_filter_8_11, 0);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s9), y_filter_8_11, 1);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s10), y_filter_8_11, 2);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s11), y_filter_8_11, 3);
+
+ sum1 = vmull_lane_s16(vget_high_s16(s0), y_filter_0_3, 0);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s1), y_filter_0_3, 1);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s2), y_filter_0_3, 2);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s3), y_filter_0_3, 3);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s4), y_filter_4_7, 0);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s5), y_filter_4_7, 1);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s6), y_filter_4_7, 2);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s7), y_filter_4_7, 3);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s8), y_filter_8_11, 0);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s9), y_filter_8_11, 1);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s10), y_filter_8_11, 2);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s11), y_filter_8_11, 3);
sum0 = vaddq_s32(sum0, offset_const);
sum1 = vaddq_s32(sum1, offset_const);
@@ -181,14 +235,329 @@ static INLINE uint8x8_t convolve8_vert_8x4_s32(
sum1 = vqrshlq_s32(sum1, round_shift_vec);
sum0 = vsubq_s32(sum0, sub_const_vec);
sum1 = vsubq_s32(sum1, sub_const_vec);
- sum0 = vmaxq_s32(sum0, zero);
- sum1 = vmaxq_s32(sum1, zero);
- res = vcombine_u16(vqmovn_u32(vreinterpretq_u32_s32(sum0)),
- vqmovn_u32(vreinterpretq_u32_s32(sum1)));
- res = vqrshlq_u16(res, vec_round_bits);
+ res = vcombine_s16(vmovn_s32(sum0), vmovn_s32(sum1));
+ res = vqrshlq_s16(res, vec_round_bits);
+
+ return vqmovun_s16(res);
+}
+
+#if defined(__aarch64__) && defined(__ARM_FEATURE_MATMUL_INT8)
+
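+// USDOT (mixed-sign 8-bit dot product) is provided by the I8MM extension
+// (mandatory from Armv8.6-A); CPUs with only SDOT take the branch below, and
+// older cores fall back to plain multiply-accumulate.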
+void av1_convolve_x_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+ const InterpFilterParams *filter_params_x,
+ const int subpel_x_qn,
+ ConvolveParams *conv_params) {
+ if (filter_params_x->taps > 8) {
+ av1_convolve_x_sr_c(src, src_stride, dst, dst_stride, w, h, filter_params_x,
+ subpel_x_qn, conv_params);
+ return;
+ }
+ const uint8_t horiz_offset = filter_params_x->taps / 2 - 1;
+ const int8_t bits = FILTER_BITS - conv_params->round_0;
+
+ assert(bits >= 0);
+ assert((FILTER_BITS - conv_params->round_1) >= 0 ||
+ ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS));
+
+ const int16_t *x_filter_ptr = av1_get_interp_filter_subpel_kernel(
+ filter_params_x, subpel_x_qn & SUBPEL_MASK);
+ // Filter values are even, so downshift by 1 to reduce intermediate precision
+ // requirements.
+ const int8x8_t x_filter = vshrn_n_s16(vld1q_s16(x_filter_ptr), 1);
+
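+  // Halving the taps also halves the convolution sum, so shift_round_0 below
+  // shifts right by one bit less (round_0 - 1) to compensate.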
+ const int16x8_t shift_round_0 = vdupq_n_s16(-conv_params->round_0 + 1);
+ const int16x8_t shift_by_bits = vdupq_n_s16(-bits);
+
+ src -= horiz_offset;
+
+ if (w <= 4) {
+ const uint8x16x2_t permute_tbl = vld1q_u8_x2(dot_prod_permute_tbl);
+ uint8x16_t s0, s1, s2, s3;
+ int32x4_t t0, t1, t2, t3;
+ int16x8_t t01, t23;
+ uint8x8_t d01, d23;
+
+ do {
+ s0 = vld1q_u8(src + 0 * src_stride);
+ s1 = vld1q_u8(src + 1 * src_stride);
+ s2 = vld1q_u8(src + 2 * src_stride);
+ s3 = vld1q_u8(src + 3 * src_stride);
+
+ t0 = convolve8_4_usdot(s0, x_filter, permute_tbl, vdupq_n_s32(0));
+ t1 = convolve8_4_usdot(s1, x_filter, permute_tbl, vdupq_n_s32(0));
+ t2 = convolve8_4_usdot(s2, x_filter, permute_tbl, vdupq_n_s32(0));
+ t3 = convolve8_4_usdot(s3, x_filter, permute_tbl, vdupq_n_s32(0));
+
+ t01 = vcombine_s16(vmovn_s32(t0), vmovn_s32(t1));
+ t23 = vcombine_s16(vmovn_s32(t2), vmovn_s32(t3));
+
+ t01 = vqrshlq_s16(t01, shift_round_0);
+ t23 = vqrshlq_s16(t23, shift_round_0);
+
+ t01 = vqrshlq_s16(t01, shift_by_bits);
+ t23 = vqrshlq_s16(t23, shift_by_bits);
+
+ d01 = vqmovun_s16(t01);
+ d23 = vqmovun_s16(t23);
+
+ if (w == 2) {
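+        // d01 holds rows 0 and 1 in its low and high halves; for w == 2 the
+        // two pixels of each row are stored as a single u16 lane (lanes 0 and
+        // 2 of the reinterpreted vector).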
+ vst1_lane_u16((uint16_t *)(dst + 0 * dst_stride),
+ vreinterpret_u16_u8(d01), 0);
+ vst1_lane_u16((uint16_t *)(dst + 1 * dst_stride),
+ vreinterpret_u16_u8(d01), 2);
+ if (h != 2) {
+ vst1_lane_u16((uint16_t *)(dst + 2 * dst_stride),
+ vreinterpret_u16_u8(d23), 0);
+ vst1_lane_u16((uint16_t *)(dst + 3 * dst_stride),
+ vreinterpret_u16_u8(d23), 2);
+ }
+ } else {
+ vst1_lane_u32((uint32_t *)(dst + 0 * dst_stride),
+ vreinterpret_u32_u8(d01), 0);
+ vst1_lane_u32((uint32_t *)(dst + 1 * dst_stride),
+ vreinterpret_u32_u8(d01), 1);
+ if (h != 2) {
+ vst1_lane_u32((uint32_t *)(dst + 2 * dst_stride),
+ vreinterpret_u32_u8(d23), 0);
+ vst1_lane_u32((uint32_t *)(dst + 3 * dst_stride),
+ vreinterpret_u32_u8(d23), 1);
+ }
+ }
+
+ h -= 4;
+ src += 4 * src_stride;
+ dst += 4 * dst_stride;
+ } while (h > 0);
+
+ } else {
+ const uint8x16x3_t permute_tbl = vld1q_u8_x3(dot_prod_permute_tbl);
+ uint8x16_t s0, s1, s2, s3;
+ int16x8_t t0, t1, t2, t3;
+ uint8x8_t d0, d1, d2, d3;
+
+ do {
+ int width = w;
+ const uint8_t *s = src;
+ uint8_t *d = dst;
+
+ do {
+ s0 = vld1q_u8(s + 0 * src_stride);
+ s1 = vld1q_u8(s + 1 * src_stride);
+ s2 = vld1q_u8(s + 2 * src_stride);
+ s3 = vld1q_u8(s + 3 * src_stride);
+
+ t0 = convolve8_8_usdot(s0, x_filter, permute_tbl, vdupq_n_s32(0),
+ shift_round_0);
+ t1 = convolve8_8_usdot(s1, x_filter, permute_tbl, vdupq_n_s32(0),
+ shift_round_0);
+ t2 = convolve8_8_usdot(s2, x_filter, permute_tbl, vdupq_n_s32(0),
+ shift_round_0);
+ t3 = convolve8_8_usdot(s3, x_filter, permute_tbl, vdupq_n_s32(0),
+ shift_round_0);
+
+ t0 = vqrshlq_s16(t0, shift_by_bits);
+ t1 = vqrshlq_s16(t1, shift_by_bits);
+ t2 = vqrshlq_s16(t2, shift_by_bits);
+ t3 = vqrshlq_s16(t3, shift_by_bits);
+
+ d0 = vqmovun_s16(t0);
+ d1 = vqmovun_s16(t1);
+ d2 = vqmovun_s16(t2);
+ d3 = vqmovun_s16(t3);
+
+ vst1_u8(d + 0 * dst_stride, d0);
+ vst1_u8(d + 1 * dst_stride, d1);
+ if (h != 2) {
+ vst1_u8(d + 2 * dst_stride, d2);
+ vst1_u8(d + 3 * dst_stride, d3);
+ }
+
+ s += 8;
+ d += 8;
+ width -= 8;
+ } while (width > 0);
+
+ src += 4 * src_stride;
+ dst += 4 * dst_stride;
+ h -= 4;
+ } while (h > 0);
+ }
+}
+
+#elif defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+
+void av1_convolve_x_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+ const InterpFilterParams *filter_params_x,
+ const int subpel_x_qn,
+ ConvolveParams *conv_params) {
+ if (filter_params_x->taps > 8) {
+ av1_convolve_x_sr_c(src, src_stride, dst, dst_stride, w, h, filter_params_x,
+ subpel_x_qn, conv_params);
+ return;
+ }
+ const uint8_t horiz_offset = filter_params_x->taps / 2 - 1;
+ const int8_t bits = FILTER_BITS - conv_params->round_0;
+
+ assert(bits >= 0);
+ assert((FILTER_BITS - conv_params->round_1) >= 0 ||
+ ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS));
+
+ const int16_t *x_filter_ptr = av1_get_interp_filter_subpel_kernel(
+ filter_params_x, subpel_x_qn & SUBPEL_MASK);
+ // Filter values are even, so downshift by 1 to reduce intermediate precision
+ // requirements.
+ const int8x8_t x_filter = vshrn_n_s16(vld1q_s16(x_filter_ptr), 1);
+ // Dot product constants.
+ const int16x8_t correct_tmp = vshll_n_s8(x_filter, 7);
+ const int32x4_t correction = vdupq_n_s32(vaddlvq_s16(correct_tmp));
+ const uint8x16_t range_limit = vdupq_n_u8(128);
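+  // Subtracting 128 (range_limit) maps the unsigned samples into the signed
+  // dot-product range; 'correction' (128 * the sum of the taps) adds back the
+  // term removed by that clamp.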
+
+ const int16x8_t shift_round_0 = vdupq_n_s16(-conv_params->round_0 + 1);
+ const int16x8_t shift_by_bits = vdupq_n_s16(-bits);
+
+ src -= horiz_offset;
+
+ if (w <= 4) {
+ const uint8x16x2_t permute_tbl = vld1q_u8_x2(dot_prod_permute_tbl);
+ uint8x16_t s0, s1, s2, s3;
+ int32x4_t t0, t1, t2, t3;
+ int16x8_t t01, t23;
+ uint8x8_t d01, d23;
+
+ do {
+ s0 = vld1q_u8(src + 0 * src_stride);
+ s1 = vld1q_u8(src + 1 * src_stride);
+ s2 = vld1q_u8(src + 2 * src_stride);
+ s3 = vld1q_u8(src + 3 * src_stride);
+
+ t0 = convolve8_4_sdot(s0, x_filter, correction, range_limit, permute_tbl);
+ t1 = convolve8_4_sdot(s1, x_filter, correction, range_limit, permute_tbl);
+ t2 = convolve8_4_sdot(s2, x_filter, correction, range_limit, permute_tbl);
+ t3 = convolve8_4_sdot(s3, x_filter, correction, range_limit, permute_tbl);
+
+ t01 = vcombine_s16(vmovn_s32(t0), vmovn_s32(t1));
+ t23 = vcombine_s16(vmovn_s32(t2), vmovn_s32(t3));
+
+ t01 = vqrshlq_s16(t01, shift_round_0);
+ t23 = vqrshlq_s16(t23, shift_round_0);
+
+ t01 = vqrshlq_s16(t01, shift_by_bits);
+ t23 = vqrshlq_s16(t23, shift_by_bits);
+
+ d01 = vqmovun_s16(t01);
+ d23 = vqmovun_s16(t23);
+
+ if (w == 2) {
+ vst1_lane_u16((uint16_t *)(dst + 0 * dst_stride),
+ vreinterpret_u16_u8(d01), 0);
+ vst1_lane_u16((uint16_t *)(dst + 1 * dst_stride),
+ vreinterpret_u16_u8(d01), 2);
+ if (h != 2) {
+ vst1_lane_u16((uint16_t *)(dst + 2 * dst_stride),
+ vreinterpret_u16_u8(d23), 0);
+ vst1_lane_u16((uint16_t *)(dst + 3 * dst_stride),
+ vreinterpret_u16_u8(d23), 2);
+ }
+ } else {
+ vst1_lane_u32((uint32_t *)(dst + 0 * dst_stride),
+ vreinterpret_u32_u8(d01), 0);
+ vst1_lane_u32((uint32_t *)(dst + 1 * dst_stride),
+ vreinterpret_u32_u8(d01), 1);
+ if (h != 2) {
+ vst1_lane_u32((uint32_t *)(dst + 2 * dst_stride),
+ vreinterpret_u32_u8(d23), 0);
+ vst1_lane_u32((uint32_t *)(dst + 3 * dst_stride),
+ vreinterpret_u32_u8(d23), 1);
+ }
+ }
+
+ h -= 4;
+ src += 4 * src_stride;
+ dst += 4 * dst_stride;
+ } while (h > 0);
+
+ } else {
+ const uint8x16x3_t permute_tbl = vld1q_u8_x3(dot_prod_permute_tbl);
+ uint8x16_t s0, s1, s2, s3;
+ int16x8_t t0, t1, t2, t3;
+ uint8x8_t d0, d1, d2, d3;
+
+ do {
+ int width = w;
+ const uint8_t *s = src;
+ uint8_t *d = dst;
+
+ do {
+ s0 = vld1q_u8(s + 0 * src_stride);
+ s1 = vld1q_u8(s + 1 * src_stride);
+ s2 = vld1q_u8(s + 2 * src_stride);
+ s3 = vld1q_u8(s + 3 * src_stride);
+
+ t0 = convolve8_8_sdot(s0, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+ t1 = convolve8_8_sdot(s1, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+ t2 = convolve8_8_sdot(s2, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+ t3 = convolve8_8_sdot(s3, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+
+ t0 = vqrshlq_s16(t0, shift_by_bits);
+ t1 = vqrshlq_s16(t1, shift_by_bits);
+ t2 = vqrshlq_s16(t2, shift_by_bits);
+ t3 = vqrshlq_s16(t3, shift_by_bits);
+
+ d0 = vqmovun_s16(t0);
+ d1 = vqmovun_s16(t1);
+ d2 = vqmovun_s16(t2);
+ d3 = vqmovun_s16(t3);
+
+ vst1_u8(d + 0 * dst_stride, d0);
+ vst1_u8(d + 1 * dst_stride, d1);
+ if (h != 2) {
+ vst1_u8(d + 2 * dst_stride, d2);
+ vst1_u8(d + 3 * dst_stride, d3);
+ }
+
+ s += 8;
+ d += 8;
+ width -= 8;
+ } while (width > 0);
+
+ src += 4 * src_stride;
+ dst += 4 * dst_stride;
+ h -= 4;
+ } while (h > 0);
+ }
+}
+
+#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+
+static INLINE uint8x8_t convolve8_horiz_8x8(
+ const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
+ const int16x8_t s3, const int16x8_t s4, const int16x8_t s5,
+ const int16x8_t s6, const int16x8_t s7, const int16x8_t filter,
+ const int16x8_t shift_round_0, const int16x8_t shift_by_bits) {
+ const int16x4_t filter_lo = vget_low_s16(filter);
+ const int16x4_t filter_hi = vget_high_s16(filter);
+ int16x8_t sum;
+
+ sum = vmulq_lane_s16(s0, filter_lo, 0);
+ sum = vmlaq_lane_s16(sum, s1, filter_lo, 1);
+ sum = vmlaq_lane_s16(sum, s2, filter_lo, 2);
+ sum = vmlaq_lane_s16(sum, s3, filter_lo, 3);
+ sum = vmlaq_lane_s16(sum, s4, filter_hi, 0);
+ sum = vmlaq_lane_s16(sum, s5, filter_hi, 1);
+ sum = vmlaq_lane_s16(sum, s6, filter_hi, 2);
+ sum = vmlaq_lane_s16(sum, s7, filter_hi, 3);
- return vqmovn_u16(res);
+ sum = vqrshlq_s16(sum, shift_round_0);
+ sum = vqrshlq_s16(sum, shift_by_bits);
+
+ return vqmovun_s16(sum);
}
void av1_convolve_x_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
@@ -213,10 +582,12 @@ void av1_convolve_x_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
assert((FILTER_BITS - conv_params->round_1) >= 0 ||
((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS));
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
+ const int16_t *x_filter_ptr = av1_get_interp_filter_subpel_kernel(
filter_params_x, subpel_x_qn & SUBPEL_MASK);
+ // Filter values are even, so downshift by 1 to reduce precision requirements.
+ const int16x8_t x_filter = vshrq_n_s16(vld1q_s16(x_filter_ptr), 1);
- const int16x8_t shift_round_0 = vdupq_n_s16(-conv_params->round_0);
+ const int16x8_t shift_round_0 = vdupq_n_s16(-conv_params->round_0 + 1);
const int16x8_t shift_by_bits = vdupq_n_s16(-bits);
src -= horiz_offset;
@@ -609,6 +980,8 @@ void av1_convolve_x_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
#endif
}
+#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+
void av1_convolve_y_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
const InterpFilterParams *filter_params_y,
@@ -622,8 +995,10 @@ void av1_convolve_y_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
src -= vert_offset * src_stride;
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
+ const int16_t *y_filter_ptr = av1_get_interp_filter_subpel_kernel(
filter_params_y, subpel_y_qn & SUBPEL_MASK);
+ // Filter values are even, so downshift by 1 to reduce precision requirements.
+ const int16x8_t y_filter = vshrq_n_s16(vld1q_s16(y_filter_ptr), 1);
if (w <= 4) {
uint8x8_t d01;
@@ -671,8 +1046,8 @@ void av1_convolve_y_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
d2 = convolve8_4x4(s2, s3, s4, s5, s6, s7, s8, s9, y_filter);
d3 = convolve8_4x4(s3, s4, s5, s6, s7, s8, s9, s10, y_filter);
- d01 = vqrshrun_n_s16(vcombine_s16(d0, d1), FILTER_BITS);
- d23 = vqrshrun_n_s16(vcombine_s16(d2, d3), FILTER_BITS);
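+ // The filter taps were halved, so the narrowing shift drops by 1.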
+ d01 = vqrshrun_n_s16(vcombine_s16(d0, d1), FILTER_BITS - 1);
+ d23 = vqrshrun_n_s16(vcombine_s16(d2, d3), FILTER_BITS - 1);
if ((w == 4) && (h != 2)) {
vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01),
0); // 00 01 02 03
@@ -722,7 +1097,7 @@ void av1_convolve_y_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
d0 = convolve8_4x4(s0, s1, s2, s3, s4, s5, s6, s7, y_filter);
- d01 = vqrshrun_n_s16(vcombine_s16(d0, d0), FILTER_BITS);
+ d01 = vqrshrun_n_s16(vcombine_s16(d0, d0), FILTER_BITS - 1);
if (w == 4) {
vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 0);
@@ -849,11 +1224,1211 @@ void av1_convolve_y_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
}
}
+#if defined(__aarch64__) && defined(__ARM_FEATURE_MATMUL_INT8)
+
+static INLINE int16x4_t convolve12_4_usdot(uint8x16_t samples,
+ const int8x16_t filters,
+ const uint8x16x3_t permute_tbl,
+ const int32x4_t horiz_const,
+ const int32x4_t shift_round_0) {
+ uint8x16_t permuted_samples[3];
+ int32x4_t sum;
+
+ /* Permute samples ready for dot product. */
+ /* { 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 } */
+ permuted_samples[0] = vqtbl1q_u8(samples, permute_tbl.val[0]);
+ /* { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 } */
+ permuted_samples[1] = vqtbl1q_u8(samples, permute_tbl.val[1]);
+ /* { 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14 } */
+ permuted_samples[2] = vqtbl1q_u8(samples, permute_tbl.val[2]);
+
+ /* First 4 output values. */
+ sum = vusdotq_laneq_s32(horiz_const, permuted_samples[0], filters, 0);
+ sum = vusdotq_laneq_s32(sum, permuted_samples[1], filters, 1);
+ sum = vusdotq_laneq_s32(sum, permuted_samples[2], filters, 2);
+
+ /* Narrow and re-pack. */
+ sum = vqrshlq_s32(sum, shift_round_0);
+
+ return vmovn_s32(sum);
+}
+
+static INLINE int16x8_t convolve12_8_usdot(uint8x16_t samples0,
+ uint8x16_t samples1,
+ const int8x16_t filters,
+ const uint8x16x3_t permute_tbl,
+ const int32x4_t horiz_const,
+ const int32x4_t shift_round_0) {
+ uint8x16_t permuted_samples[4];
+ int32x4_t sum[2];
+
+ /* Permute samples ready for dot product. */
+ /* { 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 } */
+ permuted_samples[0] = vqtbl1q_u8(samples0, permute_tbl.val[0]);
+ /* { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 } */
+ permuted_samples[1] = vqtbl1q_u8(samples0, permute_tbl.val[1]);
+ /* { 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14 } */
+ permuted_samples[2] = vqtbl1q_u8(samples0, permute_tbl.val[2]);
+ /* { 12, 13, 14, 15, 13, 14, 15, 16, 14, 15, 16, 17, 15, 16, 17, 18 } */
+ permuted_samples[3] = vqtbl1q_u8(samples1, permute_tbl.val[2]);
+
+ /* First 4 output values. */
+ sum[0] = vusdotq_laneq_s32(horiz_const, permuted_samples[0], filters, 0);
+ sum[0] = vusdotq_laneq_s32(sum[0], permuted_samples[1], filters, 1);
+ sum[0] = vusdotq_laneq_s32(sum[0], permuted_samples[2], filters, 2);
+ /* Second 4 output values. */
+ sum[1] = vusdotq_laneq_s32(horiz_const, permuted_samples[1], filters, 0);
+ sum[1] = vusdotq_laneq_s32(sum[1], permuted_samples[2], filters, 1);
+ sum[1] = vusdotq_laneq_s32(sum[1], permuted_samples[3], filters, 2);
+
+ /* Narrow and re-pack. */
+ sum[0] = vqrshlq_s32(sum[0], shift_round_0);
+ sum[1] = vqrshlq_s32(sum[1], shift_round_0);
+
+ return vcombine_s16(vmovn_s32(sum[0]), vmovn_s32(sum[1]));
+}
+
+static INLINE void av1_convolve_2d_sr_horiz_12tap_neon(
+ const uint8_t *src_ptr, int src_stride, int16_t *dst_ptr,
+ const int dst_stride, int w, int h, const int16x8_t x_filter_0_7,
+ const int16x4_t x_filter_8_11, const int round_0) {
+ const int bd = 8;
+
+ // Special case the following no-op filter as 128 won't fit into the
+ // 8-bit signed dot-product instruction:
+ // { 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0 }
+ if (vgetq_lane_s16(x_filter_0_7, 5) == 128) {
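+    // The identity filter scales each sample by exactly 1 << FILTER_BITS, so
+    // this pass reduces to adding the intermediate offset and shifting left
+    // by (FILTER_BITS - round_0).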
+ const int16x8_t horiz_const = vdupq_n_s16((1 << (bd - 1)));
+ const int16x8_t shift_round_0 = vdupq_n_s16(FILTER_BITS - round_0);
+ // Undo the horizontal offset in the calling function.
+ src_ptr += 5;
+
+ for (int i = 0; i < h; i++) {
+ for (int j = 0; j < w; j += 8) {
+ uint8x8_t s0 = vld1_u8(src_ptr + i * src_stride + j);
+ uint16x8_t t0 = vaddw_u8(vreinterpretq_u16_s16(horiz_const), s0);
+ int16x8_t d0 = vqrshlq_s16(vreinterpretq_s16_u16(t0), shift_round_0);
+ if (w == 2) {
+ vst1q_lane_s32((int32_t *)(dst_ptr + i * dst_stride),
+ vreinterpretq_s32_s16(d0), 0);
+ } else if (w == 4) {
+ vst1_s16(dst_ptr + i * dst_stride, vget_low_s16(d0));
+ } else {
+ vst1q_s16(dst_ptr + i * dst_stride + j, d0);
+ }
+ }
+ }
+ } else {
+ // Narrow filter values to 8-bit.
+ const int16x8x2_t x_filter_s16 = {
+ { x_filter_0_7, vcombine_s16(x_filter_8_11, vdup_n_s16(0)) }
+ };
+ const int8x16_t x_filter = vcombine_s8(vmovn_s16(x_filter_s16.val[0]),
+ vmovn_s16(x_filter_s16.val[1]));
+
+ const int32x4_t horiz_const = vdupq_n_s32((1 << (bd + FILTER_BITS - 1)));
+ const int32x4_t shift_round_0 = vdupq_n_s32(-round_0);
+ const uint8x16x3_t permute_tbl = vld1q_u8_x3(dot_prod_permute_tbl);
+
+ if (w <= 4) {
+ do {
+ const uint8_t *s = src_ptr;
+ int16_t *d = dst_ptr;
+ int width = w;
+
+ do {
+ uint8x16_t s0, s1, s2, s3;
+ int16x4_t d0, d1, d2, d3;
+
+ s0 = vld1q_u8(s + 0 * src_stride);
+ s1 = vld1q_u8(s + 1 * src_stride);
+ s2 = vld1q_u8(s + 2 * src_stride);
+ s3 = vld1q_u8(s + 3 * src_stride);
+
+ d0 = convolve12_4_usdot(s0, x_filter, permute_tbl, horiz_const,
+ shift_round_0);
+ d1 = convolve12_4_usdot(s1, x_filter, permute_tbl, horiz_const,
+ shift_round_0);
+ d2 = convolve12_4_usdot(s2, x_filter, permute_tbl, horiz_const,
+ shift_round_0);
+ d3 = convolve12_4_usdot(s3, x_filter, permute_tbl, horiz_const,
+ shift_round_0);
+
+ if (w == 2) {
+ vst1_lane_s32((int32_t *)(d + 0 * dst_stride),
+ vreinterpret_s32_s16(d0), 0);
+ vst1_lane_s32((int32_t *)(d + 1 * dst_stride),
+ vreinterpret_s32_s16(d1), 0);
+ vst1_lane_s32((int32_t *)(d + 2 * dst_stride),
+ vreinterpret_s32_s16(d2), 0);
+ vst1_lane_s32((int32_t *)(d + 3 * dst_stride),
+ vreinterpret_s32_s16(d3), 0);
+ } else {
+ vst1_s16(d + 0 * dst_stride, d0);
+ vst1_s16(d + 1 * dst_stride, d1);
+ vst1_s16(d + 2 * dst_stride, d2);
+ vst1_s16(d + 3 * dst_stride, d3);
+ }
+
+ s += 4;
+ d += 4;
+ width -= 4;
+ } while (width > 0);
+
+ src_ptr += 4 * src_stride;
+ dst_ptr += 4 * dst_stride;
+ h -= 4;
+ } while (h >= 4);
+
+ for (; h > 0; h--) {
+ const uint8_t *s = src_ptr;
+ int16_t *d = dst_ptr;
+ int width = w;
+
+ do {
+ uint8x16_t s0;
+ int16x4_t d0;
+
+ s0 = vld1q_u8(s);
+
+ d0 = convolve12_4_usdot(s0, x_filter, permute_tbl, horiz_const,
+ shift_round_0);
+
+ if (w == 2) {
+ vst1_lane_s32((int32_t *)d, vreinterpret_s32_s16(d0), 0);
+ } else {
+ vst1_s16(d, d0);
+ }
+
+ s += 4;
+ d += 4;
+ width -= 4;
+ } while (width > 0);
+
+ src_ptr += src_stride;
+ dst_ptr += dst_stride;
+ }
+ } else {
+ do {
+ const uint8_t *s = src_ptr;
+ int16_t *d = dst_ptr;
+ int width = w;
+
+ do {
+ uint8x16_t s0[2], s1[2], s2[2], s3[2];
+ int16x8_t d0, d1, d2, d3;
+
+ s0[0] = vld1q_u8(s + 0 * src_stride);
+ s1[0] = vld1q_u8(s + 1 * src_stride);
+ s2[0] = vld1q_u8(s + 2 * src_stride);
+ s3[0] = vld1q_u8(s + 3 * src_stride);
+ s0[1] = vld1q_u8(s + 0 * src_stride + 4);
+ s1[1] = vld1q_u8(s + 1 * src_stride + 4);
+ s2[1] = vld1q_u8(s + 2 * src_stride + 4);
+ s3[1] = vld1q_u8(s + 3 * src_stride + 4);
+
+ d0 = convolve12_8_usdot(s0[0], s0[1], x_filter, permute_tbl,
+ horiz_const, shift_round_0);
+ d1 = convolve12_8_usdot(s1[0], s1[1], x_filter, permute_tbl,
+ horiz_const, shift_round_0);
+ d2 = convolve12_8_usdot(s2[0], s2[1], x_filter, permute_tbl,
+ horiz_const, shift_round_0);
+ d3 = convolve12_8_usdot(s3[0], s3[1], x_filter, permute_tbl,
+ horiz_const, shift_round_0);
+
+ vst1q_s16(d + 0 * dst_stride, d0);
+ vst1q_s16(d + 1 * dst_stride, d1);
+ vst1q_s16(d + 2 * dst_stride, d2);
+ vst1q_s16(d + 3 * dst_stride, d3);
+
+ s += 8;
+ d += 8;
+ width -= 8;
+ } while (width > 0);
+
+ src_ptr += 4 * src_stride;
+ dst_ptr += 4 * dst_stride;
+ h -= 4;
+ } while (h >= 4);
+
+ for (; h > 0; h--) {
+ const uint8_t *s = src_ptr;
+ int16_t *d = dst_ptr;
+ int width = w;
+
+ do {
+ uint8x16_t s0[2];
+ int16x8_t d0;
+
+ s0[0] = vld1q_u8(s);
+ s0[1] = vld1q_u8(s + 4);
+
+ d0 = convolve12_8_usdot(s0[0], s0[1], x_filter, permute_tbl,
+ horiz_const, shift_round_0);
+
+ vst1q_s16(d, d0);
+
+ s += 8;
+ d += 8;
+ width -= 8;
+ } while (width > 0);
+
+ src_ptr += src_stride;
+ dst_ptr += dst_stride;
+ }
+ }
+ }
+}
+
+#elif defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+
+static INLINE int16x4_t convolve12_4_sdot(uint8x16_t samples,
+ const int8x16_t filters,
+ const int32x4_t correction,
+ const uint8x16_t range_limit,
+ const uint8x16x3_t permute_tbl,
+ const int32x4_t shift_round_0) {
+ int8x16_t clamped_samples, permuted_samples[3];
+ int32x4_t sum;
+
+ /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */
+ clamped_samples = vreinterpretq_s8_u8(vsubq_u8(samples, range_limit));
+
+ /* Permute samples ready for dot product. */
+ /* { 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 } */
+ permuted_samples[0] = vqtbl1q_s8(clamped_samples, permute_tbl.val[0]);
+ /* { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 } */
+ permuted_samples[1] = vqtbl1q_s8(clamped_samples, permute_tbl.val[1]);
+ /* { 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14 } */
+ permuted_samples[2] = vqtbl1q_s8(clamped_samples, permute_tbl.val[2]);
+
+ /* Accumulate dot product into 'correction' to account for range clamp. */
+ /* First 4 output values. */
+ sum = vdotq_laneq_s32(correction, permuted_samples[0], filters, 0);
+ sum = vdotq_laneq_s32(sum, permuted_samples[1], filters, 1);
+ sum = vdotq_laneq_s32(sum, permuted_samples[2], filters, 2);
+
+ /* Narrow and re-pack. */
+ sum = vqrshlq_s32(sum, shift_round_0);
+
+ return vmovn_s32(sum);
+}
+
+static INLINE int16x8_t convolve12_8_sdot(
+ uint8x16_t samples0, uint8x16_t samples1, const int8x16_t filters,
+ const int32x4_t correction, const uint8x16_t range_limit,
+ const uint8x16x3_t permute_tbl, const int32x4_t shift_round_0) {
+ int8x16_t clamped_samples[2], permuted_samples[4];
+ int32x4_t sum[2];
+
+ /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */
+ clamped_samples[0] = vreinterpretq_s8_u8(vsubq_u8(samples0, range_limit));
+ clamped_samples[1] = vreinterpretq_s8_u8(vsubq_u8(samples1, range_limit));
+
+ /* Permute samples ready for dot product. */
+ /* { 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 } */
+ permuted_samples[0] = vqtbl1q_s8(clamped_samples[0], permute_tbl.val[0]);
+ /* { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 } */
+ permuted_samples[1] = vqtbl1q_s8(clamped_samples[0], permute_tbl.val[1]);
+ /* { 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14 } */
+ permuted_samples[2] = vqtbl1q_s8(clamped_samples[0], permute_tbl.val[2]);
+ /* { 12, 13, 14, 15, 13, 14, 15, 16, 14, 15, 16, 17, 15, 16, 17, 18 } */
+ permuted_samples[3] = vqtbl1q_s8(clamped_samples[1], permute_tbl.val[2]);
+
+ /* Accumulate dot product into 'correction' to account for range clamp. */
+ /* First 4 output values. */
+ sum[0] = vdotq_laneq_s32(correction, permuted_samples[0], filters, 0);
+ sum[0] = vdotq_laneq_s32(sum[0], permuted_samples[1], filters, 1);
+ sum[0] = vdotq_laneq_s32(sum[0], permuted_samples[2], filters, 2);
+ /* Second 4 output values. */
+ sum[1] = vdotq_laneq_s32(correction, permuted_samples[1], filters, 0);
+ sum[1] = vdotq_laneq_s32(sum[1], permuted_samples[2], filters, 1);
+ sum[1] = vdotq_laneq_s32(sum[1], permuted_samples[3], filters, 2);
+
+ /* Narrow and re-pack. */
+ sum[0] = vqrshlq_s32(sum[0], shift_round_0);
+ sum[1] = vqrshlq_s32(sum[1], shift_round_0);
+
+ return vcombine_s16(vmovn_s32(sum[0]), vmovn_s32(sum[1]));
+}
+
+static INLINE void av1_convolve_2d_sr_horiz_12tap_neon(
+ const uint8_t *src_ptr, int src_stride, int16_t *dst_ptr,
+ const int dst_stride, int w, int h, const int16x8_t x_filter_0_7,
+ const int16x4_t x_filter_8_11, const int round_0) {
+ const int bd = 8;
+
+ // Special case the following no-op filter as 128 won't fit into the
+ // 8-bit signed dot-product instruction:
+ // { 0, 0, 0, 0, 0, 128, 0, 0, 0, 0, 0, 0 }
+ if (vgetq_lane_s16(x_filter_0_7, 5) == 128) {
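+    // The identity filter scales each sample by exactly 1 << FILTER_BITS, so
+    // this pass reduces to adding the intermediate offset and shifting left
+    // by (FILTER_BITS - round_0).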
+ const int16x8_t horiz_const = vdupq_n_s16((1 << (bd - 1)));
+ const int16x8_t shift_round_0 = vdupq_n_s16(FILTER_BITS - round_0);
+ // Undo the horizontal offset in the calling function.
+ src_ptr += 5;
+
+ for (int i = 0; i < h; i++) {
+ for (int j = 0; j < w; j += 8) {
+ uint8x8_t s0 = vld1_u8(src_ptr + i * src_stride + j);
+ uint16x8_t t0 = vaddw_u8(vreinterpretq_u16_s16(horiz_const), s0);
+ int16x8_t d0 = vqrshlq_s16(vreinterpretq_s16_u16(t0), shift_round_0);
+ if (w == 2) {
+ vst1q_lane_s32((int32_t *)(dst_ptr + i * dst_stride),
+ vreinterpretq_s32_s16(d0), 0);
+ } else if (w == 4) {
+ vst1_s16(dst_ptr + i * dst_stride, vget_low_s16(d0));
+ } else {
+ vst1q_s16(dst_ptr + i * dst_stride + j, d0);
+ }
+ }
+ }
+ } else {
+ const int32x4_t shift_round_0 = vdupq_n_s32(-round_0);
+
+ // Narrow filter values to 8-bit.
+ const int16x8x2_t x_filter_s16 = {
+ { x_filter_0_7, vcombine_s16(x_filter_8_11, vdup_n_s16(0)) }
+ };
+ const int8x16_t x_filter = vcombine_s8(vmovn_s16(x_filter_s16.val[0]),
+ vmovn_s16(x_filter_s16.val[1]));
+
+ // Dot product constants.
+ const int32_t horiz_const = (1 << (bd + FILTER_BITS - 1));
+ const int32x4_t correct_tmp =
+ vaddq_s32(vpaddlq_s16(vshlq_n_s16(x_filter_s16.val[0], 7)),
+ vpaddlq_s16(vshlq_n_s16(x_filter_s16.val[1], 7)));
+ const int32x4_t correction =
+ vdupq_n_s32(vaddvq_s32(correct_tmp) + horiz_const);
+ const uint8x16_t range_limit = vdupq_n_u8(128);
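+    // 'correction' compensates for the -128 range clamp (128 * the sum of
+    // the taps, built via the << 7 and pairwise-add sequence) and folds in
+    // the rounding offset horiz_const so both apply in one accumulator.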
+ const uint8x16x3_t permute_tbl = vld1q_u8_x3(dot_prod_permute_tbl);
+
+ if (w <= 4) {
+ do {
+ const uint8_t *s = src_ptr;
+ int16_t *d = dst_ptr;
+ int width = w;
+
+ do {
+ uint8x16_t s0, s1, s2, s3;
+ int16x4_t d0, d1, d2, d3;
+
+ s0 = vld1q_u8(s + 0 * src_stride);
+ s1 = vld1q_u8(s + 1 * src_stride);
+ s2 = vld1q_u8(s + 2 * src_stride);
+ s3 = vld1q_u8(s + 3 * src_stride);
+
+ d0 = convolve12_4_sdot(s0, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+ d1 = convolve12_4_sdot(s1, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+ d2 = convolve12_4_sdot(s2, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+ d3 = convolve12_4_sdot(s3, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+
+ if (w == 2) {
+ vst1_lane_s32((int32_t *)(d + 0 * dst_stride),
+ vreinterpret_s32_s16(d0), 0);
+ vst1_lane_s32((int32_t *)(d + 1 * dst_stride),
+ vreinterpret_s32_s16(d1), 0);
+ vst1_lane_s32((int32_t *)(d + 2 * dst_stride),
+ vreinterpret_s32_s16(d2), 0);
+ vst1_lane_s32((int32_t *)(d + 3 * dst_stride),
+ vreinterpret_s32_s16(d3), 0);
+ } else {
+ vst1_s16(d + 0 * dst_stride, d0);
+ vst1_s16(d + 1 * dst_stride, d1);
+ vst1_s16(d + 2 * dst_stride, d2);
+ vst1_s16(d + 3 * dst_stride, d3);
+ }
+
+ s += 4;
+ d += 4;
+ width -= 4;
+ } while (width > 0);
+
+ src_ptr += 4 * src_stride;
+ dst_ptr += 4 * dst_stride;
+ h -= 4;
+ } while (h >= 4);
+
+ for (; h > 0; h--) {
+ const uint8_t *s = src_ptr;
+ int16_t *d = dst_ptr;
+ int width = w;
+
+ do {
+ uint8x16_t s0;
+ int16x4_t d0;
+
+ s0 = vld1q_u8(s);
+
+ d0 = convolve12_4_sdot(s0, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+
+ if (w == 2) {
+ vst1_lane_s32((int32_t *)d, vreinterpret_s32_s16(d0), 0);
+ } else {
+ vst1_s16(d, d0);
+ }
+
+ s += 4;
+ d += 4;
+ width -= 4;
+ } while (width > 0);
+
+ src_ptr += src_stride;
+ dst_ptr += dst_stride;
+ }
+ } else {
+ do {
+ const uint8_t *s = src_ptr;
+ int16_t *d = dst_ptr;
+ int width = w;
+
+ do {
+ uint8x16_t s0[2], s1[2], s2[2], s3[2];
+ int16x8_t d0, d1, d2, d3;
+
+ s0[0] = vld1q_u8(s + 0 * src_stride);
+ s1[0] = vld1q_u8(s + 1 * src_stride);
+ s2[0] = vld1q_u8(s + 2 * src_stride);
+ s3[0] = vld1q_u8(s + 3 * src_stride);
+ s0[1] = vld1q_u8(s + 0 * src_stride + 4);
+ s1[1] = vld1q_u8(s + 1 * src_stride + 4);
+ s2[1] = vld1q_u8(s + 2 * src_stride + 4);
+ s3[1] = vld1q_u8(s + 3 * src_stride + 4);
+
+ d0 = convolve12_8_sdot(s0[0], s0[1], x_filter, correction,
+ range_limit, permute_tbl, shift_round_0);
+ d1 = convolve12_8_sdot(s1[0], s1[1], x_filter, correction,
+ range_limit, permute_tbl, shift_round_0);
+ d2 = convolve12_8_sdot(s2[0], s2[1], x_filter, correction,
+ range_limit, permute_tbl, shift_round_0);
+ d3 = convolve12_8_sdot(s3[0], s3[1], x_filter, correction,
+ range_limit, permute_tbl, shift_round_0);
+
+ vst1q_s16(d + 0 * dst_stride, d0);
+ vst1q_s16(d + 1 * dst_stride, d1);
+ vst1q_s16(d + 2 * dst_stride, d2);
+ vst1q_s16(d + 3 * dst_stride, d3);
+
+ s += 8;
+ d += 8;
+ width -= 8;
+ } while (width > 0);
+
+ src_ptr += 4 * src_stride;
+ dst_ptr += 4 * dst_stride;
+ h -= 4;
+ } while (h >= 4);
+
+ for (; h > 0; h--) {
+ const uint8_t *s = src_ptr;
+ int16_t *d = dst_ptr;
+ int width = w;
+
+ do {
+ uint8x16_t s0[2];
+ int16x8_t d0;
+
+ s0[0] = vld1q_u8(s);
+ s0[1] = vld1q_u8(s + 4);
+
+ d0 = convolve12_8_sdot(s0[0], s0[1], x_filter, correction,
+ range_limit, permute_tbl, shift_round_0);
+
+ vst1q_s16(d, d0);
+
+ s += 8;
+ d += 8;
+ width -= 8;
+ } while (width > 0);
+
+ src_ptr += src_stride;
+ dst_ptr += dst_stride;
+ }
+ }
+ }
+}
+
+#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+
+static INLINE int16x4_t convolve12_horiz_4x4_s16(
+ const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
+ const int16x4_t s3, const int16x4_t s4, const int16x4_t s5,
+ const int16x4_t s6, const int16x4_t s7, const int16x4_t s8,
+ const int16x4_t s9, const int16x4_t s10, const int16x4_t s11,
+ const int16x8_t x_filter_0_7, const int16x4_t x_filter_8_11,
+ const int32x4_t horiz_const, const int32x4_t shift_round_0) {
+ const int16x4_t x_filter_0_3 = vget_low_s16(x_filter_0_7);
+ const int16x4_t x_filter_4_7 = vget_high_s16(x_filter_0_7);
+ int32x4_t sum;
+
+ sum = horiz_const;
+ sum = vmlal_lane_s16(sum, s0, x_filter_0_3, 0);
+ sum = vmlal_lane_s16(sum, s1, x_filter_0_3, 1);
+ sum = vmlal_lane_s16(sum, s2, x_filter_0_3, 2);
+ sum = vmlal_lane_s16(sum, s3, x_filter_0_3, 3);
+ sum = vmlal_lane_s16(sum, s4, x_filter_4_7, 0);
+ sum = vmlal_lane_s16(sum, s5, x_filter_4_7, 1);
+ sum = vmlal_lane_s16(sum, s6, x_filter_4_7, 2);
+ sum = vmlal_lane_s16(sum, s7, x_filter_4_7, 3);
+ sum = vmlal_lane_s16(sum, s8, x_filter_8_11, 0);
+ sum = vmlal_lane_s16(sum, s9, x_filter_8_11, 1);
+ sum = vmlal_lane_s16(sum, s10, x_filter_8_11, 2);
+ sum = vmlal_lane_s16(sum, s11, x_filter_8_11, 3);
+
+ sum = vqrshlq_s32(sum, shift_round_0);
+
+ return vmovn_s32(sum);
+}
+
+// Horizontal filtering for 12-tap convolve_2d_sr, processing 4 columns per
+// iteration and one row at a time.
+static INLINE void horiz_filter_12tap_w4_single_row(
+ const uint8_t *src_ptr, int src_stride, int16_t *dst_ptr,
+ const int dst_stride, int w, int h, const int16x8_t x_filter_0_7,
+ const int16x4_t x_filter_8_11, const int32x4_t horiz_const,
+ const int32x4_t shift_round_0) {
+ do {
+ const uint8_t *s = src_ptr;
+ int16_t *d = dst_ptr;
+ int width = w;
+
+ do {
+ int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, d0;
+ uint8x16_t t0;
+ int16x8_t tt0, tt1;
+
+ t0 = vld1q_u8(s);
+ tt0 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(t0)));
+ tt1 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(t0)));
+
+ s0 = vget_low_s16(tt0);
+ s4 = vget_high_s16(tt0);
+ s8 = vget_low_s16(tt1);
+ s12 = vget_high_s16(tt1);
+
+ s1 = vext_s16(s0, s4, 1); // a1 a2 a3 a4
+ s2 = vext_s16(s0, s4, 2); // a2 a3 a4 a5
+ s3 = vext_s16(s0, s4, 3); // a3 a4 a5 a6
+ s5 = vext_s16(s4, s8, 1); // a5 a6 a7 a8
+ s6 = vext_s16(s4, s8, 2); // a6 a7 a8 a9
+ s7 = vext_s16(s4, s8, 3); // a7 a8 a9 a10
+ s9 = vext_s16(s8, s12, 1); // a9 a10 a11 a12
+ s10 = vext_s16(s8, s12, 2); // a10 a11 a12 a13
+ s11 = vext_s16(s8, s12, 3); // a11 a12 a13 a14
+
+ d0 = convolve12_horiz_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10,
+ s11, x_filter_0_7, x_filter_8_11,
+ horiz_const, shift_round_0);
+
+ if (w == 2) {
+ vst1_lane_s32((int32_t *)d, vreinterpret_s32_s16(d0), 0);
+ } else {
+ vst1_s16(d, d0);
+ }
+
+ s += 4;
+ d += 4;
+ width -= 4;
+ } while (width > 0);
+
+ src_ptr += src_stride;
+ dst_ptr += dst_stride;
+ h--;
+ } while (h > 0);
+}
+
+static INLINE void av1_convolve_2d_sr_horiz_12tap_neon(
+ const uint8_t *src_ptr, int src_stride, int16_t *dst_ptr,
+ const int dst_stride, int w, int h, const int16x8_t x_filter_0_7,
+ const int16x4_t x_filter_8_11, const int round_0) {
+ const int bd = 8;
+ const int32x4_t shift_round_0 = vdupq_n_s32(-(round_0));
+ const int32x4_t horiz_const = vdupq_n_s32((1 << (bd + FILTER_BITS - 1)));
+
+#if defined(__aarch64__)
+ do {
+ int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+ uint8x8_t t0, t1, t2, t3;
+
+ const uint8_t *s = src_ptr;
+ int16_t *d = dst_ptr;
+ int width = w;
+
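+    // Transpose the four loaded rows so that each vector holds one 4-pixel
+    // column; the filter is applied down the columns and the 4x4 output
+    // block is transposed back before storing.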
+ load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3);
+ transpose_u8_8x4(&t0, &t1, &t2, &t3);
+
+ s0 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
+ s1 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
+ s2 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
+ s3 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t3)));
+ s4 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
+ s5 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
+ s6 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
+ s7 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t3)));
+
+ load_u8_8x4(s + 8, src_stride, &t0, &t1, &t2, &t3);
+ transpose_u8_8x4(&t0, &t1, &t2, &t3);
+
+ s8 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
+ s9 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
+ s10 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
+
+ s += 11;
+
+ do {
+ int16x4_t s11, s12, s13, s14, d0, d1, d2, d3;
+
+ load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3);
+ transpose_u8_8x4(&t0, &t1, &t2, &t3);
+
+ s11 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
+ s12 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
+ s13 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
+ s14 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t3)));
+
+ d0 = convolve12_horiz_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10,
+ s11, x_filter_0_7, x_filter_8_11,
+ horiz_const, shift_round_0);
+ d1 = convolve12_horiz_4x4_s16(s1, s2, s3, s4, s5, s6, s7, s8, s9, s10,
+ s11, s12, x_filter_0_7, x_filter_8_11,
+ horiz_const, shift_round_0);
+ d2 = convolve12_horiz_4x4_s16(s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
+ s12, s13, x_filter_0_7, x_filter_8_11,
+ horiz_const, shift_round_0);
+ d3 = convolve12_horiz_4x4_s16(s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
+ s13, s14, x_filter_0_7, x_filter_8_11,
+ horiz_const, shift_round_0);
+
+ transpose_s16_4x4d(&d0, &d1, &d2, &d3);
+
+ if (w == 2) {
+ vst1_lane_s32((int32_t *)(d + 0 * dst_stride), vreinterpret_s32_s16(d0),
+ 0);
+ vst1_lane_s32((int32_t *)(d + 1 * dst_stride), vreinterpret_s32_s16(d1),
+ 0);
+ vst1_lane_s32((int32_t *)(d + 2 * dst_stride), vreinterpret_s32_s16(d2),
+ 0);
+ vst1_lane_s32((int32_t *)(d + 3 * dst_stride), vreinterpret_s32_s16(d3),
+ 0);
+ } else {
+ vst1_s16((d + 0 * dst_stride), d0);
+ vst1_s16((d + 1 * dst_stride), d1);
+ vst1_s16((d + 2 * dst_stride), d2);
+ vst1_s16((d + 3 * dst_stride), d3);
+ }
+
+ s0 = s4;
+ s1 = s5;
+ s2 = s6;
+ s3 = s7;
+ s4 = s8;
+ s5 = s9;
+ s6 = s10;
+ s7 = s11;
+ s8 = s12;
+ s9 = s13;
+ s10 = s14;
+
+ s += 4;
+ d += 4;
+ width -= 4;
+ } while (width > 0);
+
+ src_ptr += 4 * src_stride;
+ dst_ptr += 4 * dst_stride;
+ h -= 4;
+ } while (h >= 4);
+
+ if (h) {
+ horiz_filter_12tap_w4_single_row(src_ptr, src_stride, dst_ptr, dst_stride,
+ w, h, x_filter_0_7, x_filter_8_11,
+ horiz_const, shift_round_0);
+ }
+#else // !defined(__aarch64__)
+ horiz_filter_12tap_w4_single_row(src_ptr, src_stride, dst_ptr, dst_stride, w,
+ h, x_filter_0_7, x_filter_8_11, horiz_const,
+ shift_round_0);
+#endif // defined(__aarch64__)
+}
+
+#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+
+static INLINE void av1_convolve_2d_sr_vert_12tap_neon(
+ int16_t *src_ptr, int src_stride, uint8_t *dst_ptr, int dst_stride, int w,
+ int h, const int16x8_t y_filter_0_7, const int16x4_t y_filter_8_11,
+ ConvolveParams *conv_params) {
+ const int bd = 8;
+ const int16_t round_bits =
+ FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
+ const int16x8_t vec_round_bits = vdupq_n_s16(-round_bits);
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ const int32_t sub_const = (1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1));
+ const int32x4_t round_shift_vec = vdupq_n_s32(-(conv_params->round_1));
+ const int32x4_t offset_const = vdupq_n_s32(1 << offset_bits);
+ const int32x4_t sub_const_vec = vdupq_n_s32(sub_const);
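+  // offset_const keeps the intermediate sum non-negative through the round_1
+  // shift; sub_const_vec then removes the shifted bias before the final
+  // round_bits shift narrows the result to 8 bits.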
+
+ if (w <= 4) {
+ int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14;
+ int16x4_t d0, d1, d2, d3;
+ int16x8_t dd01, dd23;
+ uint8x8_t d01, d23;
+
+ load_s16_4x8(src_ptr, src_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7);
+ src_ptr += (8 * src_stride);
+ load_s16_4x4(src_ptr, src_stride, &s8, &s9, &s10, &s11);
+ src_ptr += (3 * src_stride);
+
+ do {
+ load_s16_4x4(src_ptr, src_stride, &s11, &s12, &s13, &s14);
+ src_ptr += 4 * src_stride;
+
+ d0 = convolve12_vert_4x4_s32(
+ s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, y_filter_0_7,
+ y_filter_8_11, round_shift_vec, offset_const, sub_const_vec);
+ d1 = convolve12_vert_4x4_s32(
+ s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, y_filter_0_7,
+ y_filter_8_11, round_shift_vec, offset_const, sub_const_vec);
+ d2 = convolve12_vert_4x4_s32(
+ s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, y_filter_0_7,
+ y_filter_8_11, round_shift_vec, offset_const, sub_const_vec);
+ d3 = convolve12_vert_4x4_s32(
+ s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, y_filter_0_7,
+ y_filter_8_11, round_shift_vec, offset_const, sub_const_vec);
+
+ dd01 = vqrshlq_s16(vcombine_s16(d0, d1), vec_round_bits);
+ dd23 = vqrshlq_s16(vcombine_s16(d2, d3), vec_round_bits);
+
+ d01 = vqmovun_s16(dd01);
+ d23 = vqmovun_s16(dd23);
+
+ if (w == 2) {
+ vst1_lane_u16((uint16_t *)dst_ptr, vreinterpret_u16_u8(d01), 0);
+ dst_ptr += dst_stride;
+ vst1_lane_u16((uint16_t *)dst_ptr, vreinterpret_u16_u8(d01), 2);
+ dst_ptr += dst_stride;
+ if (h != 2) {
+ vst1_lane_u16((uint16_t *)dst_ptr, vreinterpret_u16_u8(d23), 0);
+ dst_ptr += dst_stride;
+ vst1_lane_u16((uint16_t *)dst_ptr, vreinterpret_u16_u8(d23), 2);
+ dst_ptr += dst_stride;
+ }
+ } else {
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d01), 0);
+ dst_ptr += dst_stride;
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d01), 1);
+ dst_ptr += dst_stride;
+ if (h != 2) {
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d23), 0);
+ dst_ptr += dst_stride;
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d23), 1);
+ dst_ptr += dst_stride;
+ }
+ }
+
+ s0 = s4;
+ s1 = s5;
+ s2 = s6;
+ s3 = s7;
+ s4 = s8;
+ s5 = s9;
+ s6 = s10;
+ s7 = s11;
+ s8 = s12;
+ s9 = s13;
+ s10 = s14;
+ h -= 4;
+ } while (h > 0);
+
+ } else {
+ do {
+ int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14;
+ uint8x8_t d0, d1, d2, d3;
+
+ int16_t *s = src_ptr;
+ uint8_t *d = dst_ptr;
+
+ int height = h;
+
+ load_s16_8x8(s, src_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7);
+ s += (8 * src_stride);
+ load_s16_8x4(s, src_stride, &s8, &s9, &s10, &s11);
+ s += (3 * src_stride);
+
+ do {
+ load_s16_8x4(s, src_stride, &s11, &s12, &s13, &s14);
+ s += 4 * src_stride;
+
+ d0 = convolve12_vert_8x4_s32(s0, s1, s2, s3, s4, s5, s6, s7, s8, s9,
+ s10, s11, y_filter_0_7, y_filter_8_11,
+ round_shift_vec, offset_const,
+ sub_const_vec, vec_round_bits);
+ d1 = convolve12_vert_8x4_s32(s1, s2, s3, s4, s5, s6, s7, s8, s9, s10,
+ s11, s12, y_filter_0_7, y_filter_8_11,
+ round_shift_vec, offset_const,
+ sub_const_vec, vec_round_bits);
+ d2 = convolve12_vert_8x4_s32(s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
+ s12, s13, y_filter_0_7, y_filter_8_11,
+ round_shift_vec, offset_const,
+ sub_const_vec, vec_round_bits);
+ d3 = convolve12_vert_8x4_s32(s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
+ s13, s14, y_filter_0_7, y_filter_8_11,
+ round_shift_vec, offset_const,
+ sub_const_vec, vec_round_bits);
+
+ vst1_u8(d, d0);
+ d += dst_stride;
+ vst1_u8(d, d1);
+ d += dst_stride;
+ if (h != 2) {
+ vst1_u8(d, d2);
+ d += dst_stride;
+ vst1_u8(d, d3);
+ d += dst_stride;
+ }
+
+ s0 = s4;
+ s1 = s5;
+ s2 = s6;
+ s3 = s7;
+ s4 = s8;
+ s5 = s9;
+ s6 = s10;
+ s7 = s11;
+ s8 = s12;
+ s9 = s13;
+ s10 = s14;
+ height -= 4;
+ } while (height > 0);
+
+ src_ptr += 8;
+ dst_ptr += 8;
+ w -= 8;
+ } while (w > 0);
+ }
+}
+
+#if defined(__aarch64__) && defined(__ARM_FEATURE_MATMUL_INT8)
+
+static INLINE void av1_convolve_2d_sr_horiz_neon(
+ const uint8_t *src, int src_stride, int16_t *im_block, int im_stride, int w,
+ int im_h, const int16x8_t x_filter_s16, const int round_0) {
+ const int bd = 8;
+
+ const uint8_t *src_ptr = src;
+ int16_t *dst_ptr = im_block;
+ int dst_stride = im_stride;
+
+ int height = im_h;
+
+ // Filter values are even, so downshift by 1 to reduce intermediate precision
+ // requirements.
+ const int8x8_t x_filter = vshrn_n_s16(x_filter_s16, 1);
+ const int32x4_t horiz_const = vdupq_n_s32(1 << (bd + FILTER_BITS - 2));
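+  // The usual intermediate offset is 1 << (bd + FILTER_BITS - 1); it is
+  // pre-halved here to match the halved filter taps.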
+
+ assert(round_0 > 0);
+
+ if (w <= 4) {
+ const uint8x16x2_t permute_tbl = vld1q_u8_x2(dot_prod_permute_tbl);
+ const int16x4_t shift_round_0 = vdup_n_s16(-(round_0 - 1));
+ uint8x16_t s0, s1, s2, s3;
+ int32x4_t t0, t1, t2, t3;
+ int16x4_t d0, d1, d2, d3;
+
+ do {
+ assert(height >= 4);
+
+ load_u8_8x16(src_ptr, src_stride, &s0, &s1, &s2, &s3);
+
+ t0 = convolve8_4_usdot(s0, x_filter, permute_tbl, horiz_const);
+ t1 = convolve8_4_usdot(s1, x_filter, permute_tbl, horiz_const);
+ t2 = convolve8_4_usdot(s2, x_filter, permute_tbl, horiz_const);
+ t3 = convolve8_4_usdot(s3, x_filter, permute_tbl, horiz_const);
+
+ d0 = vqrshl_s16(vmovn_s32(t0), shift_round_0);
+ d1 = vqrshl_s16(vmovn_s32(t1), shift_round_0);
+ d2 = vqrshl_s16(vmovn_s32(t2), shift_round_0);
+ d3 = vqrshl_s16(vmovn_s32(t3), shift_round_0);
+
+ if (w == 2) {
+ vst1_lane_u32((uint32_t *)(dst_ptr + 0 * dst_stride),
+ vreinterpret_u32_s16(d0), 0);
+ vst1_lane_u32((uint32_t *)(dst_ptr + 1 * dst_stride),
+ vreinterpret_u32_s16(d1), 0);
+ vst1_lane_u32((uint32_t *)(dst_ptr + 2 * dst_stride),
+ vreinterpret_u32_s16(d2), 0);
+ vst1_lane_u32((uint32_t *)(dst_ptr + 3 * dst_stride),
+ vreinterpret_u32_s16(d3), 0);
+ } else {
+ vst1_s16(dst_ptr + 0 * dst_stride, d0);
+ vst1_s16(dst_ptr + 1 * dst_stride, d1);
+ vst1_s16(dst_ptr + 2 * dst_stride, d2);
+ vst1_s16(dst_ptr + 3 * dst_stride, d3);
+ }
+
+ src_ptr += 4 * src_stride;
+ dst_ptr += 4 * dst_stride;
+ height -= 4;
+ } while (height >= 4);
+
+ if (height) {
+ assert(height < 4);
+
+ do {
+ s0 = vld1q_u8(src_ptr);
+ t0 = convolve8_4_usdot(s0, x_filter, permute_tbl, horiz_const);
+ d0 = vqrshl_s16(vmovn_s32(t0), shift_round_0);
+
+ if (w == 2) {
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_s16(d0), 0);
+ } else {
+ vst1_s16(dst_ptr, d0);
+ }
+
+ src_ptr += src_stride;
+ dst_ptr += dst_stride;
+ height--;
+ } while (height > 0);
+ }
+ } else {
+ const uint8x16x3_t permute_tbl = vld1q_u8_x3(dot_prod_permute_tbl);
+ const int16x8_t shift_round_0 = vdupq_n_s16(-(round_0 - 1));
+ uint8x16_t s0, s1, s2, s3;
+ int16x8_t d0, d1, d2, d3;
+
+ do {
+ assert(height >= 4);
+
+ const uint8_t *s = src_ptr;
+ int16_t *d = dst_ptr;
+ int width = w;
+
+ do {
+ s0 = vld1q_u8(s + 0 * src_stride);
+ s1 = vld1q_u8(s + 1 * src_stride);
+ s2 = vld1q_u8(s + 2 * src_stride);
+ s3 = vld1q_u8(s + 3 * src_stride);
+
+ d0 = convolve8_8_usdot(s0, x_filter, permute_tbl, horiz_const,
+ shift_round_0);
+ d1 = convolve8_8_usdot(s1, x_filter, permute_tbl, horiz_const,
+ shift_round_0);
+ d2 = convolve8_8_usdot(s2, x_filter, permute_tbl, horiz_const,
+ shift_round_0);
+ d3 = convolve8_8_usdot(s3, x_filter, permute_tbl, horiz_const,
+ shift_round_0);
+
+ vst1q_s16(d + 0 * dst_stride, d0);
+ vst1q_s16(d + 1 * dst_stride, d1);
+ vst1q_s16(d + 2 * dst_stride, d2);
+ vst1q_s16(d + 3 * dst_stride, d3);
+
+ s += 8;
+ d += 8;
+ width -= 8;
+ } while (width > 0);
+
+ src_ptr += 4 * src_stride;
+ dst_ptr += 4 * dst_stride;
+ height -= 4;
+ } while (height >= 4);
+
+ if (height) {
+ assert(height < 4);
+
+ do {
+ const uint8_t *s = src_ptr;
+ int16_t *d = dst_ptr;
+ int width = w;
+
+ do {
+ s0 = vld1q_u8(s);
+ d0 = convolve8_8_usdot(s0, x_filter, permute_tbl, horiz_const,
+ shift_round_0);
+ vst1q_s16(d, d0);
+
+ s += 8;
+ d += 8;
+ width -= 8;
+ } while (width > 0);
+
+ src_ptr += src_stride;
+ dst_ptr += dst_stride;
+ height--;
+ } while (height > 0);
+ }
+ }
+}
+
+#elif defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+
+static INLINE void av1_convolve_2d_sr_horiz_neon(
+ const uint8_t *src, int src_stride, int16_t *im_block, int im_stride, int w,
+ int im_h, const int16x8_t x_filter_s16, const int round_0) {
+ const int bd = 8;
+
+ const uint8_t *src_ptr = src;
+ int16_t *dst_ptr = im_block;
+ int dst_stride = im_stride;
+
+ int height = im_h;
+
+ // Filter values are even, so downshift by 1 to reduce intermediate precision
+ // requirements.
+ const int8x8_t x_filter = vshrn_n_s16(x_filter_s16, 1);
+ const int32_t horiz_const = (1 << (bd + FILTER_BITS - 2));
+ // Dot product constants.
+ const int16x8_t correct_tmp = vshlq_n_s16(x_filter_s16, 6);
+ const int32x4_t correction =
+ vdupq_n_s32(vaddlvq_s16(correct_tmp) + horiz_const);
+ const uint8x16_t range_limit = vdupq_n_u8(128);
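+  // The taps were halved above, so the range-clamp correction is
+  // 64 * sum(x_filter_s16) (equivalently 128 * the halved taps), with the
+  // pre-halved rounding offset folded into the same constant.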
+
+ assert(round_0 > 0);
+
+ if (w <= 4) {
+ const uint8x16x2_t permute_tbl = vld1q_u8_x2(dot_prod_permute_tbl);
+ const int16x4_t shift_round_0 = vdup_n_s16(-(round_0 - 1));
+ uint8x16_t s0, s1, s2, s3;
+ int32x4_t t0, t1, t2, t3;
+ int16x4_t d0, d1, d2, d3;
+
+ do {
+ assert(height >= 4);
+
+ load_u8_8x16(src_ptr, src_stride, &s0, &s1, &s2, &s3);
+
+ t0 = convolve8_4_sdot(s0, x_filter, correction, range_limit, permute_tbl);
+ t1 = convolve8_4_sdot(s1, x_filter, correction, range_limit, permute_tbl);
+ t2 = convolve8_4_sdot(s2, x_filter, correction, range_limit, permute_tbl);
+ t3 = convolve8_4_sdot(s3, x_filter, correction, range_limit, permute_tbl);
+
+ d0 = vqrshl_s16(vmovn_s32(t0), shift_round_0);
+ d1 = vqrshl_s16(vmovn_s32(t1), shift_round_0);
+ d2 = vqrshl_s16(vmovn_s32(t2), shift_round_0);
+ d3 = vqrshl_s16(vmovn_s32(t3), shift_round_0);
+
+ if (w == 2) {
+ vst1_lane_u32((uint32_t *)(dst_ptr + 0 * dst_stride),
+ vreinterpret_u32_s16(d0), 0);
+ vst1_lane_u32((uint32_t *)(dst_ptr + 1 * dst_stride),
+ vreinterpret_u32_s16(d1), 0);
+ vst1_lane_u32((uint32_t *)(dst_ptr + 2 * dst_stride),
+ vreinterpret_u32_s16(d2), 0);
+ vst1_lane_u32((uint32_t *)(dst_ptr + 3 * dst_stride),
+ vreinterpret_u32_s16(d3), 0);
+ } else {
+ vst1_s16(dst_ptr + 0 * dst_stride, d0);
+ vst1_s16(dst_ptr + 1 * dst_stride, d1);
+ vst1_s16(dst_ptr + 2 * dst_stride, d2);
+ vst1_s16(dst_ptr + 3 * dst_stride, d3);
+ }
+
+ src_ptr += 4 * src_stride;
+ dst_ptr += 4 * dst_stride;
+ height -= 4;
+ } while (height >= 4);
+
+ if (height) {
+ assert(height < 4);
+
+ do {
+ s0 = vld1q_u8(src_ptr);
+ t0 = convolve8_4_sdot(s0, x_filter, correction, range_limit,
+ permute_tbl);
+ d0 = vqrshl_s16(vmovn_s32(t0), shift_round_0);
+
+ if (w == 2) {
+ vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_s16(d0), 0);
+ } else {
+ vst1_s16(dst_ptr, d0);
+ }
+
+ src_ptr += src_stride;
+ dst_ptr += dst_stride;
+ height--;
+ } while (height > 0);
+ }
+ } else {
+ const uint8x16x3_t permute_tbl = vld1q_u8_x3(dot_prod_permute_tbl);
+ const int16x8_t shift_round_0 = vdupq_n_s16(-(round_0 - 1));
+ uint8x16_t s0, s1, s2, s3;
+ int16x8_t d0, d1, d2, d3;
+
+ do {
+ assert(height >= 4);
+
+ const uint8_t *s = src_ptr;
+ int16_t *d = dst_ptr;
+ int width = w;
+
+ do {
+ s0 = vld1q_u8(s + 0 * src_stride);
+ s1 = vld1q_u8(s + 1 * src_stride);
+ s2 = vld1q_u8(s + 2 * src_stride);
+ s3 = vld1q_u8(s + 3 * src_stride);
+
+ d0 = convolve8_8_sdot(s0, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+ d1 = convolve8_8_sdot(s1, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+ d2 = convolve8_8_sdot(s2, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+ d3 = convolve8_8_sdot(s3, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+
+ vst1q_s16(d + 0 * dst_stride, d0);
+ vst1q_s16(d + 1 * dst_stride, d1);
+ vst1q_s16(d + 2 * dst_stride, d2);
+ vst1q_s16(d + 3 * dst_stride, d3);
+
+ s += 8;
+ d += 8;
+ width -= 8;
+ } while (width > 0);
+
+ src_ptr += 4 * src_stride;
+ dst_ptr += 4 * dst_stride;
+ height -= 4;
+ } while (height >= 4);
+
+ if (height) {
+ assert(height < 4);
+
+ do {
+ const uint8_t *s = src_ptr;
+ int16_t *d = dst_ptr;
+ int width = w;
+
+ do {
+ s0 = vld1q_u8(s);
+ d0 = convolve8_8_sdot(s0, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+ vst1q_s16(d, d0);
+
+ s += 8;
+ d += 8;
+ width -= 8;
+ } while (width > 0);
+
+ src_ptr += src_stride;
+ dst_ptr += dst_stride;
+ height--;
+ } while (height > 0);
+ }
+ }
+}
+
+#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+
// Horizontal filtering for convolve_2d_sr for widths that are a multiple of 8
// Processes one row at a time
static INLINE void horiz_filter_w8_single_row(
const uint8_t *src_ptr, int src_stride, int16_t *dst_ptr,
- const int dst_stride, int width, int height, const int16_t *x_filter,
+ const int dst_stride, int width, int height, const int16x8_t x_filter,
const int16x8_t horiz_const, const int16x8_t shift_round_0) {
int16x8_t s0, s1, s2, s3, s4, s5, s6, s7;
do {
@@ -899,7 +2474,7 @@ static INLINE void horiz_filter_w8_single_row(
// Processes one row at a time
static INLINE void horiz_filter_w4_single_row(
const uint8_t *src_ptr, int src_stride, int16_t *dst_ptr,
- const int dst_stride, int width, int height, const int16_t *x_filter,
+ const int dst_stride, int width, int height, const int16x8_t x_filter,
const int16x4_t horiz_const, const int16x4_t shift_round_0) {
int16x4_t s0, s1, s2, s3, s4, s5, s6, s7;
do {
@@ -928,87 +2503,46 @@ static INLINE void horiz_filter_w4_single_row(
int16x4_t d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter,
horiz_const, shift_round_0);
- if (width == 4) {
- vst1_s16(dst_ptr, d0);
- dst_ptr += dst_stride;
- } else if (width == 2) {
+ if (width == 2) {
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_s16(d0), 0);
- dst_ptr += dst_stride;
+ } else {
+ vst1_s16(dst_ptr, d0);
}
+ dst_ptr += dst_stride;
src_ptr += src_stride;
height--;
} while (height > 0);
}
-void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_qn, const int subpel_y_qn,
- ConvolveParams *conv_params) {
- if (filter_params_x->taps > 8) {
- av1_convolve_2d_sr_c(src, src_stride, dst, dst_stride, w, h,
- filter_params_x, filter_params_y, subpel_x_qn,
- subpel_y_qn, conv_params);
- return;
- }
- int im_dst_stride;
- int width, height;
-#if defined(__aarch64__)
- uint8x8_t t0;
- uint8x8_t t1, t2, t3, t4, t5, t6, t7;
- const uint8_t *s;
-#endif
-
- DECLARE_ALIGNED(16, int16_t,
- im_block[(MAX_SB_SIZE + HORIZ_EXTRA_ROWS) * MAX_SB_SIZE]);
-
+static INLINE void av1_convolve_2d_sr_horiz_neon(
+ const uint8_t *src, int src_stride, int16_t *im_block, int im_stride, int w,
+ int im_h, const int16x8_t x_filter_s16, const int round_0) {
const int bd = 8;
- const int im_h = h + filter_params_y->taps - 1;
- const int im_stride = MAX_SB_SIZE;
- const int vert_offset = filter_params_y->taps / 2 - 1;
- const int horiz_offset = filter_params_x->taps / 2 - 1;
- const uint8_t *src_ptr = src - vert_offset * src_stride - horiz_offset;
+ const uint8_t *src_ptr = src;
+ int16_t *dst_ptr = im_block;
+ int dst_stride = im_stride;
- int16_t *dst_ptr;
+ int height = im_h;
- dst_ptr = im_block;
- im_dst_stride = im_stride;
- height = im_h;
- width = w;
+ // Filter values are even, so downshift by 1 to reduce intermediate precision
+ // requirements.
+ const int16x8_t x_filter = vshrq_n_s16(x_filter_s16, 1);
- const int16_t round_bits =
- FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
- const int16x8_t vec_round_bits = vdupq_n_s16(-round_bits);
- const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_x, subpel_x_qn & SUBPEL_MASK);
-
- int16_t x_filter_tmp[8];
- int16x8_t filter_x_coef = vld1q_s16(x_filter);
-
- // filter coeffs are even, so downshifting by 1 to reduce intermediate
- // precision requirements.
- filter_x_coef = vshrq_n_s16(filter_x_coef, 1);
- vst1q_s16(&x_filter_tmp[0], filter_x_coef);
-
- assert(conv_params->round_0 > 0);
+ assert(round_0 > 0);
if (w <= 4) {
const int16x4_t horiz_const = vdup_n_s16((1 << (bd + FILTER_BITS - 2)));
- const int16x4_t shift_round_0 = vdup_n_s16(-(conv_params->round_0 - 1));
+ const int16x4_t shift_round_0 = vdup_n_s16(-(round_0 - 1));
#if defined(__aarch64__)
- int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, d0, d1, d2, d3;
do {
+ int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, d0, d1, d2, d3;
+ uint8x8_t t0, t1, t2, t3;
+ const uint8_t *s = src_ptr;
+
assert(height >= 4);
- s = src_ptr;
- __builtin_prefetch(s + 0 * src_stride);
- __builtin_prefetch(s + 1 * src_stride);
- __builtin_prefetch(s + 2 * src_stride);
- __builtin_prefetch(s + 3 * src_stride);
load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3);
transpose_u8_8x4(&t0, &t1, &t2, &t3);
@@ -1021,10 +2555,6 @@ void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
s5 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
s6 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
- __builtin_prefetch(dst_ptr + 0 * im_dst_stride);
- __builtin_prefetch(dst_ptr + 1 * im_dst_stride);
- __builtin_prefetch(dst_ptr + 2 * im_dst_stride);
- __builtin_prefetch(dst_ptr + 3 * im_dst_stride);
s += 7;
load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3);
@@ -1035,68 +2565,65 @@ void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
s9 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
s10 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t3)));
- d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
+ d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter,
horiz_const, shift_round_0);
- d1 = convolve8_4x4_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp,
+ d1 = convolve8_4x4_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter,
horiz_const, shift_round_0);
- d2 = convolve8_4x4_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp,
+ d2 = convolve8_4x4_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter,
horiz_const, shift_round_0);
- d3 = convolve8_4x4_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp,
+ d3 = convolve8_4x4_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter,
horiz_const, shift_round_0);
transpose_s16_4x4d(&d0, &d1, &d2, &d3);
- if (w == 4) {
- vst1_s16((dst_ptr + 0 * im_dst_stride), d0);
- vst1_s16((dst_ptr + 1 * im_dst_stride), d1);
- vst1_s16((dst_ptr + 2 * im_dst_stride), d2);
- vst1_s16((dst_ptr + 3 * im_dst_stride), d3);
- } else if (w == 2) {
- vst1_lane_u32((uint32_t *)(dst_ptr + 0 * im_dst_stride),
+
+ if (w == 2) {
+ vst1_lane_u32((uint32_t *)(dst_ptr + 0 * dst_stride),
vreinterpret_u32_s16(d0), 0);
- vst1_lane_u32((uint32_t *)(dst_ptr + 1 * im_dst_stride),
+ vst1_lane_u32((uint32_t *)(dst_ptr + 1 * dst_stride),
vreinterpret_u32_s16(d1), 0);
- vst1_lane_u32((uint32_t *)(dst_ptr + 2 * im_dst_stride),
+ vst1_lane_u32((uint32_t *)(dst_ptr + 2 * dst_stride),
vreinterpret_u32_s16(d2), 0);
- vst1_lane_u32((uint32_t *)(dst_ptr + 3 * im_dst_stride),
+ vst1_lane_u32((uint32_t *)(dst_ptr + 3 * dst_stride),
vreinterpret_u32_s16(d3), 0);
+ } else {
+ vst1_s16((dst_ptr + 0 * dst_stride), d0);
+ vst1_s16((dst_ptr + 1 * dst_stride), d1);
+ vst1_s16((dst_ptr + 2 * dst_stride), d2);
+ vst1_s16((dst_ptr + 3 * dst_stride), d3);
}
+
src_ptr += 4 * src_stride;
- dst_ptr += 4 * im_dst_stride;
+ dst_ptr += 4 * dst_stride;
height -= 4;
} while (height >= 4);
if (height) {
assert(height < 4);
- horiz_filter_w4_single_row(src_ptr, src_stride, dst_ptr, im_dst_stride, w,
- height, x_filter_tmp, horiz_const,
- shift_round_0);
+ horiz_filter_w4_single_row(src_ptr, src_stride, dst_ptr, dst_stride, w,
+ height, x_filter, horiz_const, shift_round_0);
}
-#else
- horiz_filter_w4_single_row(src_ptr, src_stride, dst_ptr, im_dst_stride, w,
- height, x_filter_tmp, horiz_const,
- shift_round_0);
-#endif
+
+#else // !defined(__aarch64__)
+ horiz_filter_w4_single_row(src_ptr, src_stride, dst_ptr, dst_stride, w,
+ height, x_filter, horiz_const, shift_round_0);
+#endif // defined(__aarch64__)
} else {
const int16x8_t horiz_const = vdupq_n_s16((1 << (bd + FILTER_BITS - 2)));
- const int16x8_t shift_round_0 = vdupq_n_s16(-(conv_params->round_0 - 1));
+ const int16x8_t shift_round_0 = vdupq_n_s16(-(round_0 - 1));
#if defined(__aarch64__)
- int16_t *d_tmp;
- int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14;
- int16x8_t res0, res1, res2, res3, res4, res5, res6, res7;
- do {
- assert(height >= 8);
- __builtin_prefetch(src_ptr + 0 * src_stride);
- __builtin_prefetch(src_ptr + 1 * src_stride);
- __builtin_prefetch(src_ptr + 2 * src_stride);
- __builtin_prefetch(src_ptr + 3 * src_stride);
- __builtin_prefetch(src_ptr + 4 * src_stride);
- __builtin_prefetch(src_ptr + 5 * src_stride);
- __builtin_prefetch(src_ptr + 6 * src_stride);
- __builtin_prefetch(src_ptr + 7 * src_stride);
-
- load_u8_8x8(src_ptr, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
+
+ for (; height >= 8; height -= 8) {
+ int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14,
+ d0, d1, d2, d3, d4, d5, d6, d7;
+ uint8x8_t t0, t1, t2, t3, t4, t5, t6, t7;
+
+ const uint8_t *s = src_ptr;
+ int16_t *d = dst_ptr;
+ int width = w;
+
+ load_u8_8x8(s, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
@@ -1108,18 +2635,7 @@ void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
s5 = vreinterpretq_s16_u16(vmovl_u8(t5));
s6 = vreinterpretq_s16_u16(vmovl_u8(t6));
- width = w;
- s = src_ptr + 7;
- d_tmp = dst_ptr;
-
- __builtin_prefetch(dst_ptr + 0 * im_dst_stride);
- __builtin_prefetch(dst_ptr + 1 * im_dst_stride);
- __builtin_prefetch(dst_ptr + 2 * im_dst_stride);
- __builtin_prefetch(dst_ptr + 3 * im_dst_stride);
- __builtin_prefetch(dst_ptr + 4 * im_dst_stride);
- __builtin_prefetch(dst_ptr + 5 * im_dst_stride);
- __builtin_prefetch(dst_ptr + 6 * im_dst_stride);
- __builtin_prefetch(dst_ptr + 7 * im_dst_stride);
+ s += 7;
do {
load_u8_8x8(s, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7);
@@ -1135,28 +2651,26 @@ void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
s13 = vreinterpretq_s16_u16(vmovl_u8(t6));
s14 = vreinterpretq_s16_u16(vmovl_u8(t7));
- res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
- horiz_const, shift_round_0);
- res1 = convolve8_8x8_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp,
- horiz_const, shift_round_0);
- res2 = convolve8_8x8_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp,
- horiz_const, shift_round_0);
- res3 = convolve8_8x8_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp,
- horiz_const, shift_round_0);
- res4 = convolve8_8x8_s16(s4, s5, s6, s7, s8, s9, s10, s11, x_filter_tmp,
- horiz_const, shift_round_0);
- res5 = convolve8_8x8_s16(s5, s6, s7, s8, s9, s10, s11, s12,
- x_filter_tmp, horiz_const, shift_round_0);
- res6 = convolve8_8x8_s16(s6, s7, s8, s9, s10, s11, s12, s13,
- x_filter_tmp, horiz_const, shift_round_0);
- res7 = convolve8_8x8_s16(s7, s8, s9, s10, s11, s12, s13, s14,
- x_filter_tmp, horiz_const, shift_round_0);
-
- transpose_s16_8x8(&res0, &res1, &res2, &res3, &res4, &res5, &res6,
- &res7);
-
- store_s16_8x8(d_tmp, im_dst_stride, res0, res1, res2, res3, res4, res5,
- res6, res7);
+ d0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter,
+ horiz_const, shift_round_0);
+ d1 = convolve8_8x8_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter,
+ horiz_const, shift_round_0);
+ d2 = convolve8_8x8_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter,
+ horiz_const, shift_round_0);
+ d3 = convolve8_8x8_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter,
+ horiz_const, shift_round_0);
+ d4 = convolve8_8x8_s16(s4, s5, s6, s7, s8, s9, s10, s11, x_filter,
+ horiz_const, shift_round_0);
+ d5 = convolve8_8x8_s16(s5, s6, s7, s8, s9, s10, s11, s12, x_filter,
+ horiz_const, shift_round_0);
+ d6 = convolve8_8x8_s16(s6, s7, s8, s9, s10, s11, s12, s13, x_filter,
+ horiz_const, shift_round_0);
+ d7 = convolve8_8x8_s16(s7, s8, s9, s10, s11, s12, s13, s14, x_filter,
+ horiz_const, shift_round_0);
+
+ transpose_s16_8x8(&d0, &d1, &d2, &d3, &d4, &d5, &d6, &d7);
+
+ store_s16_8x8(d, dst_stride, d0, d1, d2, d3, d4, d5, d6, d7);
s0 = s8;
s1 = s9;
@@ -1166,248 +2680,262 @@ void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
s5 = s13;
s6 = s14;
s += 8;
- d_tmp += 8;
+ d += 8;
width -= 8;
} while (width > 0);
+
src_ptr += 8 * src_stride;
- dst_ptr += 8 * im_dst_stride;
- height -= 8;
- } while (height >= 8);
-
- if (height >= 4) {
- assert(height < 8);
- int16x4_t reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9,
- reg10, reg11, reg12, reg13, reg14;
- int16x4_t d0, d1, d2, d3, d4, d5, d6, d7;
- int16x8_t out0, out1, out2, out3;
-
- __builtin_prefetch(src_ptr + 0 * src_stride);
- __builtin_prefetch(src_ptr + 1 * src_stride);
- __builtin_prefetch(src_ptr + 2 * src_stride);
- __builtin_prefetch(src_ptr + 3 * src_stride);
+ dst_ptr += 8 * dst_stride;
+ }
+
+ for (; height >= 4; height -= 4) {
+ int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14,
+ dd0, dd1, dd2, dd3, dd4, dd5, dd6, dd7;
+ int16x8_t d0, d1, d2, d3;
+ uint8x8_t t0, t1, t2, t3;
+
+ const uint8_t *s = src_ptr;
+ int16_t *d = dst_ptr;
+ int width = w;
load_u8_8x4(src_ptr, src_stride, &t0, &t1, &t2, &t3);
transpose_u8_8x4(&t0, &t1, &t2, &t3);
- reg0 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
- reg1 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
- reg2 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
- reg3 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t3)));
- reg4 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
- reg5 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
- reg6 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
-
- __builtin_prefetch(dst_ptr + 0 * dst_stride);
- __builtin_prefetch(dst_ptr + 1 * dst_stride);
- __builtin_prefetch(dst_ptr + 2 * dst_stride);
- __builtin_prefetch(dst_ptr + 3 * dst_stride);
-
- s = src_ptr + 7;
- d_tmp = dst_ptr;
- width = w;
+ s0 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
+ s1 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
+ s2 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
+ s3 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t3)));
+ s4 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
+ s5 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
+ s6 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
+
+ s += 7;
do {
load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3);
transpose_u8_8x4(&t0, &t1, &t2, &t3);
- reg7 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
- reg8 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
- reg9 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
- reg10 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t3)));
- reg11 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
- reg12 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
- reg13 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
- reg14 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t3)));
-
- d0 = convolve8_4x4(reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7,
- x_filter_tmp);
-
- d1 = convolve8_4x4(reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8,
- x_filter_tmp);
+ s7 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
+ s8 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
+ s9 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
+ s10 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t3)));
+ s11 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t0)));
+ s12 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t1)));
+ s13 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t2)));
+ s14 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t3)));
+
+ dd0 = convolve8_4x4(s0, s1, s2, s3, s4, s5, s6, s7, x_filter);
+ dd1 = convolve8_4x4(s1, s2, s3, s4, s5, s6, s7, s8, x_filter);
+ dd2 = convolve8_4x4(s2, s3, s4, s5, s6, s7, s8, s9, x_filter);
+ dd3 = convolve8_4x4(s3, s4, s5, s6, s7, s8, s9, s10, x_filter);
+ dd4 = convolve8_4x4(s4, s5, s6, s7, s8, s9, s10, s11, x_filter);
+ dd5 = convolve8_4x4(s5, s6, s7, s8, s9, s10, s11, s12, x_filter);
+ dd6 = convolve8_4x4(s6, s7, s8, s9, s10, s11, s12, s13, x_filter);
+ dd7 = convolve8_4x4(s7, s8, s9, s10, s11, s12, s13, s14, x_filter);
+
+ transpose_s16_4x8(&dd0, &dd1, &dd2, &dd3, &dd4, &dd5, &dd6, &dd7, &d0,
+ &d1, &d2, &d3);
+
+ d0 = vaddq_s16(d0, horiz_const);
+ d1 = vaddq_s16(d1, horiz_const);
+ d2 = vaddq_s16(d2, horiz_const);
+ d3 = vaddq_s16(d3, horiz_const);
+
+ d0 = vqrshlq_s16(d0, shift_round_0);
+ d1 = vqrshlq_s16(d1, shift_round_0);
+ d2 = vqrshlq_s16(d2, shift_round_0);
+ d3 = vqrshlq_s16(d3, shift_round_0);
+
+ store_s16_8x4(d, dst_stride, d0, d1, d2, d3);
- d2 = convolve8_4x4(reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9,
- x_filter_tmp);
+ s0 = s8;
+ s1 = s9;
+ s2 = s10;
+ s3 = s11;
+ s4 = s12;
+ s5 = s13;
+ s6 = s14;
+ s += 8;
+ d += 8;
+ width -= 8;
+ } while (width > 0);
- d3 = convolve8_4x4(reg3, reg4, reg5, reg6, reg7, reg8, reg9, reg10,
- x_filter_tmp);
+ src_ptr += 4 * src_stride;
+ dst_ptr += 4 * dst_stride;
+ }
- d4 = convolve8_4x4(reg4, reg5, reg6, reg7, reg8, reg9, reg10, reg11,
- x_filter_tmp);
+ if (height) {
+ assert(height < 4);
+ horiz_filter_w8_single_row(src_ptr, src_stride, dst_ptr, dst_stride, w,
+ height, x_filter, horiz_const, shift_round_0);
+ }
- d5 = convolve8_4x4(reg5, reg6, reg7, reg8, reg9, reg10, reg11, reg12,
- x_filter_tmp);
+#else // !defined(__aarch64__)
+ horiz_filter_w8_single_row(src_ptr, src_stride, dst_ptr, dst_stride, w,
+ height, x_filter, horiz_const, shift_round_0);
+#endif // defined(__aarch64__)
+ }
+}
- d6 = convolve8_4x4(reg6, reg7, reg8, reg9, reg10, reg11, reg12, reg13,
- x_filter_tmp);
+#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
- d7 = convolve8_4x4(reg7, reg8, reg9, reg10, reg11, reg12, reg13, reg14,
- x_filter_tmp);
+static INLINE void av1_convolve_2d_sr_vert_neon(
+ int16_t *src_ptr, int src_stride, uint8_t *dst_ptr, int dst_stride, int w,
+ int h, const int16x8_t y_filter, ConvolveParams *conv_params) {
+ const int bd = 8;
+ const int16_t round_bits =
+ FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1;
+ const int16x8_t vec_round_bits = vdupq_n_s16(-round_bits);
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
- transpose_s16_4x8(&d0, &d1, &d2, &d3, &d4, &d5, &d6, &d7, &out0, &out1,
- &out2, &out3);
+ const int32_t sub_const = (1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1));
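+ // Bias bookkeeping (sketch, 8-bit path): offset_const seeds the accumulator
+ // with (1 << offset_bits) so it stays non-negative through the round_1
+ // shift; the horiz_const added in the horizontal pass, amplified by the
+ // y-filter gain of (1 << FILTER_BITS), lands at (1 << (offset_bits - 1)).
+ // sub_const is exactly the sum of these two biases after the round_1 shift.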
- out0 = vaddq_s16(out0, horiz_const);
- out0 = vqrshlq_s16(out0, shift_round_0);
+ const int32x4_t round_shift_vec = vdupq_n_s32(-(conv_params->round_1));
+ const int32x4_t offset_const = vdupq_n_s32(1 << offset_bits);
+ const int32x4_t sub_const_vec = vdupq_n_s32(sub_const);
- out1 = vaddq_s16(out1, horiz_const);
- out1 = vqrshlq_s16(out1, shift_round_0);
+ if (w <= 4) {
+ int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, d0;
+ int16x8_t dd0;
+ uint8x8_t d01;
- out2 = vaddq_s16(out2, horiz_const);
- out2 = vqrshlq_s16(out2, shift_round_0);
+#if defined(__aarch64__)
+ int16x4_t s8, s9, s10, d1, d2, d3;
+ int16x8_t dd1;
+ uint8x8_t d23;
+#endif // defined(__aarch64__)
- out3 = vaddq_s16(out3, horiz_const);
- out3 = vqrshlq_s16(out3, shift_round_0);
+ int16_t *s = src_ptr;
+ uint8_t *d = dst_ptr;
- store_s16_8x4(d_tmp, im_dst_stride, out0, out1, out2, out3);
+ load_s16_4x8(s, src_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7);
+ s += (7 * src_stride);
- reg0 = reg8;
- reg1 = reg9;
- reg2 = reg10;
- reg3 = reg11;
- reg4 = reg12;
- reg5 = reg13;
- reg6 = reg14;
- s += 8;
- d_tmp += 8;
- width -= 8;
- } while (width > 0);
- src_ptr += 4 * src_stride;
- dst_ptr += 4 * im_dst_stride;
- height -= 4;
- }
+ do {
+#if defined(__aarch64__)
+ load_s16_4x4(s, src_stride, &s7, &s8, &s9, &s10);
+ s += (4 * src_stride);
- if (height) {
- assert(height < 4);
- horiz_filter_w8_single_row(src_ptr, src_stride, dst_ptr, im_stride, w,
- height, x_filter_tmp, horiz_const,
- shift_round_0);
- }
-#else
+ d0 = convolve8_vert_4x4_s32(s0, s1, s2, s3, s4, s5, s6, s7, y_filter,
+ round_shift_vec, offset_const, sub_const_vec);
+ d1 = convolve8_vert_4x4_s32(s1, s2, s3, s4, s5, s6, s7, s8, y_filter,
+ round_shift_vec, offset_const, sub_const_vec);
+ d2 = convolve8_vert_4x4_s32(s2, s3, s4, s5, s6, s7, s8, s9, y_filter,
+ round_shift_vec, offset_const, sub_const_vec);
+ d3 = convolve8_vert_4x4_s32(s3, s4, s5, s6, s7, s8, s9, s10, y_filter,
+ round_shift_vec, offset_const, sub_const_vec);
- horiz_filter_w8_single_row(src_ptr, src_stride, dst_ptr, im_stride, w,
- height, x_filter_tmp, horiz_const,
- shift_round_0);
-#endif
- }
+ dd0 = vqrshlq_s16(vcombine_s16(d0, d1), vec_round_bits);
+ dd1 = vqrshlq_s16(vcombine_s16(d2, d3), vec_round_bits);
- // vertical
- {
- uint8_t *dst_u8_ptr, *d_u8;
- int16_t *v_src_ptr, *v_s;
+ d01 = vqmovun_s16(dd0);
+ d23 = vqmovun_s16(dd1);
- const int32_t sub_const = (1 << (offset_bits - conv_params->round_1)) +
- (1 << (offset_bits - conv_params->round_1 - 1));
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
- filter_params_y, subpel_y_qn & SUBPEL_MASK);
+ if (w == 4) {
+ vst1_lane_u32((uint32_t *)d, vreinterpret_u32_u8(d01), 0);
+ d += dst_stride;
+ vst1_lane_u32((uint32_t *)d, vreinterpret_u32_u8(d01), 1);
+ d += dst_stride;
+ if (h != 2) {
+ vst1_lane_u32((uint32_t *)d, vreinterpret_u32_u8(d23), 0);
+ d += dst_stride;
+ vst1_lane_u32((uint32_t *)d, vreinterpret_u32_u8(d23), 1);
+ d += dst_stride;
+ }
+ } else {
+ vst1_lane_u16((uint16_t *)d, vreinterpret_u16_u8(d01), 0);
+ d += dst_stride;
+ vst1_lane_u16((uint16_t *)d, vreinterpret_u16_u8(d01), 2);
+ d += dst_stride;
+ if (h != 2) {
+ vst1_lane_u16((uint16_t *)d, vreinterpret_u16_u8(d23), 0);
+ d += dst_stride;
+ vst1_lane_u16((uint16_t *)d, vreinterpret_u16_u8(d23), 2);
+ d += dst_stride;
+ }
+ }
- const int32x4_t round_shift_vec = vdupq_n_s32(-(conv_params->round_1));
- const int32x4_t offset_const = vdupq_n_s32(1 << offset_bits);
- const int32x4_t sub_const_vec = vdupq_n_s32(sub_const);
+ s0 = s4;
+ s1 = s5;
+ s2 = s6;
+ s3 = s7;
+ s4 = s8;
+ s5 = s9;
+ s6 = s10;
+ h -= 4;
+#else // !defined(__aarch64__)
+ s7 = vld1_s16(s);
+ s += src_stride;
- src_stride = im_stride;
- v_src_ptr = im_block;
- dst_u8_ptr = dst;
+ d0 = convolve8_vert_4x4_s32(s0, s1, s2, s3, s4, s5, s6, s7, y_filter,
+ round_shift_vec, offset_const, sub_const_vec);
- height = h;
- width = w;
+ dd0 = vqrshlq_s16(vcombine_s16(d0, d0), vec_round_bits);
+ d01 = vqmovun_s16(dd0);
- if (width <= 4) {
- int16x4_t s0, s1, s2, s3, s4, s5, s6, s7;
- uint16x4_t d0;
- uint16x8_t dd0;
- uint8x8_t d01;
+ if (w == 2) {
+ vst1_lane_u16((uint16_t *)d, vreinterpret_u16_u8(d01), 0);
+ d += dst_stride;
+ } else {
+ vst1_lane_u32((uint32_t *)d, vreinterpret_u32_u8(d01), 0);
+ d += dst_stride;
+ }
+ s0 = s1;
+ s1 = s2;
+ s2 = s3;
+ s3 = s4;
+ s4 = s5;
+ s5 = s6;
+ s6 = s7;
+ h--;
+#endif // defined(__aarch64__)
+ } while (h > 0);
+ } else {
+ // Width is a multiple of 8 and height is a multiple of 4
+ int16x8_t s0, s1, s2, s3, s4, s5, s6, s7;
+ uint8x8_t d0;
#if defined(__aarch64__)
- int16x4_t s8, s9, s10;
- uint16x4_t d1, d2, d3;
- uint16x8_t dd1;
- uint8x8_t d23;
-#endif
-
- d_u8 = dst_u8_ptr;
- v_s = v_src_ptr;
+ int16x8_t s8, s9, s10;
+ uint8x8_t d1, d2, d3;
+#endif // defined(__aarch64__)
- __builtin_prefetch(v_s + 0 * im_stride);
- __builtin_prefetch(v_s + 1 * im_stride);
- __builtin_prefetch(v_s + 2 * im_stride);
- __builtin_prefetch(v_s + 3 * im_stride);
- __builtin_prefetch(v_s + 4 * im_stride);
- __builtin_prefetch(v_s + 5 * im_stride);
- __builtin_prefetch(v_s + 6 * im_stride);
- __builtin_prefetch(v_s + 7 * im_stride);
+ do {
+ int height = h;
+ int16_t *s = src_ptr;
+ uint8_t *d = dst_ptr;
- load_s16_4x8(v_s, im_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7);
- v_s += (7 * im_stride);
+ load_s16_8x8(s, src_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7);
+ s += (7 * src_stride);
do {
#if defined(__aarch64__)
- load_s16_4x4(v_s, im_stride, &s7, &s8, &s9, &s10);
- v_s += (im_stride << 2);
-
- __builtin_prefetch(d_u8 + 0 * dst_stride);
- __builtin_prefetch(d_u8 + 1 * dst_stride);
- __builtin_prefetch(d_u8 + 2 * dst_stride);
- __builtin_prefetch(d_u8 + 3 * dst_stride);
+ load_s16_8x4(s, src_stride, &s7, &s8, &s9, &s10);
+ s += (4 * src_stride);
- d0 = convolve8_vert_4x4_s32(s0, s1, s2, s3, s4, s5, s6, s7, y_filter,
+ d0 = convolve8_vert_8x4_s32(s0, s1, s2, s3, s4, s5, s6, s7, y_filter,
round_shift_vec, offset_const,
- sub_const_vec);
- d1 = convolve8_vert_4x4_s32(s1, s2, s3, s4, s5, s6, s7, s8, y_filter,
+ sub_const_vec, vec_round_bits);
+ d1 = convolve8_vert_8x4_s32(s1, s2, s3, s4, s5, s6, s7, s8, y_filter,
round_shift_vec, offset_const,
- sub_const_vec);
- d2 = convolve8_vert_4x4_s32(s2, s3, s4, s5, s6, s7, s8, s9, y_filter,
+ sub_const_vec, vec_round_bits);
+ d2 = convolve8_vert_8x4_s32(s2, s3, s4, s5, s6, s7, s8, s9, y_filter,
round_shift_vec, offset_const,
- sub_const_vec);
- d3 = convolve8_vert_4x4_s32(s3, s4, s5, s6, s7, s8, s9, s10, y_filter,
+ sub_const_vec, vec_round_bits);
+ d3 = convolve8_vert_8x4_s32(s3, s4, s5, s6, s7, s8, s9, s10, y_filter,
round_shift_vec, offset_const,
- sub_const_vec);
+ sub_const_vec, vec_round_bits);
- dd0 = vqrshlq_u16(vcombine_u16(d0, d1), vec_round_bits);
- dd1 = vqrshlq_u16(vcombine_u16(d2, d3), vec_round_bits);
-
- d01 = vqmovn_u16(dd0);
- d23 = vqmovn_u16(dd1);
-
- if ((w == 4) && (h != 2)) {
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d01),
- 0); // 00 01 02 03
- d_u8 += dst_stride;
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d01),
- 1); // 10 11 12 13
- d_u8 += dst_stride;
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d23),
- 0); // 20 21 22 23
- d_u8 += dst_stride;
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d23),
- 1); // 30 31 32 33
- d_u8 += dst_stride;
- } else if ((w == 2) && (h != 2)) {
- vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d01),
- 0); // 00 01
- d_u8 += dst_stride;
- vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d01),
- 2); // 10 11
- d_u8 += dst_stride;
- vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d23),
- 0); // 20 21
- d_u8 += dst_stride;
- vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d23),
- 2); // 30 31
- d_u8 += dst_stride;
- } else if ((w == 4) && (h == 2)) {
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d01),
- 0); // 00 01 02 03
- d_u8 += dst_stride;
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d01),
- 1); // 10 11 12 13
- d_u8 += dst_stride;
- } else if ((w == 2) && (h == 2)) {
- vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d01),
- 0); // 00 01
- d_u8 += dst_stride;
- vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d01),
- 2); // 10 11
- d_u8 += dst_stride;
+ vst1_u8(d, d0);
+ d += dst_stride;
+ vst1_u8(d, d1);
+ d += dst_stride;
+ if (h != 2) {
+ vst1_u8(d, d2);
+ d += dst_stride;
+ vst1_u8(d, d3);
+ d += dst_stride;
}
s0 = s4;
@@ -1418,29 +2946,16 @@ void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
s5 = s9;
s6 = s10;
height -= 4;
-#else
- s7 = vld1_s16(v_s);
- v_s += im_stride;
-
- __builtin_prefetch(d_u8 + 0 * dst_stride);
+#else // !defined(__aarch64__)
+ s7 = vld1q_s16(s);
+ s += src_stride;
- d0 = convolve8_vert_4x4_s32(s0, s1, s2, s3, s4, s5, s6, s7, y_filter,
+ d0 = convolve8_vert_8x4_s32(s0, s1, s2, s3, s4, s5, s6, s7, y_filter,
round_shift_vec, offset_const,
- sub_const_vec);
-
- dd0 = vqrshlq_u16(vcombine_u16(d0, d0), vec_round_bits);
- d01 = vqmovn_u16(dd0);
+ sub_const_vec, vec_round_bits);
- if (w == 4) {
- vst1_lane_u32((uint32_t *)d_u8, vreinterpret_u32_u8(d01),
- 0); // 00 01 02 03
- d_u8 += dst_stride;
-
- } else if (w == 2) {
- vst1_lane_u16((uint16_t *)d_u8, vreinterpret_u16_u8(d01),
- 0); // 00 01
- d_u8 += dst_stride;
- }
+ vst1_u8(d, d0);
+ d += dst_stride;
s0 = s1;
s1 = s2;
@@ -1449,109 +2964,62 @@ void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
s4 = s5;
s5 = s6;
s6 = s7;
- height -= 1;
-#endif
+ height--;
+#endif // defined(__aarch64__)
} while (height > 0);
- } else {
- // if width is a multiple of 8 & height is a multiple of 4
- int16x8_t s0, s1, s2, s3, s4, s5, s6, s7;
- uint8x8_t res0;
-#if defined(__aarch64__)
- int16x8_t s8, s9, s10;
- uint8x8_t res1, res2, res3;
-#endif
- do {
- __builtin_prefetch(v_src_ptr + 0 * im_stride);
- __builtin_prefetch(v_src_ptr + 1 * im_stride);
- __builtin_prefetch(v_src_ptr + 2 * im_stride);
- __builtin_prefetch(v_src_ptr + 3 * im_stride);
- __builtin_prefetch(v_src_ptr + 4 * im_stride);
- __builtin_prefetch(v_src_ptr + 5 * im_stride);
- __builtin_prefetch(v_src_ptr + 6 * im_stride);
- __builtin_prefetch(v_src_ptr + 7 * im_stride);
-
- v_s = v_src_ptr;
- load_s16_8x8(v_s, im_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7);
- v_s += (7 * im_stride);
-
- d_u8 = dst_u8_ptr;
- height = h;
+ src_ptr += 8;
+ dst_ptr += 8;
+ w -= 8;
+ } while (w > 0);
+ }
+}
- do {
-#if defined(__aarch64__)
- load_s16_8x4(v_s, im_stride, &s7, &s8, &s9, &s10);
- v_s += (im_stride << 2);
-
- __builtin_prefetch(d_u8 + 4 * dst_stride);
- __builtin_prefetch(d_u8 + 5 * dst_stride);
- __builtin_prefetch(d_u8 + 6 * dst_stride);
- __builtin_prefetch(d_u8 + 7 * dst_stride);
-
- res0 = convolve8_vert_8x4_s32(s0, s1, s2, s3, s4, s5, s6, s7,
- y_filter, round_shift_vec, offset_const,
- sub_const_vec, vec_round_bits);
- res1 = convolve8_vert_8x4_s32(s1, s2, s3, s4, s5, s6, s7, s8,
- y_filter, round_shift_vec, offset_const,
- sub_const_vec, vec_round_bits);
- res2 = convolve8_vert_8x4_s32(s2, s3, s4, s5, s6, s7, s8, s9,
- y_filter, round_shift_vec, offset_const,
- sub_const_vec, vec_round_bits);
- res3 = convolve8_vert_8x4_s32(s3, s4, s5, s6, s7, s8, s9, s10,
- y_filter, round_shift_vec, offset_const,
- sub_const_vec, vec_round_bits);
+void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
+ int dst_stride, int w, int h,
+ const InterpFilterParams *filter_params_x,
+ const InterpFilterParams *filter_params_y,
+ const int subpel_x_qn, const int subpel_y_qn,
+ ConvolveParams *conv_params) {
+ const int im_h = h + filter_params_y->taps - 1;
+ const int im_stride = MAX_SB_SIZE;
+ const int vert_offset = filter_params_y->taps / 2 - 1;
+ const int horiz_offset = filter_params_x->taps / 2 - 1;
+ const uint8_t *src_ptr = src - vert_offset * src_stride - horiz_offset;
- if (h != 2) {
- vst1_u8(d_u8, res0);
- d_u8 += dst_stride;
- vst1_u8(d_u8, res1);
- d_u8 += dst_stride;
- vst1_u8(d_u8, res2);
- d_u8 += dst_stride;
- vst1_u8(d_u8, res3);
- d_u8 += dst_stride;
- } else {
- vst1_u8(d_u8, res0);
- d_u8 += dst_stride;
- vst1_u8(d_u8, res1);
- d_u8 += dst_stride;
- }
- s0 = s4;
- s1 = s5;
- s2 = s6;
- s3 = s7;
- s4 = s8;
- s5 = s9;
- s6 = s10;
- height -= 4;
-#else
- s7 = vld1q_s16(v_s);
- v_s += im_stride;
-
- __builtin_prefetch(d_u8 + 0 * dst_stride);
-
- res0 = convolve8_vert_8x4_s32(s0, s1, s2, s3, s4, s5, s6, s7,
- y_filter, round_shift_vec, offset_const,
- sub_const_vec, vec_round_bits);
-
- vst1_u8(d_u8, res0);
- d_u8 += dst_stride;
-
- s0 = s1;
- s1 = s2;
- s2 = s3;
- s3 = s4;
- s4 = s5;
- s5 = s6;
- s6 = s7;
- height -= 1;
-#endif
- } while (height > 0);
- v_src_ptr += 8;
- dst_u8_ptr += 8;
- w -= 8;
- } while (w > 0);
- }
+ const int16_t *x_filter_ptr = av1_get_interp_filter_subpel_kernel(
+ filter_params_x, subpel_x_qn & SUBPEL_MASK);
+ const int16_t *y_filter_ptr = av1_get_interp_filter_subpel_kernel(
+ filter_params_y, subpel_y_qn & SUBPEL_MASK);
+
+ if (filter_params_x->taps > 8) {
+ DECLARE_ALIGNED(16, int16_t,
+ im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
+
+ const int16x8_t x_filter_0_7 = vld1q_s16(x_filter_ptr);
+ const int16x4_t x_filter_8_11 = vld1_s16(x_filter_ptr + 8);
+ const int16x8_t y_filter_0_7 = vld1q_s16(y_filter_ptr);
+ const int16x4_t y_filter_8_11 = vld1_s16(y_filter_ptr + 8);
+
+ av1_convolve_2d_sr_horiz_12tap_neon(src_ptr, src_stride, im_block,
+ im_stride, w, im_h, x_filter_0_7,
+ x_filter_8_11, conv_params->round_0);
+
+ av1_convolve_2d_sr_vert_12tap_neon(im_block, im_stride, dst, dst_stride, w,
+ h, y_filter_0_7, y_filter_8_11,
+ conv_params);
+ } else {
+ DECLARE_ALIGNED(16, int16_t,
+ im_block[(MAX_SB_SIZE + HORIZ_EXTRA_ROWS) * MAX_SB_SIZE]);
+
+ const int16x8_t x_filter = vld1q_s16(x_filter_ptr);
+ const int16x8_t y_filter = vld1q_s16(y_filter_ptr);
+
+ av1_convolve_2d_sr_horiz_neon(src_ptr, src_stride, im_block, im_stride, w,
+ im_h, x_filter, conv_params->round_0);
+
+ av1_convolve_2d_sr_vert_neon(im_block, im_stride, dst, dst_stride, w, h,
+ y_filter, conv_params);
}
}
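
Conceptually, the rewritten entry point performs a textbook separable convolution: a horizontal pass into a 16-bit intermediate block, then a vertical pass back to 8-bit pixels. A minimal scalar sketch (hypothetical helper, simplified rounding; the real code threads round_0/round_1 and the offset constants through as shown above):

#include <stdint.h>

/* Scalar sketch of the separable 2D pipeline (assumes taps <= 8, w/h <= 64;
 * rounding is simplified relative to the conv_params logic above). */
static void convolve_2d_sr_sketch(const uint8_t *src, int src_stride,
                                  uint8_t *dst, int dst_stride, int w, int h,
                                  const int16_t *x_filter,
                                  const int16_t *y_filter, int taps) {
  int16_t im_block[(64 + 7) * 64];
  const int im_h = h + taps - 1;
  /* Horizontal pass: filter rows of the (vertically extended) source. */
  for (int y = 0; y < im_h; y++) {
    for (int x = 0; x < w; x++) {
      int32_t sum = 0;
      for (int k = 0; k < taps; k++)
        sum += x_filter[k] * src[y * src_stride + x + k];
      im_block[y * w + x] = (int16_t)((sum + 4) >> 3); /* stands in for round_0 */
    }
  }
  /* Vertical pass: filter columns of the intermediate block. */
  for (int y = 0; y < h; y++) {
    for (int x = 0; x < w; x++) {
      int32_t sum = 0;
      for (int k = 0; k < taps; k++)
        sum += y_filter[k] * im_block[(y + k) * w + x];
      int32_t v = (sum + (1 << 10)) >> 11; /* stands in for round_1 + final shift */
      dst[y * dst_stride + x] = (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
    }
  }
}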
@@ -1574,8 +3042,6 @@ static INLINE void scaledconvolve_horiz_w4(
const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
if (x_q4 & SUBPEL_MASK) {
const int16x8_t filters = vld1q_s16(x_filters[x_q4 & SUBPEL_MASK]);
- const int16x4_t filter3 = vdup_lane_s16(vget_low_s16(filters), 3);
- const int16x4_t filter4 = vdup_lane_s16(vget_high_s16(filters), 0);
uint8x8_t s[8], d;
int16x8_t ss[4];
int16x4_t t[8], tt;
@@ -1597,7 +3063,7 @@ static INLINE void scaledconvolve_horiz_w4(
t[7] = vget_high_s16(ss[3]);
tt = convolve8_4(t[0], t[1], t[2], t[3], t[4], t[5], t[6], t[7],
- filters, filter3, filter4);
+ filters);
d = vqrshrun_n_s16(vcombine_s16(tt, tt), 7);
vst1_lane_u32((uint32_t *)&temp[4 * z], vreinterpret_u32_u8(d), 0);
} else {
@@ -1703,8 +3169,6 @@ static INLINE void scaledconvolve_vert_w4(
if (y_q4 & SUBPEL_MASK) {
const int16x8_t filters = vld1q_s16(y_filters[y_q4 & SUBPEL_MASK]);
- const int16x4_t filter3 = vdup_lane_s16(vget_low_s16(filters), 3);
- const int16x4_t filter4 = vdup_lane_s16(vget_high_s16(filters), 0);
uint8x8_t s[8], d;
int16x4_t t[8], tt;
@@ -1719,8 +3183,7 @@ static INLINE void scaledconvolve_vert_w4(
t[6] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[6])));
t[7] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[7])));
- tt = convolve8_4(t[0], t[1], t[2], t[3], t[4], t[5], t[6], t[7], filters,
- filter3, filter4);
+ tt = convolve8_4(t[0], t[1], t[2], t[3], t[4], t[5], t[6], t[7], filters);
d = vqrshrun_n_s16(vcombine_s16(tt, tt), 7);
vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d), 0);
} else {
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/arm/convolve_neon.h b/chromium/third_party/libaom/source/libaom/av1/common/arm/convolve_neon.h
index 27a996ce9ed..bedfa89c31d 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/arm/convolve_neon.h
+++ b/chromium/third_party/libaom/source/libaom/av1/common/arm/convolve_neon.h
@@ -19,21 +19,19 @@ static INLINE int16x4_t convolve8_4(const int16x4_t s0, const int16x4_t s1,
const int16x4_t s2, const int16x4_t s3,
const int16x4_t s4, const int16x4_t s5,
const int16x4_t s6, const int16x4_t s7,
- const int16x8_t filters,
- const int16x4_t filter3,
- const int16x4_t filter4) {
- const int16x4_t filters_lo = vget_low_s16(filters);
- const int16x4_t filters_hi = vget_high_s16(filters);
+ const int16x8_t filter) {
+ const int16x4_t filter_lo = vget_low_s16(filter);
+ const int16x4_t filter_hi = vget_high_s16(filter);
int16x4_t sum;
- sum = vmul_lane_s16(s0, filters_lo, 0);
- sum = vmla_lane_s16(sum, s1, filters_lo, 1);
- sum = vmla_lane_s16(sum, s2, filters_lo, 2);
- sum = vmla_lane_s16(sum, s5, filters_hi, 1);
- sum = vmla_lane_s16(sum, s6, filters_hi, 2);
- sum = vmla_lane_s16(sum, s7, filters_hi, 3);
- sum = vqadd_s16(sum, vmul_s16(s3, filter3));
- sum = vqadd_s16(sum, vmul_s16(s4, filter4));
+ sum = vmul_lane_s16(s0, filter_lo, 0);
+ sum = vmla_lane_s16(sum, s1, filter_lo, 1);
+ sum = vmla_lane_s16(sum, s2, filter_lo, 2);
+ sum = vmla_lane_s16(sum, s5, filter_hi, 1);
+ sum = vmla_lane_s16(sum, s6, filter_hi, 2);
+ sum = vmla_lane_s16(sum, s7, filter_hi, 3);
+ sum = vqadd_s16(sum, vmul_lane_s16(s3, filter_lo, 3));
+ sum = vqadd_s16(sum, vmul_lane_s16(s4, filter_hi, 0));
return sum;
}
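
Note the asymmetry retained above (and in convolve8_8 below): the two centre taps s3 and s4 are accumulated with saturating adds, since they carry the largest filter coefficients and their partial products are the ones that could overflow the int16 accumulator; the outer taps use plain multiply-accumulate by lane.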
@@ -41,28 +39,24 @@ static INLINE uint8x8_t convolve8_8(const int16x8_t s0, const int16x8_t s1,
const int16x8_t s2, const int16x8_t s3,
const int16x8_t s4, const int16x8_t s5,
const int16x8_t s6, const int16x8_t s7,
- const int16x8_t filters,
- const int16x8_t filter3,
- const int16x8_t filter4) {
- const int16x4_t filters_lo = vget_low_s16(filters);
- const int16x4_t filters_hi = vget_high_s16(filters);
+ const int16x8_t filter) {
+ const int16x4_t filter_lo = vget_low_s16(filter);
+ const int16x4_t filter_hi = vget_high_s16(filter);
int16x8_t sum;
- sum = vmulq_lane_s16(s0, filters_lo, 0);
- sum = vmlaq_lane_s16(sum, s1, filters_lo, 1);
- sum = vmlaq_lane_s16(sum, s2, filters_lo, 2);
- sum = vmlaq_lane_s16(sum, s5, filters_hi, 1);
- sum = vmlaq_lane_s16(sum, s6, filters_hi, 2);
- sum = vmlaq_lane_s16(sum, s7, filters_hi, 3);
- sum = vqaddq_s16(sum, vmulq_s16(s3, filter3));
- sum = vqaddq_s16(sum, vmulq_s16(s4, filter4));
+ sum = vmulq_lane_s16(s0, filter_lo, 0);
+ sum = vmlaq_lane_s16(sum, s1, filter_lo, 1);
+ sum = vmlaq_lane_s16(sum, s2, filter_lo, 2);
+ sum = vmlaq_lane_s16(sum, s5, filter_hi, 1);
+ sum = vmlaq_lane_s16(sum, s6, filter_hi, 2);
+ sum = vmlaq_lane_s16(sum, s7, filter_hi, 3);
+ sum = vqaddq_s16(sum, vmulq_lane_s16(s3, filter_lo, 3));
+ sum = vqaddq_s16(sum, vmulq_lane_s16(s4, filter_hi, 0));
return vqrshrun_n_s16(sum, 7);
}
static INLINE uint8x8_t scale_filter_8(const uint8x8_t *const s,
- const int16x8_t filters) {
- const int16x8_t filter3 = vdupq_lane_s16(vget_low_s16(filters), 3);
- const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0);
+ const int16x8_t filter) {
int16x8_t ss[8];
ss[0] = vreinterpretq_s16_u16(vmovl_u8(s[0]));
@@ -75,7 +69,7 @@ static INLINE uint8x8_t scale_filter_8(const uint8x8_t *const s,
ss[7] = vreinterpretq_s16_u16(vmovl_u8(s[7]));
return convolve8_8(ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], ss[7],
- filters, filter3, filter4);
+ filter);
}
static INLINE uint8x8_t wiener_convolve8_vert_4x8(
@@ -85,28 +79,27 @@ static INLINE uint8x8_t wiener_convolve8_vert_4x8(
const int round1_bits) {
int16x8_t ss0, ss1, ss2;
int32x4_t sum0, sum1;
- uint16x4_t tmp0, tmp1;
- uint16x8_t tmp;
+ int16x8_t tmp;
uint8x8_t res;
const int32_t round_const = (1 << (bd + round1_bits - 1));
const int32x4_t round_bits = vdupq_n_s32(-round1_bits);
- const int32x4_t zero = vdupq_n_s32(0);
const int32x4_t round_vec = vdupq_n_s32(round_const);
+ const int16x4_t filter = vld1_s16(filter_y);
ss0 = vaddq_s16(s0, s6);
ss1 = vaddq_s16(s1, s5);
ss2 = vaddq_s16(s2, s4);
- sum0 = vmull_n_s16(vget_low_s16(ss0), filter_y[0]);
- sum0 = vmlal_n_s16(sum0, vget_low_s16(ss1), filter_y[1]);
- sum0 = vmlal_n_s16(sum0, vget_low_s16(ss2), filter_y[2]);
- sum0 = vmlal_n_s16(sum0, vget_low_s16(s3), filter_y[3]);
+ sum0 = vmull_lane_s16(vget_low_s16(ss0), filter, 0);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(ss1), filter, 1);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(ss2), filter, 2);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s3), filter, 3);
- sum1 = vmull_n_s16(vget_high_s16(ss0), filter_y[0]);
- sum1 = vmlal_n_s16(sum1, vget_high_s16(ss1), filter_y[1]);
- sum1 = vmlal_n_s16(sum1, vget_high_s16(ss2), filter_y[2]);
- sum1 = vmlal_n_s16(sum1, vget_high_s16(s3), filter_y[3]);
+ sum1 = vmull_lane_s16(vget_high_s16(ss0), filter, 0);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(ss1), filter, 1);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(ss2), filter, 2);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s3), filter, 3);
sum0 = vsubq_s32(sum0, round_vec);
sum1 = vsubq_s32(sum1, round_vec);
@@ -115,14 +108,9 @@ static INLINE uint8x8_t wiener_convolve8_vert_4x8(
sum0 = vrshlq_s32(sum0, round_bits);
sum1 = vrshlq_s32(sum1, round_bits);
- sum0 = vmaxq_s32(sum0, zero);
- sum1 = vmaxq_s32(sum1, zero);
-
/* from int32x4_t to uint8x8_t */
- tmp0 = vqmovn_u32(vreinterpretq_u32_s32(sum0));
- tmp1 = vqmovn_u32(vreinterpretq_u32_s32(sum1));
- tmp = vcombine_u16(tmp0, tmp1);
- res = vqmovn_u16(tmp);
+ tmp = vcombine_s16(vmovn_s32(sum0), vmovn_s32(sum1));
+ res = vqmovun_s16(tmp);
return res;
}
@@ -143,10 +131,11 @@ static INLINE uint16x8_t wiener_convolve8_horiz_8x8(
const int32x4_t round_vec_0 = vdupq_n_s32(round_const_0);
const int32x4_t round_vec_1 = vdupq_n_s32(round_const_1);
+ const int16x4_t filter = vld1_s16(filter_x);
- sum = vmulq_n_s16(s0, filter_x[0]);
- sum = vmlaq_n_s16(sum, s1, filter_x[1]);
- sum = vmlaq_n_s16(sum, s2, filter_x[2]);
+ sum = vmulq_lane_s16(s0, filter, 0);
+ sum = vmlaq_lane_s16(sum, s1, filter, 1);
+ sum = vmlaq_lane_s16(sum, s2, filter, 2);
/* sum from 16x8 to 2 32x4 registers */
sum_0 = vmovl_s16(vget_low_s16(sum));
@@ -156,8 +145,8 @@ static INLINE uint16x8_t wiener_convolve8_horiz_8x8(
* then the max value possible = 128*128*255, exceeding 16 bits.
*/
- s3_0 = vmull_n_s16(vget_low_s16(s3), filter_x[3]);
- s3_1 = vmull_n_s16(vget_high_s16(s3), filter_x[3]);
+ s3_0 = vmull_lane_s16(vget_low_s16(s3), filter, 3);
+ s3_1 = vmull_lane_s16(vget_high_s16(s3), filter, 3);
sum_0 = vaddq_s32(sum_0, s3_0);
sum_1 = vaddq_s32(sum_1, s3_1);
@@ -189,73 +178,201 @@ static INLINE uint16x4_t wiener_convolve8_horiz_4x8(
const int32_t round_const_0 = (1 << (bd + FILTER_BITS - 1));
const int32_t round_const_1 = (1 << (bd + 1 + FILTER_BITS - round0_bits)) - 1;
const int32x4_t round_bits = vdupq_n_s32(-round0_bits);
- const int32x4_t zero = vdupq_n_s32(0);
const int32x4_t round_vec_0 = vdupq_n_s32(round_const_0);
const int32x4_t round_vec_1 = vdupq_n_s32(round_const_1);
+ const int16x4_t filter = vld1_s16(filter_x);
temp0 = vadd_s16(s0, s6);
temp1 = vadd_s16(s1, s5);
temp2 = vadd_s16(s2, s4);
- sum = vmul_n_s16(temp0, filter_x[0]);
- sum = vmla_n_s16(sum, temp1, filter_x[1]);
- sum = vmla_n_s16(sum, temp2, filter_x[2]);
+ sum = vmul_lane_s16(temp0, filter, 0);
+ sum = vmla_lane_s16(sum, temp1, filter, 1);
+ sum = vmla_lane_s16(sum, temp2, filter, 2);
sum_0 = vmovl_s16(sum);
/* s[3]*128 -- and filter coeff max can be 128,
* so the max value possible = 128*128*255. Therefore, 32 bits are required to
* hold the result.
*/
- s3_0 = vmull_n_s16(s3, filter_x[3]);
+ s3_0 = vmull_lane_s16(s3, filter, 3);
sum_0 = vaddq_s32(sum_0, s3_0);
sum_0 = vaddq_s32(sum_0, round_vec_0);
sum_0 = vrshlq_s32(sum_0, round_bits);
- sum_0 = vmaxq_s32(sum_0, zero);
sum_0 = vminq_s32(sum_0, round_vec_1);
res = vqmovun_s32(sum_0);
return res;
}
-static INLINE int16x8_t
-convolve8_8x8_s16(const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
- const int16x8_t s3, const int16x8_t s4, const int16x8_t s5,
- const int16x8_t s6, const int16x8_t s7, const int16_t *filter,
- const int16x8_t horiz_const, const int16x8_t shift_round_0) {
+static INLINE int16x8_t convolve8_8x8_s16(
+ const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
+ const int16x8_t s3, const int16x8_t s4, const int16x8_t s5,
+ const int16x8_t s6, const int16x8_t s7, const int16x8_t filter,
+ const int16x8_t horiz_const, const int16x8_t shift_round_0) {
+ const int16x4_t filter_lo = vget_low_s16(filter);
+ const int16x4_t filter_hi = vget_high_s16(filter);
int16x8_t sum;
- int16x8_t res;
sum = horiz_const;
- sum = vmlaq_n_s16(sum, s0, filter[0]);
- sum = vmlaq_n_s16(sum, s1, filter[1]);
- sum = vmlaq_n_s16(sum, s2, filter[2]);
- sum = vmlaq_n_s16(sum, s3, filter[3]);
- sum = vmlaq_n_s16(sum, s4, filter[4]);
- sum = vmlaq_n_s16(sum, s5, filter[5]);
- sum = vmlaq_n_s16(sum, s6, filter[6]);
- sum = vmlaq_n_s16(sum, s7, filter[7]);
+ sum = vmlaq_lane_s16(sum, s0, filter_lo, 0);
+ sum = vmlaq_lane_s16(sum, s1, filter_lo, 1);
+ sum = vmlaq_lane_s16(sum, s2, filter_lo, 2);
+ sum = vmlaq_lane_s16(sum, s3, filter_lo, 3);
+ sum = vmlaq_lane_s16(sum, s4, filter_hi, 0);
+ sum = vmlaq_lane_s16(sum, s5, filter_hi, 1);
+ sum = vmlaq_lane_s16(sum, s6, filter_hi, 2);
+ sum = vmlaq_lane_s16(sum, s7, filter_hi, 3);
- res = vqrshlq_s16(sum, shift_round_0);
+ sum = vqrshlq_s16(sum, shift_round_0);
- return res;
+ return sum;
+}
+
+#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+
+DECLARE_ALIGNED(16, static const uint8_t, dot_prod_permute_tbl[48]) = {
+ 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6,
+ 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10,
+ 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14
+};
+
+#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+
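As an illustration (not part of the patch): each 16-byte row of dot_prod_permute_tbl turns 16 consecutive samples into four overlapping 4-sample windows, so a single SDOT/USDOT instruction can produce four adjacent outputs. A minimal sketch:

#include <arm_neon.h>

/* With in = { s0, s1, ..., s15 } and the first table row
 * { 0,1,2,3, 1,2,3,4, 2,3,4,5, 3,4,5,6 }, the result is
 * { s0,s1,s2,s3, s1,s2,s3,s4, s2,s3,s4,s5, s3,s4,s5,s6 }:
 * four sliding windows, one per output pixel. */
static inline uint8x16_t sliding_windows_0_3(uint8x16_t in) {
  const uint8x16_t tbl = vld1q_u8(dot_prod_permute_tbl);
  return vqtbl1q_u8(in, tbl);
}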
+#if defined(__aarch64__) && defined(__ARM_FEATURE_MATMUL_INT8)
+
+static INLINE int32x4_t convolve8_4_usdot(uint8x16_t samples,
+ const int8x8_t filters,
+ const uint8x16x2_t permute_tbl,
+ const int32x4_t horiz_const) {
+ uint8x16_t permuted_samples[2];
+ int32x4_t sum;
+
+ /* Permute samples ready for dot product. */
+ /* { 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 } */
+ permuted_samples[0] = vqtbl1q_u8(samples, permute_tbl.val[0]);
+ /* { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 } */
+ permuted_samples[1] = vqtbl1q_u8(samples, permute_tbl.val[1]);
+
+ /* First 4 output values. */
+ sum = vusdotq_lane_s32(horiz_const, permuted_samples[0], filters, 0);
+ sum = vusdotq_lane_s32(sum, permuted_samples[1], filters, 1);
+
+ /* Narrowing and packing is performed by the caller. */
+ return sum;
+}
+
+static INLINE int16x8_t convolve8_8_usdot(uint8x16_t samples,
+ const int8x8_t filters,
+ const uint8x16x3_t permute_tbl,
+ const int32x4_t horiz_const,
+ const int16x8_t shift_round_0) {
+ uint8x16_t permuted_samples[3];
+ int32x4_t sum0, sum1;
+ int16x8_t sum;
+
+ /* Permute samples ready for dot product. */
+ /* { 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 } */
+ permuted_samples[0] = vqtbl1q_u8(samples, permute_tbl.val[0]);
+ /* { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 } */
+ permuted_samples[1] = vqtbl1q_u8(samples, permute_tbl.val[1]);
+ /* { 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14 } */
+ permuted_samples[2] = vqtbl1q_u8(samples, permute_tbl.val[2]);
+
+ /* First 4 output values. */
+ sum0 = vusdotq_lane_s32(horiz_const, permuted_samples[0], filters, 0);
+ sum0 = vusdotq_lane_s32(sum0, permuted_samples[1], filters, 1);
+ /* Second 4 output values. */
+ sum1 = vusdotq_lane_s32(horiz_const, permuted_samples[1], filters, 0);
+ sum1 = vusdotq_lane_s32(sum1, permuted_samples[2], filters, 1);
+
+ /* Narrow and re-pack. */
+ sum = vcombine_s16(vmovn_s32(sum0), vmovn_s32(sum1));
+ return vqrshlq_s16(sum, shift_round_0);
}
-static INLINE int16x4_t
-convolve8_4x4_s16(const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
- const int16x4_t s3, const int16x4_t s4, const int16x4_t s5,
- const int16x4_t s6, const int16x4_t s7, const int16_t *filter,
- const int16x4_t horiz_const, const int16x4_t shift_round_0) {
+#elif defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+
+static INLINE int32x4_t convolve8_4_sdot(uint8x16_t samples,
+ const int8x8_t filters,
+ const int32x4_t correction,
+ const uint8x16_t range_limit,
+ const uint8x16x2_t permute_tbl) {
+ int8x16_t clamped_samples, permuted_samples[2];
+ int32x4_t sum;
+
+ /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */
+ clamped_samples = vreinterpretq_s8_u8(vsubq_u8(samples, range_limit));
+
+ /* Permute samples ready for dot product. */
+ /* { 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 } */
+ permuted_samples[0] = vqtbl1q_s8(clamped_samples, permute_tbl.val[0]);
+ /* { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 } */
+ permuted_samples[1] = vqtbl1q_s8(clamped_samples, permute_tbl.val[1]);
+
+ /* Accumulate dot product into 'correction' to account for range clamp. */
+ sum = vdotq_lane_s32(correction, permuted_samples[0], filters, 0);
+ sum = vdotq_lane_s32(sum, permuted_samples[1], filters, 1);
+
+ /* Narrowing and packing is performed by the caller. */
+ return sum;
+}
+
+static INLINE int16x8_t convolve8_8_sdot(uint8x16_t samples,
+ const int8x8_t filters,
+ const int32x4_t correction,
+ const uint8x16_t range_limit,
+ const uint8x16x3_t permute_tbl,
+ const int16x8_t shift_round_0) {
+ int8x16_t clamped_samples, permuted_samples[3];
+ int32x4_t sum0, sum1;
+ int16x8_t sum;
+
+ /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */
+ clamped_samples = vreinterpretq_s8_u8(vsubq_u8(samples, range_limit));
+
+ /* Permute samples ready for dot product. */
+ /* { 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 } */
+ permuted_samples[0] = vqtbl1q_s8(clamped_samples, permute_tbl.val[0]);
+ /* { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 } */
+ permuted_samples[1] = vqtbl1q_s8(clamped_samples, permute_tbl.val[1]);
+ /* { 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14 } */
+ permuted_samples[2] = vqtbl1q_s8(clamped_samples, permute_tbl.val[2]);
+
+ /* Accumulate dot product into 'correction' to account for range clamp. */
+ /* First 4 output values. */
+ sum0 = vdotq_lane_s32(correction, permuted_samples[0], filters, 0);
+ sum0 = vdotq_lane_s32(sum0, permuted_samples[1], filters, 1);
+ /* Second 4 output values. */
+ sum1 = vdotq_lane_s32(correction, permuted_samples[1], filters, 0);
+ sum1 = vdotq_lane_s32(sum1, permuted_samples[2], filters, 1);
+
+ /* Narrow and re-pack. */
+ sum = vcombine_s16(vmovn_s32(sum0), vmovn_s32(sum1));
+ return vqrshlq_s16(sum, shift_round_0);
+}
+
+#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+
+static INLINE int16x4_t convolve8_4x4_s16(
+ const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
+ const int16x4_t s3, const int16x4_t s4, const int16x4_t s5,
+ const int16x4_t s6, const int16x4_t s7, const int16x8_t filter,
+ const int16x4_t horiz_const, const int16x4_t shift_round_0) {
+ const int16x4_t filter_lo = vget_low_s16(filter);
+ const int16x4_t filter_hi = vget_high_s16(filter);
int16x4_t sum;
+
sum = horiz_const;
- sum = vmla_n_s16(sum, s0, filter[0]);
- sum = vmla_n_s16(sum, s1, filter[1]);
- sum = vmla_n_s16(sum, s2, filter[2]);
- sum = vmla_n_s16(sum, s3, filter[3]);
- sum = vmla_n_s16(sum, s4, filter[4]);
- sum = vmla_n_s16(sum, s5, filter[5]);
- sum = vmla_n_s16(sum, s6, filter[6]);
- sum = vmla_n_s16(sum, s7, filter[7]);
+ sum = vmla_lane_s16(sum, s0, filter_lo, 0);
+ sum = vmla_lane_s16(sum, s1, filter_lo, 1);
+ sum = vmla_lane_s16(sum, s2, filter_lo, 2);
+ sum = vmla_lane_s16(sum, s3, filter_lo, 3);
+ sum = vmla_lane_s16(sum, s4, filter_hi, 0);
+ sum = vmla_lane_s16(sum, s5, filter_hi, 1);
+ sum = vmla_lane_s16(sum, s6, filter_hi, 2);
+ sum = vmla_lane_s16(sum, s7, filter_hi, 3);
sum = vqrshl_s16(sum, shift_round_0);
@@ -265,27 +382,25 @@ convolve8_4x4_s16(const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
static INLINE uint16x4_t convolve8_4x4_s32(
const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
const int16x4_t s3, const int16x4_t s4, const int16x4_t s5,
- const int16x4_t s6, const int16x4_t s7, const int16_t *y_filter,
+ const int16x4_t s6, const int16x4_t s7, const int16x8_t y_filter,
const int32x4_t round_shift_vec, const int32x4_t offset_const) {
- int32x4_t sum0;
- uint16x4_t res;
- const int32x4_t zero = vdupq_n_s32(0);
-
- sum0 = vmull_n_s16(s0, y_filter[0]);
- sum0 = vmlal_n_s16(sum0, s1, y_filter[1]);
- sum0 = vmlal_n_s16(sum0, s2, y_filter[2]);
- sum0 = vmlal_n_s16(sum0, s3, y_filter[3]);
- sum0 = vmlal_n_s16(sum0, s4, y_filter[4]);
- sum0 = vmlal_n_s16(sum0, s5, y_filter[5]);
- sum0 = vmlal_n_s16(sum0, s6, y_filter[6]);
- sum0 = vmlal_n_s16(sum0, s7, y_filter[7]);
-
- sum0 = vaddq_s32(sum0, offset_const);
- sum0 = vqrshlq_s32(sum0, round_shift_vec);
- sum0 = vmaxq_s32(sum0, zero);
- res = vmovn_u32(vreinterpretq_u32_s32(sum0));
-
- return res;
+ const int16x4_t y_filter_lo = vget_low_s16(y_filter);
+ const int16x4_t y_filter_hi = vget_high_s16(y_filter);
+ int32x4_t sum;
+
+ sum = vmull_lane_s16(s0, y_filter_lo, 0);
+ sum = vmlal_lane_s16(sum, s1, y_filter_lo, 1);
+ sum = vmlal_lane_s16(sum, s2, y_filter_lo, 2);
+ sum = vmlal_lane_s16(sum, s3, y_filter_lo, 3);
+ sum = vmlal_lane_s16(sum, s4, y_filter_hi, 0);
+ sum = vmlal_lane_s16(sum, s5, y_filter_hi, 1);
+ sum = vmlal_lane_s16(sum, s6, y_filter_hi, 2);
+ sum = vmlal_lane_s16(sum, s7, y_filter_hi, 3);
+
+ sum = vaddq_s32(sum, offset_const);
+ sum = vqrshlq_s32(sum, round_shift_vec);
+
+ return vqmovun_s32(sum);
}
#endif // AOM_AV1_COMMON_ARM_CONVOLVE_NEON_H_
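
A recurring simplification in this patch replaces an explicit clamp-to-zero followed by an unsigned narrow with a single saturating-narrow instruction. A small sketch of the equivalence (illustrative, not from the patch):

#include <arm_neon.h>

/* Old pattern: clamp negatives to zero, reinterpret, then narrow with
 * unsigned saturation. */
static inline uint8x8_t narrow_old(int16x8_t v) {
  v = vmaxq_s16(v, vdupq_n_s16(0));
  return vqmovn_u16(vreinterpretq_u16_s16(v));
}

/* New pattern: vqmovun_s16 saturates straight to [0, 255], mapping negative
 * inputs to 0 and values above 255 to 255, so the separate max is redundant. */
static inline uint8x8_t narrow_new(int16x8_t v) {
  return vqmovun_s16(v);
}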
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/arm/jnt_convolve_neon.c b/chromium/third_party/libaom/source/libaom/av1/common/arm/jnt_convolve_neon.c
index e0b76a87bce..26f1a31397f 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/arm/jnt_convolve_neon.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/arm/jnt_convolve_neon.c
@@ -45,7 +45,7 @@ static INLINE void compute_avg_4x1(
dst0 = vqrshlq_s32(dst0, round_bits_vec);
- tmp0 = vqmovn_s32(dst0);
+ tmp0 = vmovn_s32(dst0);
tmp4 = vcombine_s16(tmp0, tmp0);
*t0 = vqmovun_s16(tmp4);
@@ -57,7 +57,7 @@ static INLINE void compute_avg_4x1(
tmp0 = vqrshl_s16(tmp0, round_bits_vec);
- tmp4 = vcombine_s16(tmp0, tmp0);
+ tmp4 = vcombine_s16(tmp0, vdup_n_s16(0));
*t0 = vqmovun_s16(tmp4);
}
@@ -67,7 +67,6 @@ static INLINE void compute_avg_8x1(
uint16x8_t res0, uint16x8_t d0, const uint16_t fwd_offset,
const uint16_t bck_offset, const int16x4_t sub_const,
const int16_t round_bits, const int use_dist_wtd_comp_avg, uint8x8_t *t0) {
- int16x4_t tmp0, tmp2;
int16x8_t f0;
uint32x4_t sum0, sum2;
int32x4_t dst0, dst2;
@@ -92,10 +91,7 @@ static INLINE void compute_avg_8x1(
dst0 = vqrshlq_s32(dst0, round_bits_vec);
dst2 = vqrshlq_s32(dst2, round_bits_vec);
- tmp0 = vqmovn_s32(dst0);
- tmp2 = vqmovn_s32(dst2);
-
- f0 = vcombine_s16(tmp0, tmp2);
+ f0 = vcombine_s16(vmovn_s32(dst0), vmovn_s32(dst2));
*t0 = vqmovun_s16(f0);
@@ -126,7 +122,6 @@ static INLINE void compute_avg_4x4(
int32x4_t dst0, dst1, dst2, dst3;
int16x8_t tmp4, tmp5;
- const int16x8_t zero = vdupq_n_s16(0);
if (use_dist_wtd_comp_avg) {
const int32x4_t round_bits_vec = vdupq_n_s32((int32_t)(-round_bits));
@@ -156,17 +151,11 @@ static INLINE void compute_avg_4x4(
dst2 = vqrshlq_s32(dst2, round_bits_vec);
dst3 = vqrshlq_s32(dst3, round_bits_vec);
- tmp0 = vqmovn_s32(dst0);
- tmp1 = vqmovn_s32(dst1);
- tmp2 = vqmovn_s32(dst2);
- tmp3 = vqmovn_s32(dst3);
- tmp4 = vcombine_s16(tmp0, tmp1);
- tmp5 = vcombine_s16(tmp2, tmp3);
- tmp4 = vmaxq_s16(tmp4, zero);
- tmp5 = vmaxq_s16(tmp5, zero);
+ tmp4 = vcombine_s16(vmovn_s32(dst0), vmovn_s32(dst1));
+ tmp5 = vcombine_s16(vmovn_s32(dst2), vmovn_s32(dst3));
- *t0 = vqmovn_u16(vreinterpretq_u16_s16(tmp4));
- *t1 = vqmovn_u16(vreinterpretq_u16_s16(tmp5));
+ *t0 = vqmovun_s16(tmp4);
+ *t1 = vqmovun_s16(tmp5);
} else {
const int16x4_t round_bits_vec = vdup_n_s16(-round_bits);
tmp_u0 = vhadd_u16(res0, d0);
@@ -186,11 +175,9 @@ static INLINE void compute_avg_4x4(
tmp4 = vcombine_s16(tmp0, tmp1);
tmp5 = vcombine_s16(tmp2, tmp3);
- tmp4 = vmaxq_s16(tmp4, zero);
- tmp5 = vmaxq_s16(tmp5, zero);
- *t0 = vqmovn_u16(vreinterpretq_u16_s16(tmp4));
- *t1 = vqmovn_u16(vreinterpretq_u16_s16(tmp5));
+ *t0 = vqmovun_s16(tmp4);
+ *t1 = vqmovun_s16(tmp5);
}
}
@@ -201,14 +188,12 @@ static INLINE void compute_avg_8x4(
const int16x4_t sub_const, const int16_t round_bits,
const int use_dist_wtd_comp_avg, uint8x8_t *t0, uint8x8_t *t1,
uint8x8_t *t2, uint8x8_t *t3) {
- int16x4_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int16x8_t f0, f1, f2, f3;
uint32x4_t sum0, sum1, sum2, sum3;
uint32x4_t sum4, sum5, sum6, sum7;
int32x4_t dst0, dst1, dst2, dst3;
int32x4_t dst4, dst5, dst6, dst7;
uint16x8_t tmp_u0, tmp_u1, tmp_u2, tmp_u3;
- const int16x8_t zero = vdupq_n_s16(0);
if (use_dist_wtd_comp_avg) {
const int32x4_t sub_const_vec = vmovl_s16(sub_const);
@@ -260,29 +245,15 @@ static INLINE void compute_avg_8x4(
dst6 = vqrshlq_s32(dst6, round_bits_vec);
dst7 = vqrshlq_s32(dst7, round_bits_vec);
- tmp0 = vqmovn_s32(dst0);
- tmp1 = vqmovn_s32(dst1);
- tmp2 = vqmovn_s32(dst2);
- tmp3 = vqmovn_s32(dst3);
- tmp4 = vqmovn_s32(dst4);
- tmp5 = vqmovn_s32(dst5);
- tmp6 = vqmovn_s32(dst6);
- tmp7 = vqmovn_s32(dst7);
-
- f0 = vcombine_s16(tmp0, tmp2);
- f1 = vcombine_s16(tmp1, tmp3);
- f2 = vcombine_s16(tmp4, tmp6);
- f3 = vcombine_s16(tmp5, tmp7);
-
- f0 = vmaxq_s16(f0, zero);
- f1 = vmaxq_s16(f1, zero);
- f2 = vmaxq_s16(f2, zero);
- f3 = vmaxq_s16(f3, zero);
-
- *t0 = vqmovn_u16(vreinterpretq_u16_s16(f0));
- *t1 = vqmovn_u16(vreinterpretq_u16_s16(f1));
- *t2 = vqmovn_u16(vreinterpretq_u16_s16(f2));
- *t3 = vqmovn_u16(vreinterpretq_u16_s16(f3));
+ f0 = vcombine_s16(vmovn_s32(dst0), vmovn_s32(dst2));
+ f1 = vcombine_s16(vmovn_s32(dst1), vmovn_s32(dst3));
+ f2 = vcombine_s16(vmovn_s32(dst4), vmovn_s32(dst6));
+ f3 = vcombine_s16(vmovn_s32(dst5), vmovn_s32(dst7));
+
+ *t0 = vqmovun_s16(f0);
+ *t1 = vqmovun_s16(f1);
+ *t2 = vqmovun_s16(f2);
+ *t3 = vqmovun_s16(f3);
} else {
const int16x8_t sub_const_vec = vcombine_s16(sub_const, sub_const);
@@ -303,21 +274,205 @@ static INLINE void compute_avg_8x4(
f2 = vqrshlq_s16(f2, round_bits_vec);
f3 = vqrshlq_s16(f3, round_bits_vec);
- f0 = vmaxq_s16(f0, zero);
- f1 = vmaxq_s16(f1, zero);
- f2 = vmaxq_s16(f2, zero);
- f3 = vmaxq_s16(f3, zero);
+ *t0 = vqmovun_s16(f0);
+ *t1 = vqmovun_s16(f1);
+ *t2 = vqmovun_s16(f2);
+ *t3 = vqmovun_s16(f3);
+ }
+}
+
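For orientation, a scalar sketch of the weighted compound average that the compute_avg_* helpers above vectorize (hypothetical helper; the exact weight precision and rounding live in conv_params and are simplified here):

#include <stdint.h>

/* Blend two 16-bit compound predictions into an 8-bit pixel. fwd_w + bck_w
 * is assumed to equal (1 << prec_bits); sub_const removes the compound
 * offset before the final rounding shift. */
static inline uint8_t dist_wtd_avg_px(uint16_t ref, uint16_t cur,
                                      uint16_t fwd_w, uint16_t bck_w,
                                      int prec_bits, int32_t sub_const,
                                      int round_bits) {
  int32_t v = ((int32_t)ref * fwd_w + (int32_t)cur * bck_w) >> prec_bits;
  v = (v - sub_const + (1 << (round_bits - 1))) >> round_bits;
  return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
}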
+#if defined(__aarch64__) && defined(__ARM_FEATURE_MATMUL_INT8)
+
+static INLINE void dist_wtd_convolve_2d_horiz_neon(
+ const uint8_t *src, int src_stride, int16_t *im_block, const int im_stride,
+ const int16x8_t x_filter_s16, const int im_h, int w, const int round_0) {
+ const int bd = 8;
+ int16_t *dst_ptr = im_block;
+ int dst_stride = im_stride;
+ int width = w;
+ int height = im_h;
+
+ const int8x8_t x_filter = vmovn_s16(x_filter_s16);
+ const int32x4_t horiz_const = vdupq_n_s32(1 << (bd + FILTER_BITS - 2));
+
+ if (w == 4) {
+ const uint8x16x2_t permute_tbl = vld1q_u8_x2(dot_prod_permute_tbl);
+ const int16x4_t shift_round_0 = vdup_n_s16(-(round_0));
+ uint8x16_t s0, s1, s2, s3;
+ int32x4_t t0, t1, t2, t3;
+ int16x4_t d0, d1, d2, d3;
+
+ do {
+ s0 = vld1q_u8(src + 0 * src_stride);
+ s1 = vld1q_u8(src + 1 * src_stride);
+ s2 = vld1q_u8(src + 2 * src_stride);
+ s3 = vld1q_u8(src + 3 * src_stride);
+
+ t0 = convolve8_4_usdot(s0, x_filter, permute_tbl, horiz_const);
+ t1 = convolve8_4_usdot(s1, x_filter, permute_tbl, horiz_const);
+ t2 = convolve8_4_usdot(s2, x_filter, permute_tbl, horiz_const);
+ t3 = convolve8_4_usdot(s3, x_filter, permute_tbl, horiz_const);
+
+ d0 = vqrshl_s16(vmovn_s32(t0), shift_round_0);
+ d1 = vqrshl_s16(vmovn_s32(t1), shift_round_0);
+ d2 = vqrshl_s16(vmovn_s32(t2), shift_round_0);
+ d3 = vqrshl_s16(vmovn_s32(t3), shift_round_0);
+
+ vst1_s16((dst_ptr + 0 * dst_stride), d0);
+ vst1_s16((dst_ptr + 1 * dst_stride), d1);
+ vst1_s16((dst_ptr + 2 * dst_stride), d2);
+ vst1_s16((dst_ptr + 3 * dst_stride), d3);
+
+ src += 4 * src_stride;
+ dst_ptr += 4 * dst_stride;
+ height -= 4;
+ } while (height > 0);
+ } else {
+ const uint8x16x3_t permute_tbl = vld1q_u8_x3(dot_prod_permute_tbl);
+ const int16x8_t shift_round_0 = vdupq_n_s16(-(round_0));
+ const uint8_t *s;
+ int16_t *d;
+ uint8x16_t s0, s1, s2, s3;
+ int16x8_t d0, d1, d2, d3;
+
+ do {
+ width = w;
+ s = src;
+ d = dst_ptr;
+
+ do {
+ s0 = vld1q_u8(s + 0 * src_stride);
+ s1 = vld1q_u8(s + 1 * src_stride);
+ s2 = vld1q_u8(s + 2 * src_stride);
+ s3 = vld1q_u8(s + 3 * src_stride);
+
+ d0 = convolve8_8_usdot(s0, x_filter, permute_tbl, horiz_const,
+ shift_round_0);
+ d1 = convolve8_8_usdot(s1, x_filter, permute_tbl, horiz_const,
+ shift_round_0);
+ d2 = convolve8_8_usdot(s2, x_filter, permute_tbl, horiz_const,
+ shift_round_0);
+ d3 = convolve8_8_usdot(s3, x_filter, permute_tbl, horiz_const,
+ shift_round_0);
+
+ vst1q_s16(d + 0 * dst_stride, d0);
+ vst1q_s16(d + 1 * dst_stride, d1);
+ vst1q_s16(d + 2 * dst_stride, d2);
+ vst1q_s16(d + 3 * dst_stride, d3);
+
+ s += 8;
+ d += 8;
+ width -= 8;
+ } while (width > 0);
+
+ src += 4 * src_stride;
+ dst_ptr += 4 * dst_stride;
+ height -= 4;
+ } while (height > 0);
+ }
+}
+
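Taken together, the file now carries three alternative horizontal kernels selected at compile time (the #elif and #else branches follow below). Condensed, the ladder is:

#if defined(__aarch64__) && defined(__ARM_FEATURE_MATMUL_INT8)
/* USDOT path: mixed-sign dot product, unsigned samples used directly. */
#elif defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
/* SDOT path: samples clamped to int8, compensated via 'correction'. */
#else
/* Generic NEON path: widen to int16, multiply-accumulate by lane. */
#endif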
+#elif defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+
+static INLINE void dist_wtd_convolve_2d_horiz_neon(
+ const uint8_t *src, int src_stride, int16_t *im_block, const int im_stride,
+ const int16x8_t x_filter_s16, const int im_h, int w, const int round_0) {
+ const int bd = 8;
+ int16_t *dst_ptr = im_block;
+ int dst_stride = im_stride;
+ int width = w;
+ int height = im_h;
+
+ const int8x8_t x_filter = vmovn_s16(x_filter_s16);
+ const int32_t horiz_const = (1 << (bd + FILTER_BITS - 2));
+ // Dot product constants.
+ const int16x8_t correct_tmp = vshlq_n_s16(x_filter_s16, 7);
+ const int32x4_t correction =
+ vdupq_n_s32(vaddlvq_s16(correct_tmp) + horiz_const);
+ const uint8x16_t range_limit = vdupq_n_u8(128);
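+ // Why this works (sketch): with c[k] = s[k] - 128 per clamped sample,
+ // sum(f[k] * s[k]) == sum(f[k] * c[k]) + 128 * sum(f[k]). The vshlq_n_s16
+ // above computes 128 * f[k] per tap and vaddlvq_s16 sums those products, so
+ // seeding the accumulator with 'correction' restores the unsigned result.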
+
+ if (w == 4) {
+ const uint8x16x2_t permute_tbl = vld1q_u8_x2(dot_prod_permute_tbl);
+ const int16x4_t shift_round_0 = vdup_n_s16(-(round_0));
+ uint8x16_t s0, s1, s2, s3;
+ int32x4_t t0, t1, t2, t3;
+ int16x4_t d0, d1, d2, d3;
+
+ do {
+ s0 = vld1q_u8(src + 0 * src_stride);
+ s1 = vld1q_u8(src + 1 * src_stride);
+ s2 = vld1q_u8(src + 2 * src_stride);
+ s3 = vld1q_u8(src + 3 * src_stride);
+
+ t0 = convolve8_4_sdot(s0, x_filter, correction, range_limit, permute_tbl);
+ t1 = convolve8_4_sdot(s1, x_filter, correction, range_limit, permute_tbl);
+ t2 = convolve8_4_sdot(s2, x_filter, correction, range_limit, permute_tbl);
+ t3 = convolve8_4_sdot(s3, x_filter, correction, range_limit, permute_tbl);
+
+ d0 = vqrshl_s16(vmovn_s32(t0), shift_round_0);
+ d1 = vqrshl_s16(vmovn_s32(t1), shift_round_0);
+ d2 = vqrshl_s16(vmovn_s32(t2), shift_round_0);
+ d3 = vqrshl_s16(vmovn_s32(t3), shift_round_0);
+
+ vst1_s16((dst_ptr + 0 * dst_stride), d0);
+ vst1_s16((dst_ptr + 1 * dst_stride), d1);
+ vst1_s16((dst_ptr + 2 * dst_stride), d2);
+ vst1_s16((dst_ptr + 3 * dst_stride), d3);
+
+ src += 4 * src_stride;
+ dst_ptr += 4 * dst_stride;
+ height -= 4;
+ } while (height > 0);
+ } else {
+ const uint8x16x3_t permute_tbl = vld1q_u8_x3(dot_prod_permute_tbl);
+ const int16x8_t shift_round_0 = vdupq_n_s16(-(round_0));
+ const uint8_t *s;
+ int16_t *d;
+ uint8x16_t s0, s1, s2, s3;
+ int16x8_t d0, d1, d2, d3;
+
+ do {
+ width = w;
+ s = src;
+ d = dst_ptr;
+
+ do {
+ s0 = vld1q_u8(s + 0 * src_stride);
+ s1 = vld1q_u8(s + 1 * src_stride);
+ s2 = vld1q_u8(s + 2 * src_stride);
+ s3 = vld1q_u8(s + 3 * src_stride);
+
+ d0 = convolve8_8_sdot(s0, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+ d1 = convolve8_8_sdot(s1, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+ d2 = convolve8_8_sdot(s2, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+ d3 = convolve8_8_sdot(s3, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+
+ vst1q_s16(d + 0 * dst_stride, d0);
+ vst1q_s16(d + 1 * dst_stride, d1);
+ vst1q_s16(d + 2 * dst_stride, d2);
+ vst1q_s16(d + 3 * dst_stride, d3);
+
+ s += 8;
+ d += 8;
+ width -= 8;
+ } while (width > 0);
- *t0 = vqmovn_u16(vreinterpretq_u16_s16(f0));
- *t1 = vqmovn_u16(vreinterpretq_u16_s16(f1));
- *t2 = vqmovn_u16(vreinterpretq_u16_s16(f2));
- *t3 = vqmovn_u16(vreinterpretq_u16_s16(f3));
+ src += 4 * src_stride;
+ dst_ptr += 4 * dst_stride;
+ height -= 4;
+ } while (height > 0);
}
}
+#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+
static INLINE void dist_wtd_convolve_2d_horiz_neon(
const uint8_t *src, int src_stride, int16_t *im_block, const int im_stride,
- int16_t *x_filter_tmp, const int im_h, int w, const int round_0) {
+ const int16x8_t x_filter, const int im_h, int w, const int round_0) {
const int bd = 8;
const uint8_t *s;
int16_t *dst_ptr;
@@ -380,13 +535,13 @@ static INLINE void dist_wtd_convolve_2d_horiz_neon(
s9 = vget_low_s16(tt2);
s10 = vget_low_s16(tt3);
- d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
+ d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter,
horiz_const, shift_round_0);
- d1 = convolve8_4x4_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp,
+ d1 = convolve8_4x4_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter,
horiz_const, shift_round_0);
- d2 = convolve8_4x4_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp,
+ d2 = convolve8_4x4_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter,
horiz_const, shift_round_0);
- d3 = convolve8_4x4_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp,
+ d3 = convolve8_4x4_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter,
horiz_const, shift_round_0);
transpose_s16_4x4d(&d0, &d1, &d2, &d3);
@@ -418,7 +573,7 @@ static INLINE void dist_wtd_convolve_2d_horiz_neon(
s6 = vext_s16(s4, s7, 2); // a6 a7 a8 a9
s7 = vext_s16(s4, s7, 3); // a7 a8 a9 a10
- d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
+ d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter,
horiz_const, shift_round_0);
vst1_s16(dst_ptr, d0);
@@ -483,22 +638,22 @@ static INLINE void dist_wtd_convolve_2d_horiz_neon(
s13 = vreinterpretq_s16_u16(vmovl_u8(t6));
s14 = vreinterpretq_s16_u16(vmovl_u8(t7));
- res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
+ res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter,
horiz_const, shift_round_0);
- res1 = convolve8_8x8_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp,
+ res1 = convolve8_8x8_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter,
horiz_const, shift_round_0);
- res2 = convolve8_8x8_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp,
+ res2 = convolve8_8x8_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter,
horiz_const, shift_round_0);
- res3 = convolve8_8x8_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp,
+ res3 = convolve8_8x8_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter,
horiz_const, shift_round_0);
- res4 = convolve8_8x8_s16(s4, s5, s6, s7, s8, s9, s10, s11, x_filter_tmp,
+ res4 = convolve8_8x8_s16(s4, s5, s6, s7, s8, s9, s10, s11, x_filter,
+ horiz_const, shift_round_0);
+ res5 = convolve8_8x8_s16(s5, s6, s7, s8, s9, s10, s11, s12, x_filter,
+ horiz_const, shift_round_0);
+ res6 = convolve8_8x8_s16(s6, s7, s8, s9, s10, s11, s12, s13, x_filter,
+ horiz_const, shift_round_0);
+ res7 = convolve8_8x8_s16(s7, s8, s9, s10, s11, s12, s13, s14, x_filter,
horiz_const, shift_round_0);
- res5 = convolve8_8x8_s16(s5, s6, s7, s8, s9, s10, s11, s12,
- x_filter_tmp, horiz_const, shift_round_0);
- res6 = convolve8_8x8_s16(s6, s7, s8, s9, s10, s11, s12, s13,
- x_filter_tmp, horiz_const, shift_round_0);
- res7 = convolve8_8x8_s16(s7, s8, s9, s10, s11, s12, s13, s14,
- x_filter_tmp, horiz_const, shift_round_0);
transpose_s16_8x8(&res0, &res1, &res2, &res3, &res4, &res5, &res6,
&res7);
@@ -543,8 +698,8 @@ static INLINE void dist_wtd_convolve_2d_horiz_neon(
s6 = vextq_s16(temp_0, s7, 6); // a6 a7 a8 a9 a10 a11 a12 a13
s7 = vextq_s16(temp_0, s7, 7); // a7 a8 a9 a10 a11 a12 a13 a14
- res0 = convolve8_8x8_s16(temp_0, s1, s2, s3, s4, s5, s6, s7,
- x_filter_tmp, horiz_const, shift_round_0);
+ res0 = convolve8_8x8_s16(temp_0, s1, s2, s3, s4, s5, s6, s7, x_filter,
+ horiz_const, shift_round_0);
vst1q_s16(d_tmp, res0);
s += 8;
@@ -559,9 +714,11 @@ static INLINE void dist_wtd_convolve_2d_horiz_neon(
}
}
+#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+
static INLINE void dist_wtd_convolve_2d_vert_neon(
int16_t *im_block, const int im_stride, uint8_t *dst8, int dst8_stride,
- ConvolveParams *conv_params, const int16_t *y_filter, int h, int w) {
+ ConvolveParams *conv_params, const int16x8_t y_filter, int h, int w) {
uint8_t *dst_u8_ptr, *d_u8;
CONV_BUF_TYPE *dst_ptr, *dst;
int16_t *src_ptr, *s;
@@ -731,21 +888,18 @@ void av1_dist_wtd_convolve_2d_neon(const uint8_t *src, int src_stride,
const int horiz_offset = filter_params_x->taps / 2 - 1;
const int round_0 = conv_params->round_0 - 1;
const uint8_t *src_ptr = src - vert_offset * src_stride - horiz_offset;
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
+ const int16_t *x_filter_ptr = av1_get_interp_filter_subpel_kernel(
filter_params_x, subpel_x_qn & SUBPEL_MASK);
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
+ const int16_t *y_filter_ptr = av1_get_interp_filter_subpel_kernel(
filter_params_y, subpel_y_qn & SUBPEL_MASK);
- int16_t x_filter_tmp[8];
- int16x8_t filter_x_coef = vld1q_s16(x_filter);
-
- // filter coeffs are even, so downshifting by 1 to reduce intermediate
- // precision requirements.
- filter_x_coef = vshrq_n_s16(filter_x_coef, 1);
- vst1q_s16(&x_filter_tmp[0], filter_x_coef);
+ // Filter values are even, so downshift by 1 to reduce intermediate precision
+ // requirements.
+ const int16x8_t x_filter = vshrq_n_s16(vld1q_s16(x_filter_ptr), 1);
+ const int16x8_t y_filter = vld1q_s16(y_filter_ptr);
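+ // Only the x filter is halved; the "- 1" applied to round_0 above
+ // compensates for the halved taps, while the vertical pass keeps
+ // full-precision taps and rounding.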
dist_wtd_convolve_2d_horiz_neon(src_ptr, src_stride, im_block, im_stride,
- x_filter_tmp, im_h, w, round_0);
+ x_filter, im_h, w, round_0);
dist_wtd_convolve_2d_vert_neon(im_block, im_stride, dst8, dst8_stride,
conv_params, y_filter, h, w);
@@ -869,6 +1023,376 @@ void av1_dist_wtd_convolve_2d_copy_neon(const uint8_t *src, int src_stride,
}
}
+#if defined(__aarch64__) && defined(__ARM_FEATURE_MATMUL_INT8)
+
+void av1_dist_wtd_convolve_x_neon(const uint8_t *src, int src_stride,
+ uint8_t *dst8, int dst8_stride, int w, int h,
+ const InterpFilterParams *filter_params_x,
+ const int subpel_x_qn,
+ ConvolveParams *conv_params) {
+ assert(!(w % 4));
+ assert(!(h % 4));
+
+ const int horiz_offset = filter_params_x->taps / 2 - 1;
+ const int bits = FILTER_BITS - conv_params->round_1;
+ const int bd = 8;
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1));
+ const int round_bits =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const uint16_t fwd_offset = conv_params->fwd_offset;
+ const uint16_t bck_offset = conv_params->bck_offset;
+ const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
+ const int16x4_t round_offset64 = vdup_n_s16(round_offset);
+ const int16x8_t round_offset128 = vdupq_n_s16(round_offset);
+ const int16x8_t shift_round_0 = vdupq_n_s16(-conv_params->round_0 + 1);
+ const int16x8_t horiz_const = vdupq_n_s16(bits);
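+ // Worked example, assuming the typical 8-bit compound settings round_0 = 3
+ // and round_1 = 7 (not read from this file): offset_bits = 8 + 14 - 3 = 19,
+ // round_offset = (1 << 12) + (1 << 11) = 6144, round_bits = 14 - 3 - 7 = 4.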
+
+ // Horizontal filter.
+ const int16_t *x_filter_ptr = av1_get_interp_filter_subpel_kernel(
+ filter_params_x, subpel_x_qn & SUBPEL_MASK);
+ // Filter values are even, so downshift by 1 to reduce intermediate precision
+ // requirements.
+ const int8x8_t x_filter = vshrn_n_s16(vld1q_s16(x_filter_ptr), 1);
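+ // AV1 kernels are normalized so the taps sum to 1 << FILTER_BITS (128);
+ // halving them is lossless (all taps are even) and the results fit the
+ // int8 lanes produced by vshrn_n_s16.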
+
+ const uint8_t *src_ptr = src - horiz_offset;
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ CONV_BUF_TYPE *dst_ptr = dst;
+ uint8_t *dst_u8_ptr = dst8;
+ int dst_stride = conv_params->dst_stride;
+ int width = w;
+ int height = h;
+
+ if (w == 4) {
+ const uint8x16x2_t permute_tbl = vld1q_u8_x2(dot_prod_permute_tbl);
+
+ do {
+ uint8x16_t s0, s1, s2, s3;
+ int32x4_t d0, d1, d2, d3;
+ int16x8_t d01, d23;
+ uint16x4_t dd0, dd1, dd2, dd3;
+ uint8x8_t d01_u8, d23_u8;
+
+ s0 = vld1q_u8(src_ptr + 0 * src_stride);
+ s1 = vld1q_u8(src_ptr + 1 * src_stride);
+ s2 = vld1q_u8(src_ptr + 2 * src_stride);
+ s3 = vld1q_u8(src_ptr + 3 * src_stride);
+
+ d0 = convolve8_4_usdot(s0, x_filter, permute_tbl, vdupq_n_s32(0));
+ d1 = convolve8_4_usdot(s1, x_filter, permute_tbl, vdupq_n_s32(0));
+ d2 = convolve8_4_usdot(s2, x_filter, permute_tbl, vdupq_n_s32(0));
+ d3 = convolve8_4_usdot(s3, x_filter, permute_tbl, vdupq_n_s32(0));
+
+ d01 = vcombine_s16(vmovn_s32(d0), vmovn_s32(d1));
+ d23 = vcombine_s16(vmovn_s32(d2), vmovn_s32(d3));
+
+ d01 = vqrshlq_s16(d01, shift_round_0);
+ d23 = vqrshlq_s16(d23, shift_round_0);
+
+ d01 = vrshlq_s16(d01, horiz_const);
+ d23 = vrshlq_s16(d23, horiz_const);
+
+ d01 = vaddq_s16(d01, round_offset128);
+ d23 = vaddq_s16(d23, round_offset128);
+
+ if (conv_params->do_average) {
+ dd0 = vld1_u16(dst_ptr);
+ dst_ptr += dst_stride;
+ dd1 = vld1_u16(dst_ptr);
+ dst_ptr += dst_stride;
+ dd2 = vld1_u16(dst_ptr);
+ dst_ptr += dst_stride;
+ dd3 = vld1_u16(dst_ptr);
+ dst_ptr += dst_stride;
+
+ compute_avg_4x4(dd0, dd1, dd2, dd3,
+ vreinterpret_u16_s16(vget_low_s16(d01)),
+ vreinterpret_u16_s16(vget_high_s16(d01)),
+ vreinterpret_u16_s16(vget_low_s16(d23)),
+ vreinterpret_u16_s16(vget_high_s16(d23)), fwd_offset,
+ bck_offset, round_offset64, round_bits,
+ use_dist_wtd_comp_avg, &d01_u8, &d23_u8);
+
+ vst1_lane_u32((uint32_t *)dst_u8_ptr, vreinterpret_u32_u8(d01_u8), 0);
+ dst_u8_ptr += dst8_stride;
+ vst1_lane_u32((uint32_t *)dst_u8_ptr, vreinterpret_u32_u8(d01_u8), 1);
+ dst_u8_ptr += dst8_stride;
+ vst1_lane_u32((uint32_t *)dst_u8_ptr, vreinterpret_u32_u8(d23_u8), 0);
+ dst_u8_ptr += dst8_stride;
+ vst1_lane_u32((uint32_t *)dst_u8_ptr, vreinterpret_u32_u8(d23_u8), 1);
+ dst_u8_ptr += dst8_stride;
+ } else {
+ vst1q_lane_u64((uint64_t *)dst_ptr, vreinterpretq_u64_s16(d01), 0);
+ dst_ptr += dst_stride;
+ vst1q_lane_u64((uint64_t *)dst_ptr, vreinterpretq_u64_s16(d01), 1);
+ dst_ptr += dst_stride;
+ vst1q_lane_u64((uint64_t *)dst_ptr, vreinterpretq_u64_s16(d23), 0);
+ dst_ptr += dst_stride;
+ vst1q_lane_u64((uint64_t *)dst_ptr, vreinterpretq_u64_s16(d23), 1);
+ dst_ptr += dst_stride;
+ }
+
+ src_ptr += 4 * src_stride;
+ height -= 4;
+ } while (height > 0);
+ } else {
+ const uint8x16x3_t permute_tbl = vld1q_u8_x3(dot_prod_permute_tbl);
+
+ do {
+ const uint8_t *s = src_ptr;
+ CONV_BUF_TYPE *d = dst_ptr;
+ uint8_t *d_u8 = dst_u8_ptr;
+ width = w;
+
+ do {
+ uint8x16_t s0, s1, s2, s3;
+ int16x8_t d0, d1, d2, d3;
+ uint16x8_t dd0, dd1, dd2, dd3;
+ uint8x8_t d0_u8, d1_u8, d2_u8, d3_u8;
+
+ s0 = vld1q_u8(s + 0 * src_stride);
+ s1 = vld1q_u8(s + 1 * src_stride);
+ s2 = vld1q_u8(s + 2 * src_stride);
+ s3 = vld1q_u8(s + 3 * src_stride);
+
+ d0 = convolve8_8_usdot(s0, x_filter, permute_tbl, vdupq_n_s32(0),
+ shift_round_0);
+ d1 = convolve8_8_usdot(s1, x_filter, permute_tbl, vdupq_n_s32(0),
+ shift_round_0);
+ d2 = convolve8_8_usdot(s2, x_filter, permute_tbl, vdupq_n_s32(0),
+ shift_round_0);
+ d3 = convolve8_8_usdot(s3, x_filter, permute_tbl, vdupq_n_s32(0),
+ shift_round_0);
+
+ d0 = vrshlq_s16(d0, horiz_const);
+ d1 = vrshlq_s16(d1, horiz_const);
+ d2 = vrshlq_s16(d2, horiz_const);
+ d3 = vrshlq_s16(d3, horiz_const);
+
+ d0 = vaddq_s16(d0, round_offset128);
+ d1 = vaddq_s16(d1, round_offset128);
+ d2 = vaddq_s16(d2, round_offset128);
+ d3 = vaddq_s16(d3, round_offset128);
+
+ if (conv_params->do_average) {
+ load_u16_8x4(d, dst_stride, &dd0, &dd1, &dd2, &dd3);
+
+ compute_avg_8x4(dd0, dd1, dd2, dd3, vreinterpretq_u16_s16(d0),
+ vreinterpretq_u16_s16(d1), vreinterpretq_u16_s16(d2),
+ vreinterpretq_u16_s16(d3), fwd_offset, bck_offset,
+ round_offset64, round_bits, use_dist_wtd_comp_avg,
+ &d0_u8, &d1_u8, &d2_u8, &d3_u8);
+
+ store_u8_8x4(d_u8, dst8_stride, d0_u8, d1_u8, d2_u8, d3_u8);
+ } else {
+ store_u16_8x4(d, dst_stride, vreinterpretq_u16_s16(d0),
+ vreinterpretq_u16_s16(d1), vreinterpretq_u16_s16(d2),
+ vreinterpretq_u16_s16(d3));
+ }
+
+ s += 8;
+ d += 8;
+ d_u8 += 8;
+ width -= 8;
+ } while (width > 0);
+
+ src_ptr += 4 * src_stride;
+ dst_ptr += 4 * dst_stride;
+ dst_u8_ptr += 4 * dst8_stride;
+ height -= 4;
+ } while (height > 0);
+ }
+}
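+
+// Both branches above implement the two-pass compound scheme: on the first
+// reference (do_average == 0) the offset 16-bit intermediate is stored to
+// conv_params->dst; on the second (do_average == 1) compute_avg_4x4/8x4
+// blends the stored value with the new one using fwd_offset/bck_offset
+// (which sum to 1 << DIST_PRECISION_BITS = 16) and writes the final 8-bit
+// pixels to dst8.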
+
+#elif defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+
+void av1_dist_wtd_convolve_x_neon(const uint8_t *src, int src_stride,
+ uint8_t *dst8, int dst8_stride, int w, int h,
+ const InterpFilterParams *filter_params_x,
+ const int subpel_x_qn,
+ ConvolveParams *conv_params) {
+ assert(!(w % 4));
+ assert(!(h % 4));
+
+ const int horiz_offset = filter_params_x->taps / 2 - 1;
+ const int bits = FILTER_BITS - conv_params->round_1;
+ const int bd = 8;
+ const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
+ const int round_offset = (1 << (offset_bits - conv_params->round_1)) +
+ (1 << (offset_bits - conv_params->round_1 - 1));
+ const int round_bits =
+ 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
+ const uint16_t fwd_offset = conv_params->fwd_offset;
+ const uint16_t bck_offset = conv_params->bck_offset;
+ const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
+ const int16x4_t round_offset64 = vdup_n_s16(round_offset);
+ const int16x8_t round_offset128 = vdupq_n_s16(round_offset);
+ const int16x8_t shift_round_0 = vdupq_n_s16(-conv_params->round_0 + 1);
+ const int16x8_t horiz_const = vdupq_n_s16(bits);
+
+ // Horizontal filter.
+ const int16_t *x_filter_ptr = av1_get_interp_filter_subpel_kernel(
+ filter_params_x, subpel_x_qn & SUBPEL_MASK);
+ // Filter values are even, so downshift by 1 to reduce intermediate precision
+ // requirements.
+ const int8x8_t x_filter = vshrn_n_s16(vld1q_s16(x_filter_ptr), 1);
+ // Dot-product constants.
+ const uint8x16_t range_limit = vdupq_n_u8(128);
+ const int32_t correction_s32 = vaddlvq_s16(vshll_n_s8(x_filter, 7));
+ const int32x4_t correction = vdupq_n_s32(correction_s32);
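+ // Same range trick as the SDOT 2D horizontal pass: the halved taps sum to
+ // 64, so correction_s32 = 64 * 128 = 8192 restores what subtracting
+ // range_limit (128) from every pixel removed.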
+
+ const uint8_t *src_ptr = src - horiz_offset;
+ CONV_BUF_TYPE *dst = conv_params->dst;
+ CONV_BUF_TYPE *dst_ptr = dst;
+ uint8_t *dst_u8_ptr = dst8;
+ int dst_stride = conv_params->dst_stride;
+ int width = w;
+ int height = h;
+
+ if (w == 4) {
+ const uint8x16x2_t permute_tbl = vld1q_u8_x2(dot_prod_permute_tbl);
+
+ do {
+ uint8x16_t s0, s1, s2, s3;
+ int32x4_t d0, d1, d2, d3;
+ int16x8_t d01, d23;
+ uint16x4_t dd0, dd1, dd2, dd3;
+ uint8x8_t d01_u8, d23_u8;
+
+ s0 = vld1q_u8(src_ptr + 0 * src_stride);
+ s1 = vld1q_u8(src_ptr + 1 * src_stride);
+ s2 = vld1q_u8(src_ptr + 2 * src_stride);
+ s3 = vld1q_u8(src_ptr + 3 * src_stride);
+
+ d0 = convolve8_4_sdot(s0, x_filter, correction, range_limit, permute_tbl);
+ d1 = convolve8_4_sdot(s1, x_filter, correction, range_limit, permute_tbl);
+ d2 = convolve8_4_sdot(s2, x_filter, correction, range_limit, permute_tbl);
+ d3 = convolve8_4_sdot(s3, x_filter, correction, range_limit, permute_tbl);
+
+ d01 = vcombine_s16(vmovn_s32(d0), vmovn_s32(d1));
+ d23 = vcombine_s16(vmovn_s32(d2), vmovn_s32(d3));
+
+ d01 = vqrshlq_s16(d01, shift_round_0);
+ d23 = vqrshlq_s16(d23, shift_round_0);
+
+ d01 = vrshlq_s16(d01, horiz_const);
+ d23 = vrshlq_s16(d23, horiz_const);
+
+ d01 = vaddq_s16(d01, round_offset128);
+ d23 = vaddq_s16(d23, round_offset128);
+
+ if (conv_params->do_average) {
+ dd0 = vld1_u16(dst_ptr);
+ dst_ptr += dst_stride;
+ dd1 = vld1_u16(dst_ptr);
+ dst_ptr += dst_stride;
+ dd2 = vld1_u16(dst_ptr);
+ dst_ptr += dst_stride;
+ dd3 = vld1_u16(dst_ptr);
+ dst_ptr += dst_stride;
+
+ compute_avg_4x4(dd0, dd1, dd2, dd3,
+ vreinterpret_u16_s16(vget_low_s16(d01)),
+ vreinterpret_u16_s16(vget_high_s16(d01)),
+ vreinterpret_u16_s16(vget_low_s16(d23)),
+ vreinterpret_u16_s16(vget_high_s16(d23)), fwd_offset,
+ bck_offset, round_offset64, round_bits,
+ use_dist_wtd_comp_avg, &d01_u8, &d23_u8);
+
+ vst1_lane_u32((uint32_t *)dst_u8_ptr, vreinterpret_u32_u8(d01_u8), 0);
+ dst_u8_ptr += dst8_stride;
+ vst1_lane_u32((uint32_t *)dst_u8_ptr, vreinterpret_u32_u8(d01_u8), 1);
+ dst_u8_ptr += dst8_stride;
+ vst1_lane_u32((uint32_t *)dst_u8_ptr, vreinterpret_u32_u8(d23_u8), 0);
+ dst_u8_ptr += dst8_stride;
+ vst1_lane_u32((uint32_t *)dst_u8_ptr, vreinterpret_u32_u8(d23_u8), 1);
+ dst_u8_ptr += dst8_stride;
+ } else {
+ vst1q_lane_u64((uint64_t *)dst_ptr, vreinterpretq_u64_s16(d01), 0);
+ dst_ptr += dst_stride;
+ vst1q_lane_u64((uint64_t *)dst_ptr, vreinterpretq_u64_s16(d01), 1);
+ dst_ptr += dst_stride;
+ vst1q_lane_u64((uint64_t *)dst_ptr, vreinterpretq_u64_s16(d23), 0);
+ dst_ptr += dst_stride;
+ vst1q_lane_u64((uint64_t *)dst_ptr, vreinterpretq_u64_s16(d23), 1);
+ dst_ptr += dst_stride;
+ }
+
+ src_ptr += 4 * src_stride;
+ height -= 4;
+ } while (height > 0);
+ } else {
+ const uint8x16x3_t permute_tbl = vld1q_u8_x3(dot_prod_permute_tbl);
+
+ do {
+ const uint8_t *s = src_ptr;
+ CONV_BUF_TYPE *d = dst_ptr;
+ uint8_t *d_u8 = dst_u8_ptr;
+ width = w;
+
+ do {
+ uint8x16_t s0, s1, s2, s3;
+ int16x8_t d0, d1, d2, d3;
+ uint16x8_t dd0, dd1, dd2, dd3;
+ uint8x8_t d0_u8, d1_u8, d2_u8, d3_u8;
+
+ s0 = vld1q_u8(s + 0 * src_stride);
+ s1 = vld1q_u8(s + 1 * src_stride);
+ s2 = vld1q_u8(s + 2 * src_stride);
+ s3 = vld1q_u8(s + 3 * src_stride);
+
+ d0 = convolve8_8_sdot(s0, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+ d1 = convolve8_8_sdot(s1, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+ d2 = convolve8_8_sdot(s2, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+ d3 = convolve8_8_sdot(s3, x_filter, correction, range_limit,
+ permute_tbl, shift_round_0);
+
+ d0 = vrshlq_s16(d0, horiz_const);
+ d1 = vrshlq_s16(d1, horiz_const);
+ d2 = vrshlq_s16(d2, horiz_const);
+ d3 = vrshlq_s16(d3, horiz_const);
+
+ d0 = vaddq_s16(d0, round_offset128);
+ d1 = vaddq_s16(d1, round_offset128);
+ d2 = vaddq_s16(d2, round_offset128);
+ d3 = vaddq_s16(d3, round_offset128);
+
+ if (conv_params->do_average) {
+ load_u16_8x4(d, dst_stride, &dd0, &dd1, &dd2, &dd3);
+
+ compute_avg_8x4(dd0, dd1, dd2, dd3, vreinterpretq_u16_s16(d0),
+ vreinterpretq_u16_s16(d1), vreinterpretq_u16_s16(d2),
+ vreinterpretq_u16_s16(d3), fwd_offset, bck_offset,
+ round_offset64, round_bits, use_dist_wtd_comp_avg,
+ &d0_u8, &d1_u8, &d2_u8, &d3_u8);
+
+ store_u8_8x4(d_u8, dst8_stride, d0_u8, d1_u8, d2_u8, d3_u8);
+ } else {
+ store_u16_8x4(d, dst_stride, vreinterpretq_u16_s16(d0),
+ vreinterpretq_u16_s16(d1), vreinterpretq_u16_s16(d2),
+ vreinterpretq_u16_s16(d3));
+ }
+
+ s += 8;
+ d += 8;
+ d_u8 += 8;
+ width -= 8;
+ } while (width > 0);
+
+ src_ptr += 4 * src_stride;
+ dst_ptr += 4 * dst_stride;
+ dst_u8_ptr += 4 * dst8_stride;
+ height -= 4;
+ } while (height > 0);
+ }
+}
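+
+// This SDOT version differs from the USMMLA one above only in the
+// range_limit/correction handling; the rounding ladder and the store paths
+// are identical. The plain NEON fallback for cores without either
+// dot-product extension follows.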
+
+#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+
void av1_dist_wtd_convolve_x_neon(const uint8_t *src, int src_stride,
uint8_t *dst8, int dst8_stride, int w, int h,
const InterpFilterParams *filter_params_x,
@@ -892,18 +1416,14 @@ void av1_dist_wtd_convolve_x_neon(const uint8_t *src, int src_stride,
const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
// horizontal filter
- const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
+ const int16_t *x_filter_ptr = av1_get_interp_filter_subpel_kernel(
filter_params_x, subpel_x_qn & SUBPEL_MASK);
const uint8_t *src_ptr = src - horiz_offset;
- int16_t x_filter_tmp[8];
- int16x8_t filter_x_coef = vld1q_s16(x_filter);
-
- // filter coeffs are even, so downshifting by 1 to reduce intermediate
- // precision requirements.
- filter_x_coef = vshrq_n_s16(filter_x_coef, 1);
- vst1q_s16(&x_filter_tmp[0], filter_x_coef);
+ // Filter values are even, so downshift by 1 to reduce intermediate precision
+ // requirements.
+ const int16x8_t x_filter = vshrq_n_s16(vld1q_s16(x_filter_ptr), 1);
const uint8_t *s;
uint8_t *d_u8;
@@ -980,20 +1500,20 @@ void av1_dist_wtd_convolve_x_neon(const uint8_t *src, int src_stride,
s9 = vget_high_s16(u0);
s10 = vget_high_s16(u1);
- d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
- zero, shift_round_0);
+ d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter, zero,
+ shift_round_0);
d0 = vrshl_s16(d0, horiz_const);
d0 = vadd_s16(d0, round_offset_vec);
- d1 = convolve8_4x4_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp,
- zero, shift_round_0);
+ d1 = convolve8_4x4_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter, zero,
+ shift_round_0);
d1 = vrshl_s16(d1, horiz_const);
d1 = vadd_s16(d1, round_offset_vec);
- d2 = convolve8_4x4_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp,
- zero, shift_round_0);
+ d2 = convolve8_4x4_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter, zero,
+ shift_round_0);
d2 = vrshl_s16(d2, horiz_const);
d2 = vadd_s16(d2, round_offset_vec);
- d3 = convolve8_4x4_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp,
- zero, shift_round_0);
+ d3 = convolve8_4x4_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter, zero,
+ shift_round_0);
d3 = vrshl_s16(d3, horiz_const);
d3 = vadd_s16(d3, round_offset_vec);
@@ -1073,8 +1593,8 @@ void av1_dist_wtd_convolve_x_neon(const uint8_t *src, int src_stride,
s6 = vext_s16(s4, s7, 2); // a6 a7 a8 a9
s7 = vext_s16(s4, s7, 3); // a7 a8 a9 a10
- d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
- zero, shift_round_0);
+ d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter, zero,
+ shift_round_0);
d0 = vrshl_s16(d0, horiz_const);
d0 = vadd_s16(d0, round_offset_vec);
s0 = s4;
@@ -1173,38 +1693,38 @@ void av1_dist_wtd_convolve_x_neon(const uint8_t *src, int src_stride,
s13 = vreinterpretq_s16_u16(vmovl_u8(t6));
s14 = vreinterpretq_s16_u16(vmovl_u8(t7));
- res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter_tmp,
- zero, shift_round_0);
+ res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, x_filter, zero,
+ shift_round_0);
res0 = vrshlq_s16(res0, horiz_const);
res0 = vaddq_s16(res0, round_offset128);
- res1 = convolve8_8x8_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter_tmp,
- zero, shift_round_0);
+ res1 = convolve8_8x8_s16(s1, s2, s3, s4, s5, s6, s7, s8, x_filter, zero,
+ shift_round_0);
res1 = vrshlq_s16(res1, horiz_const);
res1 = vaddq_s16(res1, round_offset128);
- res2 = convolve8_8x8_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter_tmp,
- zero, shift_round_0);
+ res2 = convolve8_8x8_s16(s2, s3, s4, s5, s6, s7, s8, s9, x_filter, zero,
+ shift_round_0);
res2 = vrshlq_s16(res2, horiz_const);
res2 = vaddq_s16(res2, round_offset128);
- res3 = convolve8_8x8_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter_tmp,
+ res3 = convolve8_8x8_s16(s3, s4, s5, s6, s7, s8, s9, s10, x_filter,
zero, shift_round_0);
res3 = vrshlq_s16(res3, horiz_const);
res3 = vaddq_s16(res3, round_offset128);
- res4 = convolve8_8x8_s16(s4, s5, s6, s7, s8, s9, s10, s11, x_filter_tmp,
+ res4 = convolve8_8x8_s16(s4, s5, s6, s7, s8, s9, s10, s11, x_filter,
zero, shift_round_0);
res4 = vrshlq_s16(res4, horiz_const);
res4 = vaddq_s16(res4, round_offset128);
- res5 = convolve8_8x8_s16(s5, s6, s7, s8, s9, s10, s11, s12,
- x_filter_tmp, zero, shift_round_0);
+ res5 = convolve8_8x8_s16(s5, s6, s7, s8, s9, s10, s11, s12, x_filter,
+ zero, shift_round_0);
res5 = vrshlq_s16(res5, horiz_const);
res5 = vaddq_s16(res5, round_offset128);
- res6 = convolve8_8x8_s16(s6, s7, s8, s9, s10, s11, s12, s13,
- x_filter_tmp, zero, shift_round_0);
+ res6 = convolve8_8x8_s16(s6, s7, s8, s9, s10, s11, s12, s13, x_filter,
+ zero, shift_round_0);
res6 = vrshlq_s16(res6, horiz_const);
res6 = vaddq_s16(res6, round_offset128);
- res7 = convolve8_8x8_s16(s7, s8, s9, s10, s11, s12, s13, s14,
- x_filter_tmp, zero, shift_round_0);
+ res7 = convolve8_8x8_s16(s7, s8, s9, s10, s11, s12, s13, s14, x_filter,
+ zero, shift_round_0);
res7 = vrshlq_s16(res7, horiz_const);
res7 = vaddq_s16(res7, round_offset128);
@@ -1293,8 +1813,8 @@ void av1_dist_wtd_convolve_x_neon(const uint8_t *src, int src_stride,
s6 = vextq_s16(temp_0, s7, 6); // a6 a7 a8 a9 a10 a11 a12 a13
s7 = vextq_s16(temp_0, s7, 7); // a7 a8 a9 a10 a11 a12 a13 a14
- res0 = convolve8_8x8_s16(temp_0, s1, s2, s3, s4, s5, s6, s7,
- x_filter_tmp, zero, shift_round_0);
+ res0 = convolve8_8x8_s16(temp_0, s1, s2, s3, s4, s5, s6, s7, x_filter,
+ zero, shift_round_0);
res0 = vrshlq_s16(res0, horiz_const);
res0 = vaddq_s16(res0, round_offset128);
@@ -1328,6 +1848,8 @@ void av1_dist_wtd_convolve_x_neon(const uint8_t *src, int src_stride,
}
}
+#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+
void av1_dist_wtd_convolve_y_neon(const uint8_t *src, int src_stride,
uint8_t *dst8, int dst8_stride, int w, int h,
const InterpFilterParams *filter_params_y,
@@ -1352,18 +1874,14 @@ void av1_dist_wtd_convolve_y_neon(const uint8_t *src, int src_stride,
const int shift_value = (conv_params->round_1 - 1 - bits);
// vertical filter
- const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
+ const int16_t *y_filter_ptr = av1_get_interp_filter_subpel_kernel(
filter_params_y, subpel_y_qn & SUBPEL_MASK);
const uint8_t *src_ptr = src - (vert_offset * src_stride);
- int16_t y_filter_tmp[8];
- int16x8_t filter_y_coef = vld1q_s16(y_filter);
-
- // filter coeffs are even, so downshifting by 1 to reduce intermediate
- // precision requirements.
- filter_y_coef = vshrq_n_s16(filter_y_coef, 1);
- vst1q_s16(&y_filter_tmp[0], filter_y_coef);
+ // Filter values are even, so downshift by 1 to reduce intermediate precision
+ // requirements.
+ const int16x8_t y_filter = vshrq_n_s16(vld1q_s16(y_filter_ptr), 1);
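+ // The halved taps drop one bit of scale; the "- 1" in shift_value above
+ // compensates, keeping the overall output scale unchanged.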
const uint8_t *s;
uint8_t *d_u8;
@@ -1441,17 +1959,17 @@ void av1_dist_wtd_convolve_y_neon(const uint8_t *src, int src_stride,
s9 = vget_low_s16(u1);
s10 = vget_high_s16(u1);
- d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, y_filter_tmp,
- zero, shift_vec);
+ d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, y_filter, zero,
+ shift_vec);
d0 = vadd_s16(d0, round_offset64);
- d1 = convolve8_4x4_s16(s1, s2, s3, s4, s5, s6, s7, s8, y_filter_tmp,
- zero, shift_vec);
+ d1 = convolve8_4x4_s16(s1, s2, s3, s4, s5, s6, s7, s8, y_filter, zero,
+ shift_vec);
d1 = vadd_s16(d1, round_offset64);
- d2 = convolve8_4x4_s16(s2, s3, s4, s5, s6, s7, s8, s9, y_filter_tmp,
- zero, shift_vec);
+ d2 = convolve8_4x4_s16(s2, s3, s4, s5, s6, s7, s8, s9, y_filter, zero,
+ shift_vec);
d2 = vadd_s16(d2, round_offset64);
- d3 = convolve8_4x4_s16(s3, s4, s5, s6, s7, s8, s9, s10, y_filter_tmp,
- zero, shift_vec);
+ d3 = convolve8_4x4_s16(s3, s4, s5, s6, s7, s8, s9, s10, y_filter, zero,
+ shift_vec);
d3 = vadd_s16(d3, round_offset64);
if (conv_params->do_average) {
@@ -1504,8 +2022,8 @@ void av1_dist_wtd_convolve_y_neon(const uint8_t *src, int src_stride,
u0 = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(tu0)));
s7 = vget_low_s16(u0);
- d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, y_filter_tmp,
- zero, shift_vec);
+ d0 = convolve8_4x4_s16(s0, s1, s2, s3, s4, s5, s6, s7, y_filter, zero,
+ shift_vec);
d0 = vadd_s16(d0, round_offset64);
@@ -1602,29 +2120,29 @@ void av1_dist_wtd_convolve_y_neon(const uint8_t *src, int src_stride,
__builtin_prefetch(dst_ptr + 2 * dst_stride);
__builtin_prefetch(dst_ptr + 3 * dst_stride);
- res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, y_filter_tmp,
- zero, shift_vec);
+ res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, y_filter, zero,
+ shift_vec);
res0 = vaddq_s16(res0, round_offset128);
- res1 = convolve8_8x8_s16(s1, s2, s3, s4, s5, s6, s7, s8, y_filter_tmp,
- zero, shift_vec);
+ res1 = convolve8_8x8_s16(s1, s2, s3, s4, s5, s6, s7, s8, y_filter, zero,
+ shift_vec);
res1 = vaddq_s16(res1, round_offset128);
- res2 = convolve8_8x8_s16(s2, s3, s4, s5, s6, s7, s8, s9, y_filter_tmp,
- zero, shift_vec);
+ res2 = convolve8_8x8_s16(s2, s3, s4, s5, s6, s7, s8, s9, y_filter, zero,
+ shift_vec);
res2 = vaddq_s16(res2, round_offset128);
- res3 = convolve8_8x8_s16(s3, s4, s5, s6, s7, s8, s9, s10, y_filter_tmp,
+ res3 = convolve8_8x8_s16(s3, s4, s5, s6, s7, s8, s9, s10, y_filter,
zero, shift_vec);
res3 = vaddq_s16(res3, round_offset128);
- res4 = convolve8_8x8_s16(s4, s5, s6, s7, s8, s9, s10, s11, y_filter_tmp,
+ res4 = convolve8_8x8_s16(s4, s5, s6, s7, s8, s9, s10, s11, y_filter,
zero, shift_vec);
res4 = vaddq_s16(res4, round_offset128);
- res5 = convolve8_8x8_s16(s5, s6, s7, s8, s9, s10, s11, s12,
- y_filter_tmp, zero, shift_vec);
+ res5 = convolve8_8x8_s16(s5, s6, s7, s8, s9, s10, s11, s12, y_filter,
+ zero, shift_vec);
res5 = vaddq_s16(res5, round_offset128);
- res6 = convolve8_8x8_s16(s6, s7, s8, s9, s10, s11, s12, s13,
- y_filter_tmp, zero, shift_vec);
+ res6 = convolve8_8x8_s16(s6, s7, s8, s9, s10, s11, s12, s13, y_filter,
+ zero, shift_vec);
res6 = vaddq_s16(res6, round_offset128);
- res7 = convolve8_8x8_s16(s7, s8, s9, s10, s11, s12, s13, s14,
- y_filter_tmp, zero, shift_vec);
+ res7 = convolve8_8x8_s16(s7, s8, s9, s10, s11, s12, s13, s14, y_filter,
+ zero, shift_vec);
res7 = vaddq_s16(res7, round_offset128);
if (conv_params->do_average) {
@@ -1682,8 +2200,8 @@ void av1_dist_wtd_convolve_y_neon(const uint8_t *src, int src_stride,
__builtin_prefetch(dst_ptr);
- res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, y_filter_tmp,
- zero, shift_vec);
+ res0 = convolve8_8x8_s16(s0, s1, s2, s3, s4, s5, s6, s7, y_filter, zero,
+ shift_vec);
res0 = vaddq_s16(res0, round_offset128);
s0 = s1;
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/av1_loopfilter.c b/chromium/third_party/libaom/source/libaom/av1/common/av1_loopfilter.c
index 3e97e0495ee..5dede53d336 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/av1_loopfilter.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/av1_loopfilter.c
@@ -697,9 +697,11 @@ static AOM_FORCE_INLINE void set_one_param_for_line_luma(
const int32_t pu_edge = mi_prev != mbmi;
- // The quad loop filter assumes that all the transform blocks within a given
- // prediction block are of the same size.
- assert(IMPLIES(!pu_edge, pv_ts == ts));
+ // The quad loop filter assumes that all the transform blocks within an
+ // 8x16/16x8/16x16 prediction block are of the same size.
+ assert(IMPLIES(
+ !pu_edge && (mbmi->bsize >= BLOCK_8X16 && mbmi->bsize <= BLOCK_16X16),
+ pv_ts == ts));
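+ // IMPLIES(a, b) is !(a) || (b): the uniform-transform-size requirement is
+ // only checked when the edge is interior to (not on the prediction
+ // boundary of) an 8x16/16x8/16x16 block.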
if (!pu_edge) {
curr_skipped = mbmi->skip_txfm && is_inter_block(mbmi);
@@ -769,7 +771,8 @@ static AOM_FORCE_INLINE void set_one_param_for_line_chroma(
const EDGE_DIR edge_dir, uint32_t mi_col, uint32_t mi_row, int coord,
bool is_first_block, TX_SIZE prev_tx_size,
const struct macroblockd_plane *const plane_ptr, const ptrdiff_t mode_step,
- const int scale_horz, const int scale_vert, int *min_dim) {
+ const int scale_horz, const int scale_vert, int *min_dim, int plane,
+ int joint_filter_chroma) {
const int is_vert = edge_dir == VERT_EDGE;
(void)plane_ptr;
assert((mi_col << MI_SIZE_LOG2) <
@@ -790,7 +793,7 @@ static AOM_FORCE_INLINE void set_one_param_for_line_chroma(
const MB_MODE_INFO *mbmi = mi[0];
assert(mbmi);
- const TX_SIZE ts = get_transform_size(xd, mi[0], mi_row, mi_col, AOM_PLANE_U,
+ const TX_SIZE ts = get_transform_size(xd, mi[0], mi_row, mi_col, plane,
scale_horz, scale_vert);
*tx_size = ts;
@@ -812,27 +815,30 @@ static AOM_FORCE_INLINE void set_one_param_for_line_chroma(
const int pv_row = is_vert ? (mi_row) : (mi_row - (1 << scale_vert));
const int pv_col = is_vert ? (mi_col - (1 << scale_horz)) : (mi_col);
const TX_SIZE pv_ts =
- is_first_block ? get_transform_size(xd, mi_prev, pv_row, pv_col,
- AOM_PLANE_U, scale_horz, scale_vert)
+ is_first_block ? get_transform_size(xd, mi_prev, pv_row, pv_col, plane,
+ scale_horz, scale_vert)
: prev_tx_size;
if (is_first_block) {
*min_dim = is_vert ? tx_size_high[pv_ts] : tx_size_wide[pv_ts];
}
- uint8_t u_level =
- av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_U, mbmi);
- if (!u_level) {
- u_level = av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_U,
- mi_prev);
+ uint8_t level =
+ av1_get_filter_level(cm, &cm->lf_info, edge_dir, plane, mbmi);
+ if (!level) {
+ level = av1_get_filter_level(cm, &cm->lf_info, edge_dir, plane, mi_prev);
}
#ifndef NDEBUG
- uint8_t v_level =
- av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_V, mbmi);
- if (!v_level) {
- v_level = av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_V,
- mi_prev);
+ if (joint_filter_chroma) {
+ uint8_t v_level =
+ av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_V, mbmi);
+ if (!v_level) {
+ v_level = av1_get_filter_level(cm, &cm->lf_info, edge_dir, AOM_PLANE_V,
+ mi_prev);
+ }
+ assert(level == v_level);
}
- assert(u_level == v_level);
+#else
+ (void)joint_filter_chroma;
#endif // NDEBUG
const int32_t pu_edge = mi_prev != mbmi;
@@ -840,12 +846,12 @@ static AOM_FORCE_INLINE void set_one_param_for_line_chroma(
curr_skipped = mbmi->skip_txfm && is_inter_block(mbmi);
}
// For realtime mode, u and v have the same level
- if ((!curr_skipped || pu_edge) && u_level) {
+ if ((!curr_skipped || pu_edge) && level) {
params->filter_length = is_vert ? vert_filter_length_chroma[ts][pv_ts]
: horz_filter_length_chroma[ts][pv_ts];
const loop_filter_thresh *const limits = cm->lf_info.lfthr;
- params->lfthr = limits + u_level;
+ params->lfthr = limits + level;
}
}
const int tx_dim = is_vert ? tx_size_high[ts] : tx_size_wide[ts];
@@ -858,7 +864,7 @@ static AOM_FORCE_INLINE void set_lpf_parameters_for_line_chroma(
const EDGE_DIR edge_dir, uint32_t mi_col, uint32_t mi_row,
const struct macroblockd_plane *const plane_ptr, const uint32_t mi_range,
const ptrdiff_t mode_step, const int scale_horz, const int scale_vert,
- int *min_dim) {
+ int *min_dim, int plane, int joint_filter_chroma) {
const int is_vert = edge_dir == VERT_EDGE;
AV1_DEBLOCKING_PARAMETERS *params = params_buf;
@@ -868,9 +874,10 @@ static AOM_FORCE_INLINE void set_lpf_parameters_for_line_chroma(
TX_SIZE prev_tx_size = TX_INVALID;
// Unroll the first iteration of the loop
- set_one_param_for_line_chroma(
- params, tx_size, cm, xd, edge_dir, mi_col, mi_row, *counter_ptr, true,
- prev_tx_size, plane_ptr, mode_step, scale_horz, scale_vert, min_dim);
+ set_one_param_for_line_chroma(params, tx_size, cm, xd, edge_dir, mi_col,
+ mi_row, *counter_ptr, true, prev_tx_size,
+ plane_ptr, mode_step, scale_horz, scale_vert,
+ min_dim, plane, joint_filter_chroma);
// Advance
int advance_units =
@@ -881,9 +888,10 @@ static AOM_FORCE_INLINE void set_lpf_parameters_for_line_chroma(
tx_size += advance_units;
while (*counter_ptr < mi_range) {
- set_one_param_for_line_chroma(
- params, tx_size, cm, xd, edge_dir, mi_col, mi_row, *counter_ptr, false,
- prev_tx_size, plane_ptr, mode_step, scale_horz, scale_vert, min_dim);
+ set_one_param_for_line_chroma(params, tx_size, cm, xd, edge_dir, mi_col,
+ mi_row, *counter_ptr, false, prev_tx_size,
+ plane_ptr, mode_step, scale_horz, scale_vert,
+ min_dim, plane, joint_filter_chroma);
// Advance
advance_units =
@@ -1342,25 +1350,25 @@ void av1_filter_block_plane_vert(const AV1_COMMON *const cm,
}
}
-void av1_filter_block_plane_vert_rt(const AV1_COMMON *const cm,
- const MACROBLOCKD *const xd,
- const MACROBLOCKD_PLANE *const plane_ptr,
- const uint32_t mi_row,
- const uint32_t mi_col,
- AV1_DEBLOCKING_PARAMETERS *params_buf,
- TX_SIZE *tx_buf) {
+void av1_filter_block_plane_vert_opt(const AV1_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ const MACROBLOCKD_PLANE *const plane_ptr,
+ const uint32_t mi_row,
+ const uint32_t mi_col,
+ AV1_DEBLOCKING_PARAMETERS *params_buf,
+ TX_SIZE *tx_buf) {
uint8_t *const dst_ptr = plane_ptr->dst.buf;
const int dst_stride = plane_ptr->dst.stride;
// Ensure that mi_cols/mi_rows are calculated based on frame dimension aligned
// to MI_SIZE.
const int plane_mi_cols =
- (plane_ptr->dst.width + MI_SIZE - 1) >> MI_SIZE_LOG2;
+ CEIL_POWER_OF_TWO(plane_ptr->dst.width, MI_SIZE_LOG2);
const int plane_mi_rows =
- (plane_ptr->dst.height + MI_SIZE - 1) >> MI_SIZE_LOG2;
+ CEIL_POWER_OF_TWO(plane_ptr->dst.height, MI_SIZE_LOG2);
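+ // CEIL_POWER_OF_TWO(x, n) expands to (x + (1 << n) - 1) >> n, i.e. the
+ // same ceiling division the open-coded expressions computed.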
const int y_range = AOMMIN((int)(plane_mi_rows - mi_row), MAX_MIB_SIZE);
const int x_range = AOMMIN((int)(plane_mi_cols - mi_col), MAX_MIB_SIZE);
const ptrdiff_t mode_step = 1;
- for (int y = 0; y < y_range; y += 2) {
+ for (int y = 0; y < y_range; y++) {
const uint32_t curr_y = mi_row + y;
const uint32_t x_start = mi_col;
const uint32_t x_end = mi_col + x_range;
@@ -1381,9 +1389,10 @@ void av1_filter_block_plane_vert_rt(const AV1_COMMON *const cm,
// prediction block. This is because dim 16 can only happen every unit of
// 4 mi's.
use_filter_type = USE_QUAD;
- y += 2;
- } else if ((y + 1) < y_range) {
+ y += 3;
+ } else if ((y + 1) < y_range && min_block_height >= 8) {
use_filter_type = USE_DUAL;
+ y += 1;
}
for (int x = 0; x < x_range;) {
@@ -1404,20 +1413,22 @@ void av1_filter_block_plane_vert_rt(const AV1_COMMON *const cm,
}
}
-void av1_filter_block_plane_vert_rt_chroma(
+void av1_filter_block_plane_vert_opt_chroma(
const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
const uint32_t mi_col, AV1_DEBLOCKING_PARAMETERS *params_buf,
- TX_SIZE *tx_buf) {
+ TX_SIZE *tx_buf, int plane, bool joint_filter_chroma) {
const uint32_t scale_horz = plane_ptr->subsampling_x;
const uint32_t scale_vert = plane_ptr->subsampling_y;
- uint8_t *const u_dst_ptr = plane_ptr[0].dst.buf;
- uint8_t *const v_dst_ptr = plane_ptr[1].dst.buf;
const int dst_stride = plane_ptr->dst.stride;
- const int plane_mi_rows =
- ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert);
- const int plane_mi_cols =
- ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz);
+ // Ensure that mi_cols/mi_rows are calculated based on frame dimension aligned
+ // to MI_SIZE.
+ const int mi_cols =
+ ((plane_ptr->dst.width << scale_horz) + MI_SIZE - 1) >> MI_SIZE_LOG2;
+ const int mi_rows =
+ ((plane_ptr->dst.height << scale_vert) + MI_SIZE - 1) >> MI_SIZE_LOG2;
+ const int plane_mi_rows = ROUND_POWER_OF_TWO(mi_rows, scale_vert);
+ const int plane_mi_cols = ROUND_POWER_OF_TWO(mi_cols, scale_horz);
const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)),
(MAX_MIB_SIZE >> scale_vert));
const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)),
@@ -1429,30 +1440,29 @@ void av1_filter_block_plane_vert_rt_chroma(
const uint32_t x_start = mi_col + (0 << scale_horz);
const uint32_t x_end = mi_col + (x_range << scale_horz);
int min_height = tx_size_high[TX_64X64];
- set_lpf_parameters_for_line_chroma(
- params_buf, tx_buf, cm, xd, VERT_EDGE, x_start, curr_y, plane_ptr,
- x_end, mode_step, scale_horz, scale_vert, &min_height);
+ set_lpf_parameters_for_line_chroma(params_buf, tx_buf, cm, xd, VERT_EDGE,
+ x_start, curr_y, plane_ptr, x_end,
+ mode_step, scale_horz, scale_vert,
+ &min_height, plane, joint_filter_chroma);
AV1_DEBLOCKING_PARAMETERS *params = params_buf;
TX_SIZE *tx_size = tx_buf;
-
- uint8_t *u_dst = u_dst_ptr + y * MI_SIZE * dst_stride;
- uint8_t *v_dst = v_dst_ptr + y * MI_SIZE * dst_stride;
-
int use_filter_type = USE_SINGLE;
+ int y_inc = 0;
+
if ((y & 3) == 0 && (y + 3) < y_range && min_height >= 16) {
// If we are on a row which is a multiple of 4, and the minimum height is
// 16 pixels, then the current and below 3 rows must contain the same tx
// block. This is because dim 16 can only happen every unit of 4 mi's.
use_filter_type = USE_QUAD;
- y += 3;
+ y_inc = 3;
} else if (y % 2 == 0 && (y + 1) < y_range && min_height >= 8) {
// If we are on an even row, and the minimum height is 8 pixels, then the
// current and below rows must contain the same tx block. This is because
// dim 4 can only happen every unit of 2**0, and 8 every unit of 2**1,
// etc.
use_filter_type = USE_DUAL;
- y++;
+ y_inc = 1;
}
for (int x = 0; x < x_range;) {
@@ -1465,17 +1475,25 @@ void av1_filter_block_plane_vert_rt_chroma(
*tx_size = TX_4X4;
}
- filter_vert_chroma(u_dst, v_dst, dst_stride, params, cm->seq_params,
- use_filter_type);
+ const int offset = y * MI_SIZE * dst_stride + x * MI_SIZE;
+ if (joint_filter_chroma) {
+ uint8_t *u_dst = plane_ptr[0].dst.buf + offset;
+ uint8_t *v_dst = plane_ptr[1].dst.buf + offset;
+ filter_vert_chroma(u_dst, v_dst, dst_stride, params, cm->seq_params,
+ use_filter_type);
+ } else {
+ uint8_t *dst_ptr = plane_ptr->dst.buf + offset;
+ filter_vert(dst_ptr, dst_stride, params, cm->seq_params,
+ use_filter_type);
+ }
// advance the destination pointer
const uint32_t advance_units = tx_size_wide_unit[*tx_size];
x += advance_units;
- u_dst += advance_units * MI_SIZE;
- v_dst += advance_units * MI_SIZE;
params += advance_units;
tx_size += advance_units;
}
+ y += y_inc;
}
}
@@ -1925,26 +1943,26 @@ void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
}
}
-void av1_filter_block_plane_horz_rt(const AV1_COMMON *const cm,
- const MACROBLOCKD *const xd,
- const MACROBLOCKD_PLANE *const plane_ptr,
- const uint32_t mi_row,
- const uint32_t mi_col,
- AV1_DEBLOCKING_PARAMETERS *params_buf,
- TX_SIZE *tx_buf) {
+void av1_filter_block_plane_horz_opt(const AV1_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ const MACROBLOCKD_PLANE *const plane_ptr,
+ const uint32_t mi_row,
+ const uint32_t mi_col,
+ AV1_DEBLOCKING_PARAMETERS *params_buf,
+ TX_SIZE *tx_buf) {
uint8_t *const dst_ptr = plane_ptr->dst.buf;
const int dst_stride = plane_ptr->dst.stride;
// Ensure that mi_cols/mi_rows are calculated based on frame dimension aligned
// to MI_SIZE.
const int plane_mi_cols =
- (plane_ptr->dst.width + MI_SIZE - 1) >> MI_SIZE_LOG2;
+ CEIL_POWER_OF_TWO(plane_ptr->dst.width, MI_SIZE_LOG2);
const int plane_mi_rows =
- (plane_ptr->dst.height + MI_SIZE - 1) >> MI_SIZE_LOG2;
+ CEIL_POWER_OF_TWO(plane_ptr->dst.height, MI_SIZE_LOG2);
const int y_range = AOMMIN((int)(plane_mi_rows - mi_row), MAX_MIB_SIZE);
const int x_range = AOMMIN((int)(plane_mi_cols - mi_col), MAX_MIB_SIZE);
const ptrdiff_t mode_step = cm->mi_params.mi_stride;
- for (int x = 0; x < x_range; x += 2) {
+ for (int x = 0; x < x_range; x++) {
const uint32_t curr_x = mi_col + x;
const uint32_t y_start = mi_row;
const uint32_t y_end = mi_row + y_range;
@@ -1965,9 +1983,10 @@ void av1_filter_block_plane_horz_rt(const AV1_COMMON *const cm,
// prediction block. This is because dim 16 can only happen every unit of
// 4 mi's.
filter_type = USE_QUAD;
- x += 2;
- } else if ((x + 1) < x_range) {
+ x += 3;
+ } else if ((x + 1) < x_range && min_block_width >= 8) {
filter_type = USE_DUAL;
+ x += 1;
}
for (int y = 0; y < y_range;) {
@@ -1988,20 +2007,22 @@ void av1_filter_block_plane_horz_rt(const AV1_COMMON *const cm,
}
}
-void av1_filter_block_plane_horz_rt_chroma(
+void av1_filter_block_plane_horz_opt_chroma(
const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
const uint32_t mi_col, AV1_DEBLOCKING_PARAMETERS *params_buf,
- TX_SIZE *tx_buf) {
+ TX_SIZE *tx_buf, int plane, bool joint_filter_chroma) {
const uint32_t scale_horz = plane_ptr->subsampling_x;
const uint32_t scale_vert = plane_ptr->subsampling_y;
- uint8_t *const u_dst_ptr = plane_ptr[0].dst.buf;
- uint8_t *const v_dst_ptr = plane_ptr[1].dst.buf;
const int dst_stride = plane_ptr->dst.stride;
- const int plane_mi_rows =
- ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert);
- const int plane_mi_cols =
- ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz);
+ // Ensure that mi_cols/mi_rows are calculated based on frame dimension aligned
+ // to MI_SIZE.
+ const int mi_cols =
+ ((plane_ptr->dst.width << scale_horz) + MI_SIZE - 1) >> MI_SIZE_LOG2;
+ const int mi_rows =
+ ((plane_ptr->dst.height << scale_vert) + MI_SIZE - 1) >> MI_SIZE_LOG2;
+ const int plane_mi_rows = ROUND_POWER_OF_TWO(mi_rows, scale_vert);
+ const int plane_mi_cols = ROUND_POWER_OF_TWO(mi_cols, scale_horz);
const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)),
(MAX_MIB_SIZE >> scale_vert));
const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)),
@@ -2012,30 +2033,29 @@ void av1_filter_block_plane_horz_rt_chroma(
const uint32_t curr_x = mi_col + (x << scale_horz);
const uint32_t y_end = mi_row + (y_range << scale_vert);
int min_width = tx_size_wide[TX_64X64];
- set_lpf_parameters_for_line_chroma(
- params_buf, tx_buf, cm, xd, HORZ_EDGE, curr_x, y_start, plane_ptr,
- y_end, mode_step, scale_horz, scale_vert, &min_width);
+ set_lpf_parameters_for_line_chroma(params_buf, tx_buf, cm, xd, HORZ_EDGE,
+ curr_x, y_start, plane_ptr, y_end,
+ mode_step, scale_horz, scale_vert,
+ &min_width, plane, joint_filter_chroma);
AV1_DEBLOCKING_PARAMETERS *params = params_buf;
TX_SIZE *tx_size = tx_buf;
-
- uint8_t *u_dst = u_dst_ptr + x * MI_SIZE;
- uint8_t *v_dst = v_dst_ptr + x * MI_SIZE;
-
USE_FILTER_TYPE use_filter_type = USE_SINGLE;
+ int x_inc = 0;
+
if ((x & 3) == 0 && (x + 3) < x_range && min_width >= 16) {
// If we are on a col which is a multiple of 4, and the minimum width is
// 16 pixels, then the current and right 3 cols must contain the same tx
// block. This is because dim 16 can only happen every unit of 4 mi's.
use_filter_type = USE_QUAD;
- x += 3;
+ x_inc = 3;
} else if (x % 2 == 0 && (x + 1) < x_range && min_width >= 8) {
// If we are on an even col, and the minimum width is 8 pixels, then the
// current and left cols must contain the same tx block. This is because
// dim 4 can only happen every unit of 2**0, and 8 every unit of 2**1,
// etc.
use_filter_type = USE_DUAL;
- x++;
+ x_inc = 1;
}
for (int y = 0; y < y_range;) {
@@ -2048,16 +2068,24 @@ void av1_filter_block_plane_horz_rt_chroma(
*tx_size = TX_4X4;
}
- filter_horz_chroma(u_dst, v_dst, dst_stride, params, cm->seq_params,
- use_filter_type);
+ const int offset = y * MI_SIZE * dst_stride + x * MI_SIZE;
+ if (joint_filter_chroma) {
+ uint8_t *u_dst = plane_ptr[0].dst.buf + offset;
+ uint8_t *v_dst = plane_ptr[1].dst.buf + offset;
+ filter_horz_chroma(u_dst, v_dst, dst_stride, params, cm->seq_params,
+ use_filter_type);
+ } else {
+ uint8_t *dst_ptr = plane_ptr->dst.buf + offset;
+ filter_horz(dst_ptr, dst_stride, params, cm->seq_params,
+ use_filter_type);
+ }
// advance the destination pointer
const int advance_units = tx_size_high_unit[*tx_size];
y += advance_units;
- u_dst += advance_units * dst_stride * MI_SIZE;
- v_dst += advance_units * dst_stride * MI_SIZE;
params += advance_units;
tx_size += advance_units;
}
+ x += x_inc;
}
}
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/av1_loopfilter.h b/chromium/third_party/libaom/source/libaom/av1/common/av1_loopfilter.h
index 0c93bc15492..43bd780eb5c 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/av1_loopfilter.h
+++ b/chromium/third_party/libaom/source/libaom/av1/common/av1_loopfilter.h
@@ -110,33 +110,33 @@ void av1_filter_block_plane_horz(const struct AV1Common *const cm,
const MACROBLOCKD_PLANE *const plane_ptr,
const uint32_t mi_row, const uint32_t mi_col);
-void av1_filter_block_plane_vert_rt(const struct AV1Common *const cm,
- const MACROBLOCKD *const xd,
- const MACROBLOCKD_PLANE *const plane_ptr,
- const uint32_t mi_row,
- const uint32_t mi_col,
- AV1_DEBLOCKING_PARAMETERS *params_buf,
- TX_SIZE *tx_buf);
-
-void av1_filter_block_plane_vert_rt_chroma(
+void av1_filter_block_plane_vert_opt(const struct AV1Common *const cm,
+ const MACROBLOCKD *const xd,
+ const MACROBLOCKD_PLANE *const plane_ptr,
+ const uint32_t mi_row,
+ const uint32_t mi_col,
+ AV1_DEBLOCKING_PARAMETERS *params_buf,
+ TX_SIZE *tx_buf);
+
+void av1_filter_block_plane_vert_opt_chroma(
const struct AV1Common *const cm, const MACROBLOCKD *const xd,
const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
const uint32_t mi_col, AV1_DEBLOCKING_PARAMETERS *params_buf,
- TX_SIZE *tx_buf);
+ TX_SIZE *tx_buf, int plane, bool joint_filter_chroma);
-void av1_filter_block_plane_horz_rt(const struct AV1Common *const cm,
- const MACROBLOCKD *const xd,
- const MACROBLOCKD_PLANE *const plane_ptr,
- const uint32_t mi_row,
- const uint32_t mi_col,
- AV1_DEBLOCKING_PARAMETERS *params_buf,
- TX_SIZE *tx_buf);
+void av1_filter_block_plane_horz_opt(const struct AV1Common *const cm,
+ const MACROBLOCKD *const xd,
+ const MACROBLOCKD_PLANE *const plane_ptr,
+ const uint32_t mi_row,
+ const uint32_t mi_col,
+ AV1_DEBLOCKING_PARAMETERS *params_buf,
+ TX_SIZE *tx_buf);
-void av1_filter_block_plane_horz_rt_chroma(
+void av1_filter_block_plane_horz_opt_chroma(
const struct AV1Common *const cm, const MACROBLOCKD *const xd,
const MACROBLOCKD_PLANE *const plane_ptr, const uint32_t mi_row,
const uint32_t mi_col, AV1_DEBLOCKING_PARAMETERS *params_buf,
- TX_SIZE *tx_buf);
+ TX_SIZE *tx_buf, int plane, bool joint_filter_chroma);
uint8_t av1_get_filter_level(const struct AV1Common *cm,
const loop_filter_info_n *lfi_n, const int dir_idx,
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/av1_rtcd_defs.pl b/chromium/third_party/libaom/source/libaom/av1/common/av1_rtcd_defs.pl
index a781dd8320b..333a72d9416 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/av1_rtcd_defs.pl
+++ b/chromium/third_party/libaom/source/libaom/av1/common/av1_rtcd_defs.pl
@@ -403,19 +403,20 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
#
if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
add_proto qw/void av1_apply_temporal_filter/, "const struct yv12_buffer_config *ref_frame, const struct macroblockd *mbd, const BLOCK_SIZE block_size, const int mb_row, const int mb_col, const int num_planes, const double *noise_levels, const MV *subblock_mvs, const int *subblock_mses, const int q_factor, const int filter_strength, const uint8_t *pred, uint32_t *accum, uint16_t *count";
- specialize qw/av1_apply_temporal_filter sse2 avx2/;
+ specialize qw/av1_apply_temporal_filter sse2 avx2 neon/;
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void av1_highbd_apply_temporal_filter/, "const struct yv12_buffer_config *ref_frame, const struct macroblockd *mbd, const BLOCK_SIZE block_size, const int mb_row, const int mb_col, const int num_planes, const double *noise_levels, const MV *subblock_mvs, const int *subblock_mses, const int q_factor, const int filter_strength, const uint8_t *pred, uint32_t *accum, uint16_t *count";
- specialize qw/av1_highbd_apply_temporal_filter sse2 avx2/;
- }
+ add_proto qw/void av1_highbd_apply_temporal_filter/, "const struct yv12_buffer_config *ref_frame, const struct macroblockd *mbd, const BLOCK_SIZE block_size, const int mb_row, const int mb_col, const int num_planes, const double *noise_levels, const MV *subblock_mvs, const int *subblock_mses, const int q_factor, const int filter_strength, const uint8_t *pred, uint32_t *accum, uint16_t *count";
+ specialize qw/av1_highbd_apply_temporal_filter sse2 avx2/;
+ }
}
+
add_proto qw/void av1_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
-add_proto qw/void av1_calc_indices_dim1/, "const int *data, const int *centroids, uint8_t *indices, int n, int k";
+ add_proto qw/void av1_calc_indices_dim1/, "const int *data, const int *centroids, uint8_t *indices, int n, int k";
specialize qw/av1_calc_indices_dim1 sse2 avx2/;
-# TODO(any): Disable av1_calc_indices_dim2 sse2 version due to c/SIMD mismatch. Re-enable it after mismatch is fixed.
-add_proto qw/void av1_calc_indices_dim2/, "const int *data, const int *centroids, uint8_t *indices, int n, int k";
+ # TODO(any): Disable av1_calc_indices_dim2 sse2 version due to c/SIMD mismatch. Re-enable it after mismatch is fixed.
+ add_proto qw/void av1_calc_indices_dim2/, "const int *data, const int *centroids, uint8_t *indices, int n, int k";
specialize qw/av1_calc_indices_dim2 avx2/;
# ENCODEMB INVOKE
@@ -426,7 +427,7 @@ add_proto qw/void av1_calc_indices_dim2/, "const int *data, const int *centroids
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
- specialize qw/av1_highbd_quantize_fp sse4_1 avx2/;
+ specialize qw/av1_highbd_quantize_fp sse4_1 avx2 neon/;
}
add_proto qw/void av1_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
@@ -441,7 +442,7 @@ add_proto qw/void av1_calc_indices_dim2/, "const int *data, const int *centroids
specialize qw/av1_txb_init_levels sse4_1 avx2 neon/;
add_proto qw/uint64_t av1_wedge_sse_from_residuals/, "const int16_t *r1, const int16_t *d, const uint8_t *m, int N";
- specialize qw/av1_wedge_sse_from_residuals sse2 avx2/;
+ specialize qw/av1_wedge_sse_from_residuals sse2 avx2 neon/;
add_proto qw/int8_t av1_wedge_sign_from_residuals/, "const int16_t *ds, const uint8_t *m, int N, int64_t limit";
specialize qw/av1_wedge_sign_from_residuals sse2 avx2/;
add_proto qw/void av1_wedge_compute_delta_squares/, "int16_t *d, const int16_t *a, const int16_t *b, int N";
@@ -449,7 +450,7 @@ add_proto qw/void av1_calc_indices_dim2/, "const int *data, const int *centroids
# hash
add_proto qw/uint32_t av1_get_crc32c_value/, "void *crc_calculator, uint8_t *p, size_t length";
- specialize qw/av1_get_crc32c_value sse4_2/;
+ specialize qw/av1_get_crc32c_value sse4_2 arm_crc32/;
if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
add_proto qw/void av1_compute_stats/, "int wiener_win, const uint8_t *dgd8, const uint8_t *src8, int h_start, int h_end, int v_start, int v_end, int dgd_stride, int src_stride, int64_t *M, int64_t *H, int use_downsampled_wiener_stats";
@@ -484,7 +485,7 @@ add_proto qw/void av1_calc_indices_dim2/, "const int *data, const int *centroids
if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
add_proto qw/void av1_cnn_activate/, " float **input, int channels, int width, int height, int stride, ACTIVATION layer_activation";
add_proto qw/void av1_cnn_add/, " float **input, int channels, int width, int height, int stride, const float **add";
- add_proto qw/void av1_cnn_predict/, " const float **input, int in_width, int in_height, int in_stride, const CNN_CONFIG *cnn_config, const CNN_THREAD_DATA *thread_data, CNN_MULTI_OUT *output_struct";
+ add_proto qw/bool av1_cnn_predict/, " const float **input, int in_width, int in_height, int in_stride, const CNN_CONFIG *cnn_config, const CNN_THREAD_DATA *thread_data, CNN_MULTI_OUT *output_struct";
add_proto qw/void av1_cnn_convolve_no_maxpool_padding_valid/, " const float **input, int in_width, int in_height, int in_stride, const CNN_LAYER_CONFIG *layer_config, float **output, int out_stride, int start_idx, int cstep, int channel_step";
if (aom_config("CONFIG_EXCLUDE_SIMD_MISMATCH") ne "yes") {
specialize qw/av1_cnn_convolve_no_maxpool_padding_valid avx2/;
@@ -526,7 +527,7 @@ add_proto qw/void cdef_copy_rect8_16bit_to_16bit/, "uint16_t *dst, int dstride,
# hard to support, so optimizations for this target are disabled.
if ($opts{config} !~ /libs-x86-win32-vs.*/) {
specialize qw/cdef_find_dir sse2 ssse3 sse4_1 avx2 neon/;
- specialize qw/cdef_find_dir_dual sse2 ssse3 sse4_1 avx2/;
+ specialize qw/cdef_find_dir_dual sse2 ssse3 sse4_1 avx2 neon/;
specialize qw/cdef_filter_8_0 sse2 ssse3 sse4_1 avx2 neon/;
specialize qw/cdef_filter_8_1 sse2 ssse3 sse4_1 avx2 neon/;
@@ -543,18 +544,16 @@ if ($opts{config} !~ /libs-x86-win32-vs.*/) {
}
# WARPED_MOTION / GLOBAL_MOTION functions
-if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes" && aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
+if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
add_proto qw/void av1_highbd_warp_affine/, "const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
specialize qw/av1_highbd_warp_affine sse4_1 avx2/;
}
-if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
- add_proto qw/void av1_warp_affine/, "const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
- specialize qw/av1_warp_affine sse4_1 avx2 neon/;
+add_proto qw/void av1_warp_affine/, "const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
+specialize qw/av1_warp_affine sse4_1 avx2 neon/;
- add_proto qw/int64_t av1_calc_frame_error/, "const uint8_t *const ref, int stride, const uint8_t *const dst, int p_width, int p_height, int p_stride";
- specialize qw/av1_calc_frame_error sse2 avx2/;
-}
+add_proto qw/int64_t av1_calc_frame_error/, "const uint8_t *const ref, int stride, const uint8_t *const dst, int p_width, int p_height, int p_stride";
+specialize qw/av1_calc_frame_error sse2 avx2/;
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
add_proto qw/double av1_compute_cross_correlation/, "unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2";
@@ -562,15 +561,13 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
}
# LOOP_RESTORATION functions
-if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
- add_proto qw/void av1_apply_selfguided_restoration/, "const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd";
- specialize qw/av1_apply_selfguided_restoration sse4_1 avx2 neon/;
-
- add_proto qw/int av1_selfguided_restoration/, "const uint8_t *dgd8, int width, int height,
- int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
- int sgr_params_idx, int bit_depth, int highbd";
- specialize qw/av1_selfguided_restoration sse4_1 avx2 neon/;
-}
+add_proto qw/void av1_apply_selfguided_restoration/, "const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd";
+specialize qw/av1_apply_selfguided_restoration sse4_1 avx2 neon/;
+
+add_proto qw/int av1_selfguided_restoration/, "const uint8_t *dgd8, int width, int height,
+ int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
+ int sgr_params_idx, int bit_depth, int highbd";
+specialize qw/av1_selfguided_restoration sse4_1 avx2 neon/;
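These add_proto/specialize pairs feed libaom's rtcd (run-time CPU detection) generator, which emits one function pointer per prototype and retargets it to the best SIMD variant the CPU reports. A rough, illustrative sketch of that dispatch pattern — all names below are stand-ins, not generated output:

    #include <stdio.h>

    static int do_sum_c(int a, int b) { return a + b; }
    static int do_sum_neon(int a, int b) { return a + b; } /* stand-in SIMD body */

    static int (*do_sum)(int, int);

    static void setup_rtcd(int has_neon) {
      do_sum = do_sum_c;                  /* C fallback is always valid */
      if (has_neon) do_sum = do_sum_neon; /* upgrade per detected CPU flags */
    }

    int main(void) {
      setup_rtcd(1);
      printf("%d\n", do_sum(2, 3)); /* prints 5 */
      return 0;
    }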
# CONVOLVE_ROUND/COMPOUND_ROUND functions
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/blockd.h b/chromium/third_party/libaom/source/libaom/av1/common/blockd.h
index b2e72d2e462..931d9e5f11f 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/blockd.h
+++ b/chromium/third_party/libaom/source/libaom/av1/common/blockd.h
@@ -188,6 +188,7 @@ static const PREDICTION_MODE fimode_to_intradir[FILTER_INTRA_MODES] = {
typedef struct RD_STATS {
int rate;
+ int zero_rate;
int64_t dist;
// Please be careful when using rdcost; it's not guaranteed to be set all
// the time.
@@ -196,8 +197,7 @@ typedef struct RD_STATS {
// rate/dist.
int64_t rdcost;
int64_t sse;
- int skip_txfm; // sse should equal to dist when skip_txfm == 1
- int zero_rate;
+  uint8_t skip_txfm;  // sse should equal dist when skip_txfm == 1
#if CONFIG_RD_DEBUG
int txb_coeff_cost[MAX_MB_PLANE];
#endif // CONFIG_RD_DEBUG
@@ -285,7 +285,7 @@ typedef struct MB_MODE_INFO {
****************************************************************************/
/**@{*/
/*! \brief Whether to skip transforming and sending. */
- int8_t skip_txfm;
+ uint8_t skip_txfm;
/*! \brief Transform size when fixed size txfm is used (e.g. intra modes). */
TX_SIZE tx_size;
/*! \brief Transform size when recursive txfm tree is on. */
@@ -326,9 +326,6 @@ typedef struct MB_MODE_INFO {
int8_t cdef_strength : 4;
/**@}*/
- /*! \brief Skip CDEF for this superblock */
- uint8_t skip_cdef_curr_sb;
-
#if CONFIG_RD_DEBUG
/*! \brief RD info used for debugging */
RD_STATS rd_stats;
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/cdef.c b/chromium/third_party/libaom/source/libaom/av1/common/cdef.c
index 7807bb73980..5e0638a8922 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/cdef.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/cdef.c
@@ -100,15 +100,6 @@ void av1_cdef_copy_sb8_16(const AV1_COMMON *const cm, uint16_t *const dst,
}
}
-static INLINE void fill_rect(uint16_t *dst, int dstride, int v, int h,
- uint16_t x) {
- for (int i = 0; i < v; i++) {
- for (int j = 0; j < h; j++) {
- dst[i * dstride + j] = x;
- }
- }
-}
-
static INLINE void copy_rect(uint16_t *dst, int dstride, const uint16_t *src,
int sstride, int v, int h) {
for (int i = 0; i < v; i++) {
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/cdef.h b/chromium/third_party/libaom/source/libaom/av1/common/cdef.h
index 5bf40e47103..e166f4b2078 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/cdef.h
+++ b/chromium/third_party/libaom/source/libaom/av1/common/cdef.h
@@ -89,7 +89,7 @@ typedef void (*cdef_init_fb_row_t)(
* \param[in] xd Pointer to common current coding block structure
* \param[in] cdef_init_fb_row_fn Function Pointer
*
- * \return Nothing is returned. Instead, the filtered frame is output in
+ * \remark Nothing is returned. Instead, the filtered frame is output in
* \c frame.
*/
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *const cm,
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/cdef_block.h b/chromium/third_party/libaom/source/libaom/av1/common/cdef_block.h
index 679f1ef2aba..262f617bc04 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/cdef_block.h
+++ b/chromium/third_party/libaom/source/libaom/av1/common/cdef_block.h
@@ -56,4 +56,13 @@ void av1_cdef_filter_fb(uint8_t *dst8, uint16_t *dst16, int dstride,
int var[CDEF_NBLOCKS][CDEF_NBLOCKS], int pli,
cdef_list *dlist, int cdef_count, int level,
int sec_strength, int damping, int coeff_shift);
+
+static INLINE void fill_rect(uint16_t *dst, int dstride, int v, int h,
+ uint16_t x) {
+ for (int i = 0; i < v; i++) {
+ for (int j = 0; j < h; j++) {
+ dst[i * dstride + j] = x;
+ }
+ }
+}
#endif // AOM_AV1_COMMON_CDEF_BLOCK_H_
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/common.h b/chromium/third_party/libaom/source/libaom/av1/common/common.h
index 0c0feb34fb7..ccb45b68cee 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/common.h
+++ b/chromium/third_party/libaom/source/libaom/av1/common/common.h
@@ -48,7 +48,7 @@ static INLINE int get_unsigned_bits(unsigned int num_values) {
}
#define CHECK_MEM_ERROR(cm, lval, expr) \
- AOM_CHECK_MEM_ERROR(cm->error, lval, expr)
+ AOM_CHECK_MEM_ERROR((cm)->error, lval, expr)
#define AOM_FRAME_MARKER 0x2
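The extra parentheses above are a standard macro-hygiene fix: an unparenthesized macro argument can bind to `->` incorrectly. A minimal standalone sketch — the `ctx`/`ERR_*` names are illustrative, not libaom code:

    #include <stdio.h>

    struct ctx { const char *error; };

    #define ERR_UNSAFE(cm) cm->error   /* old, unparenthesized form */
    #define ERR_SAFE(cm)   (cm)->error /* form used by the fix above */

    int main(void) {
      struct ctx a = { "A" }, b = { "B" };
      struct ctx *pa = &a, *pb = &b;
      int use_a = 0;
      /* ERR_UNSAFE(use_a ? pa : pb) would expand to use_a ? pa : pb->error,
       * which mis-associates the ternary and fails to compile; the
       * parenthesized form evaluates the whole argument first. */
      printf("%s\n", ERR_SAFE(use_a ? pa : pb)); /* prints "B" */
      return 0;
    }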
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/convolve.h b/chromium/third_party/libaom/source/libaom/av1/common/convolve.h
index 5f3e59625bf..f597d8e038e 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/convolve.h
+++ b/chromium/third_party/libaom/source/libaom/av1/common/convolve.h
@@ -72,12 +72,16 @@ static INLINE ConvolveParams get_conv_params_no_round(int cmp_index, int plane,
conv_params.round_0 = ROUND0_BITS;
conv_params.round_1 = is_compound ? COMPOUND_ROUND1_BITS
: 2 * FILTER_BITS - conv_params.round_0;
+#if CONFIG_AV1_HIGHBITDEPTH
const int intbufrange = bd + FILTER_BITS - conv_params.round_0 + 2;
assert(IMPLIES(bd < 12, intbufrange <= 16));
if (intbufrange > 16) {
conv_params.round_0 += intbufrange - 16;
if (!is_compound) conv_params.round_1 -= intbufrange - 16;
}
+#else
+ (void)bd;
+#endif // CONFIG_AV1_HIGHBITDEPTH
// TODO(yunqing): The following dst should only be valid when
// is_compound = 1;
conv_params.dst = dst;
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/filter.h b/chromium/third_party/libaom/source/libaom/av1/common/filter.h
index ded5ce5ae51..4344aea9164 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/filter.h
+++ b/chromium/third_party/libaom/source/libaom/av1/common/filter.h
@@ -294,6 +294,25 @@ static INLINE uint8_t get_interp_filter_allowed_mask(
return (allow_interp_mask >> filt_type) & 1;
}
+static AOM_INLINE int get_filter_tap(
+ const InterpFilterParams *const filter_params, int subpel_qn) {
+ const int16_t *const filter = av1_get_interp_filter_subpel_kernel(
+ filter_params, subpel_qn & SUBPEL_MASK);
+ if (filter_params->taps == 12) {
+ return 12;
+ }
+ if (filter[0] | filter[7]) {
+ return 8;
+ }
+ if (filter[1] | filter[6]) {
+ return 6;
+ }
+ if (filter[2] | filter[5]) {
+ return 4;
+ }
+ return 2;
+}
+
#ifdef __cplusplus
} // extern "C"
#endif
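The new get_filter_tap infers the effective tap count by probing the outermost coefficient pairs for zeros (12-tap filter params short-circuit to 12). A simplified standalone demo of the probe, using made-up kernel values:

    #include <stdio.h>

    /* 8-tap kernels whose outer coefficient pairs are zero behave like
     * shorter filters, so the effective length is found from the
     * outermost nonzero pair. */
    static int effective_taps(const short k[8]) {
      if (k[0] | k[7]) return 8;
      if (k[1] | k[6]) return 6;
      if (k[2] | k[5]) return 4;
      return 2;
    }

    int main(void) {
      const short bilinear[8] = { 0, 0, 0, 96, 32, 0, 0, 0 };
      const short six_tap[8]  = { 0, 2, -10, 92, 56, -14, 2, 0 };
      printf("%d %d\n", effective_taps(bilinear), effective_taps(six_tap)); /* 2 6 */
      return 0;
    }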
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/mv.h b/chromium/third_party/libaom/source/libaom/av1/common/mv.h
index c7eaf76d081..e70ce903dae 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/mv.h
+++ b/chromium/third_party/libaom/source/libaom/av1/common/mv.h
@@ -282,6 +282,17 @@ static INLINE int_mv gm_get_motion_vector(const WarpedMotionParams *gm,
// After the right shifts, there are 3 fractional bits of precision. If
// allow_hp is false, the bottom bit is always zero (so we don't need a
// call to convert_to_trans_prec here)
+ //
+ // Note: There is an AV1 specification bug here:
+ //
+ // gm->wmmat[0] is supposed to be the horizontal translation, and so should
+ // go into res.as_mv.col, and gm->wmmat[1] is supposed to be the vertical
+ // translation and so should go into res.as_mv.row
+ //
+ // However, in the spec, these assignments are accidentally reversed, and so
+ // we must keep this incorrect logic to match the spec.
+ //
+ // See also: https://crbug.com/aomedia/3328
res.as_mv.row = gm->wmmat[0] >> GM_TRANS_ONLY_PREC_DIFF;
res.as_mv.col = gm->wmmat[1] >> GM_TRANS_ONLY_PREC_DIFF;
assert(IMPLIES(1 & (res.as_mv.row | res.as_mv.col), allow_hp));
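A minimal sketch of the (deliberately preserved) reversed mapping the new comment documents, using simplified stand-in types rather than the libaom definitions:

    struct demo_mv { int row, col; };

    static struct demo_mv trans_only_gmv(const int wmmat[2], int prec_diff) {
      struct demo_mv res;
      res.row = wmmat[0] >> prec_diff; /* horizontal component -> row (sic) */
      res.col = wmmat[1] >> prec_diff; /* vertical component   -> col (sic) */
      return res;
    }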
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/pred_common.h b/chromium/third_party/libaom/source/libaom/av1/common/pred_common.h
index 3db9dd69efc..6ad7ba3cd7b 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/pred_common.h
+++ b/chromium/third_party/libaom/source/libaom/av1/common/pred_common.h
@@ -44,7 +44,8 @@ static INLINE int get_segment_id(const CommonModeInfoParams *const mi_params,
static INLINE int av1_get_spatial_seg_pred(const AV1_COMMON *const cm,
const MACROBLOCKD *const xd,
- int *cdf_index) {
+ int *cdf_index, int skip_over4x4) {
+ const int step_size = skip_over4x4 ? 2 : 1;
int prev_ul = -1; // top left segment_id
int prev_l = -1; // left segment_id
int prev_u = -1; // top segment_id
@@ -53,16 +54,16 @@ static INLINE int av1_get_spatial_seg_pred(const AV1_COMMON *const cm,
const CommonModeInfoParams *const mi_params = &cm->mi_params;
const uint8_t *seg_map = cm->cur_frame->seg_map;
if ((xd->up_available) && (xd->left_available)) {
- prev_ul =
- get_segment_id(mi_params, seg_map, BLOCK_4X4, mi_row - 1, mi_col - 1);
+ prev_ul = get_segment_id(mi_params, seg_map, BLOCK_4X4, mi_row - step_size,
+ mi_col - step_size);
}
if (xd->up_available) {
- prev_u =
- get_segment_id(mi_params, seg_map, BLOCK_4X4, mi_row - 1, mi_col - 0);
+ prev_u = get_segment_id(mi_params, seg_map, BLOCK_4X4, mi_row - step_size,
+ mi_col - 0);
}
if (xd->left_available) {
- prev_l =
- get_segment_id(mi_params, seg_map, BLOCK_4X4, mi_row - 0, mi_col - 1);
+ prev_l = get_segment_id(mi_params, seg_map, BLOCK_4X4, mi_row - 0,
+ mi_col - step_size);
}
// This property follows from the fact that get_segment_id() returns a
// nonnegative value. This allows us to test for all edge cases with a simple
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/reconinter.c b/chromium/third_party/libaom/source/libaom/av1/common/reconinter.c
index a6b93bb6f6b..c6c66d27292 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/reconinter.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/reconinter.c
@@ -57,44 +57,6 @@ int av1_allow_warp(const MB_MODE_INFO *const mbmi,
return 0;
}
-void av1_init_inter_params(InterPredParams *inter_pred_params, int block_width,
- int block_height, int pix_row, int pix_col,
- int subsampling_x, int subsampling_y, int bit_depth,
- int use_hbd_buf, int is_intrabc,
- const struct scale_factors *sf,
- const struct buf_2d *ref_buf,
- int_interpfilters interp_filters) {
- inter_pred_params->block_width = block_width;
- inter_pred_params->block_height = block_height;
- inter_pred_params->pix_row = pix_row;
- inter_pred_params->pix_col = pix_col;
- inter_pred_params->subsampling_x = subsampling_x;
- inter_pred_params->subsampling_y = subsampling_y;
- inter_pred_params->bit_depth = bit_depth;
- inter_pred_params->use_hbd_buf = use_hbd_buf;
- inter_pred_params->is_intrabc = is_intrabc;
- inter_pred_params->scale_factors = sf;
- inter_pred_params->ref_frame_buf = *ref_buf;
- inter_pred_params->mode = TRANSLATION_PRED;
- inter_pred_params->comp_mode = UNIFORM_SINGLE;
-
- if (is_intrabc) {
- inter_pred_params->interp_filter_params[0] = &av1_intrabc_filter_params;
- inter_pred_params->interp_filter_params[1] = &av1_intrabc_filter_params;
- } else {
- inter_pred_params->interp_filter_params[0] =
- av1_get_interp_filter_params_with_block_size(
- interp_filters.as_filters.x_filter, block_width);
- inter_pred_params->interp_filter_params[1] =
- av1_get_interp_filter_params_with_block_size(
- interp_filters.as_filters.y_filter, block_height);
- }
-}
-
-void av1_init_comp_mode(InterPredParams *inter_pred_params) {
- inter_pred_params->comp_mode = UNIFORM_COMP;
-}
-
void av1_init_warp_params(InterPredParams *inter_pred_params,
const WarpTypesAllowed *warp_types, int ref,
const MACROBLOCKD *xd, const MB_MODE_INFO *mi) {
@@ -106,10 +68,6 @@ void av1_init_warp_params(InterPredParams *inter_pred_params,
if (av1_allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]], 0,
inter_pred_params->scale_factors,
&inter_pred_params->warp_params)) {
-#if CONFIG_REALTIME_ONLY
- aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_FEATURE,
- "Warped motion is disabled in realtime only build.");
-#endif
inter_pred_params->mode = WARP_PRED;
}
}
@@ -145,7 +103,6 @@ void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
inter_pred_params->interp_filter_params);
#endif
}
-#if !CONFIG_REALTIME_ONLY
// TODO(jingning): av1_warp_plane() can be further cleaned up.
else if (inter_pred_params->mode == WARP_PRED) {
av1_warp_plane(
@@ -158,9 +115,7 @@ void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
inter_pred_params->block_width, inter_pred_params->block_height,
dst_stride, inter_pred_params->subsampling_x,
inter_pred_params->subsampling_y, &inter_pred_params->conv_params);
- }
-#endif
- else {
+ } else {
assert(0 && "Unsupported inter_pred_params->mode");
}
}
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/reconinter.h b/chromium/third_party/libaom/source/libaom/av1/common/reconinter.h
index 056dc67d073..cf856286d0d 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/reconinter.h
+++ b/chromium/third_party/libaom/source/libaom/av1/common/reconinter.h
@@ -123,15 +123,42 @@ typedef struct InterPredParams {
int is_intrabc;
} InterPredParams;
-void av1_init_inter_params(InterPredParams *inter_pred_params, int block_width,
- int block_height, int pix_row, int pix_col,
- int subsampling_x, int subsampling_y, int bit_depth,
- int use_hbd_buf, int is_intrabc,
- const struct scale_factors *sf,
- const struct buf_2d *ref_buf,
- int_interpfilters interp_filters);
-
-void av1_init_comp_mode(InterPredParams *inter_pred_params);
+static AOM_INLINE void av1_init_inter_params(
+ InterPredParams *inter_pred_params, int block_width, int block_height,
+ int pix_row, int pix_col, int subsampling_x, int subsampling_y,
+ int bit_depth, int use_hbd_buf, int is_intrabc,
+ const struct scale_factors *sf, const struct buf_2d *ref_buf,
+ int_interpfilters interp_filters) {
+ inter_pred_params->block_width = block_width;
+ inter_pred_params->block_height = block_height;
+ inter_pred_params->pix_row = pix_row;
+ inter_pred_params->pix_col = pix_col;
+ inter_pred_params->subsampling_x = subsampling_x;
+ inter_pred_params->subsampling_y = subsampling_y;
+ inter_pred_params->bit_depth = bit_depth;
+ inter_pred_params->use_hbd_buf = use_hbd_buf;
+ inter_pred_params->is_intrabc = is_intrabc;
+ inter_pred_params->scale_factors = sf;
+ inter_pred_params->ref_frame_buf = *ref_buf;
+ inter_pred_params->mode = TRANSLATION_PRED;
+ inter_pred_params->comp_mode = UNIFORM_SINGLE;
+
+ if (is_intrabc) {
+ inter_pred_params->interp_filter_params[0] = &av1_intrabc_filter_params;
+ inter_pred_params->interp_filter_params[1] = &av1_intrabc_filter_params;
+ } else {
+ inter_pred_params->interp_filter_params[0] =
+ av1_get_interp_filter_params_with_block_size(
+ (InterpFilter)interp_filters.as_filters.x_filter, block_width);
+ inter_pred_params->interp_filter_params[1] =
+ av1_get_interp_filter_params_with_block_size(
+ (InterpFilter)interp_filters.as_filters.y_filter, block_height);
+ }
+}
+
+static AOM_INLINE void av1_init_comp_mode(InterPredParams *inter_pred_params) {
+ inter_pred_params->comp_mode = UNIFORM_COMP;
+}
void av1_init_warp_params(InterPredParams *inter_pred_params,
const WarpTypesAllowed *warp_types, int ref,
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/reconintra.c b/chromium/third_party/libaom/source/libaom/av1/common/reconintra.c
index 4d2cf089632..d5f806e587f 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/reconintra.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/reconintra.c
@@ -463,17 +463,6 @@ static intra_high_pred_fn dc_pred_high[2][2][TX_SIZES_ALL];
static void init_intra_predictors_internal(void) {
assert(NELEMENTS(mode_to_angle_map) == INTRA_MODES);
-#if CONFIG_REALTIME_ONLY
-#define INIT_RECTANGULAR(p, type) \
- p[TX_4X8] = aom_##type##_predictor_4x8; \
- p[TX_8X4] = aom_##type##_predictor_8x4; \
- p[TX_8X16] = aom_##type##_predictor_8x16; \
- p[TX_16X8] = aom_##type##_predictor_16x8; \
- p[TX_16X32] = aom_##type##_predictor_16x32; \
- p[TX_32X16] = aom_##type##_predictor_32x16; \
- p[TX_32X64] = aom_##type##_predictor_32x64; \
- p[TX_64X32] = aom_##type##_predictor_64x32;
-#else
#define INIT_RECTANGULAR(p, type) \
p[TX_4X8] = aom_##type##_predictor_4x8; \
p[TX_8X4] = aom_##type##_predictor_8x4; \
@@ -489,7 +478,6 @@ static void init_intra_predictors_internal(void) {
p[TX_32X8] = aom_##type##_predictor_32x8; \
p[TX_16X64] = aom_##type##_predictor_16x64; \
p[TX_64X16] = aom_##type##_predictor_64x16;
-#endif
#define INIT_NO_4X4(p, type) \
p[TX_8X8] = aom_##type##_predictor_8x8; \
@@ -627,7 +615,7 @@ void av1_dr_prediction_z3_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
for (r = 0; r < bh; ++r, base += base_inc) {
if (base < max_base_y) {
val = left[base] * (32 - shift) + left[base + 1] * shift;
- dst[r * stride + c] = val = ROUND_POWER_OF_TWO(val, 5);
+ dst[r * stride + c] = ROUND_POWER_OF_TWO(val, 5);
} else {
for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
break;
@@ -1144,7 +1132,7 @@ void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd) {
#if CONFIG_AV1_HIGHBITDEPTH
static void build_intra_predictors_high(
const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
- PREDICTION_MODE mode, int angle_delta, FILTER_INTRA_MODE filter_intra_mode,
+ PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
int n_left_px, int n_bottomleft_px, int intra_edge_filter_type,
int bit_depth) {
@@ -1162,7 +1150,6 @@ static void build_intra_predictors_high(
int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
const uint16_t *above_ref = ref - ref_stride;
const uint16_t *left_ref = ref - 1;
- int p_angle = 0;
const int is_dr_mode = av1_is_directional_mode(mode);
const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
int base = 128 << (bit_depth - 8);
@@ -1181,7 +1168,6 @@ static void build_intra_predictors_high(
// base+1 G H .. S T T T T T
if (is_dr_mode) {
- p_angle = mode_to_angle_map[mode] + angle_delta;
if (p_angle <= 90)
need_above = 1, need_left = 0, need_above_left = 1;
else if (p_angle < 180)
@@ -1192,9 +1178,9 @@ static void build_intra_predictors_high(
if (use_filter_intra) need_left = need_above = need_above_left = 1;
assert(n_top_px >= 0);
- assert(n_topright_px >= 0);
+ assert(n_topright_px >= -1);
assert(n_left_px >= 0);
- assert(n_bottomleft_px >= 0);
+ assert(n_bottomleft_px >= -1);
if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
int val;
@@ -1212,14 +1198,12 @@ static void build_intra_predictors_high(
// NEED_LEFT
if (need_left) {
- int need_bottom = extend_modes[mode] & NEED_BOTTOMLEFT;
- if (use_filter_intra) need_bottom = 0;
- if (is_dr_mode) need_bottom = p_angle > 180;
- const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
+ const int num_left_pixels_needed =
+ txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
i = 0;
if (n_left_px > 0) {
for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
- if (need_bottom && n_bottomleft_px > 0) {
+ if (n_bottomleft_px > 0) {
assert(i == txhpx);
for (; i < txhpx + n_bottomleft_px; i++)
left_col[i] = left_ref[i * ref_stride];
@@ -1233,14 +1217,11 @@ static void build_intra_predictors_high(
// NEED_ABOVE
if (need_above) {
- int need_right = extend_modes[mode] & NEED_ABOVERIGHT;
- if (use_filter_intra) need_right = 0;
- if (is_dr_mode) need_right = p_angle < 90;
- const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
+ const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
if (n_top_px > 0) {
memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
i = n_top_px;
- if (need_right && n_topright_px > 0) {
+ if (n_topright_px > 0) {
assert(n_top_px == txwpx);
memcpy(above_row + txwpx, above_ref + txwpx,
n_topright_px * sizeof(above_ref[0]));
@@ -1327,7 +1308,7 @@ static void build_intra_predictors_high(
static void build_intra_predictors(
const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
- PREDICTION_MODE mode, int angle_delta, FILTER_INTRA_MODE filter_intra_mode,
+ PREDICTION_MODE mode, int p_angle, FILTER_INTRA_MODE filter_intra_mode,
TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
int n_left_px, int n_bottomleft_px, int intra_edge_filter_type) {
int i;
@@ -1342,7 +1323,6 @@ static void build_intra_predictors(
int need_left = extend_modes[mode] & NEED_LEFT;
int need_above = extend_modes[mode] & NEED_ABOVE;
int need_above_left = extend_modes[mode] & NEED_ABOVELEFT;
- int p_angle = 0;
const int is_dr_mode = av1_is_directional_mode(mode);
const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
// The left_data, above_data buffers must be zeroed to fix some intermittent
@@ -1361,7 +1341,6 @@ static void build_intra_predictors(
// ..
if (is_dr_mode) {
- p_angle = mode_to_angle_map[mode] + angle_delta;
if (p_angle <= 90)
need_above = 1, need_left = 0, need_above_left = 1;
else if (p_angle < 180)
@@ -1372,9 +1351,9 @@ static void build_intra_predictors(
if (use_filter_intra) need_left = need_above = need_above_left = 1;
assert(n_top_px >= 0);
- assert(n_topright_px >= 0);
+ assert(n_topright_px >= -1);
assert(n_left_px >= 0);
- assert(n_bottomleft_px >= 0);
+ assert(n_bottomleft_px >= -1);
if ((!need_above && n_left_px == 0) || (!need_left && n_top_px == 0)) {
int val;
@@ -1392,14 +1371,12 @@ static void build_intra_predictors(
// NEED_LEFT
if (need_left) {
- int need_bottom = extend_modes[mode] & NEED_BOTTOMLEFT;
- if (use_filter_intra) need_bottom = 0;
- if (is_dr_mode) need_bottom = p_angle > 180;
- const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 0);
+ const int num_left_pixels_needed =
+ txhpx + (n_bottomleft_px >= 0 ? txwpx : 0);
i = 0;
if (n_left_px > 0) {
for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
- if (need_bottom && n_bottomleft_px > 0) {
+ if (n_bottomleft_px > 0) {
assert(i == txhpx);
for (; i < txhpx + n_bottomleft_px; i++)
left_col[i] = left_ref[i * ref_stride];
@@ -1413,14 +1390,11 @@ static void build_intra_predictors(
// NEED_ABOVE
if (need_above) {
- int need_right = extend_modes[mode] & NEED_ABOVERIGHT;
- if (use_filter_intra) need_right = 0;
- if (is_dr_mode) need_right = p_angle < 90;
- const int num_top_pixels_needed = txwpx + (need_right ? txhpx : 0);
+ const int num_top_pixels_needed = txwpx + (n_topright_px >= 0 ? txhpx : 0);
if (n_top_px > 0) {
memcpy(above_row, above_ref, n_top_px);
i = n_top_px;
- if (need_right && n_topright_px > 0) {
+ if (n_topright_px > 0) {
assert(n_top_px == txwpx);
memcpy(above_row + txwpx, above_ref + txwpx, n_topright_px);
i += n_topright_px;
@@ -1622,33 +1596,58 @@ void av1_predict_intra_block(const MACROBLOCKD *xd, BLOCK_SIZE sb_size,
bsize = scale_chroma_bsize(bsize, ss_x, ss_y);
}
+ const int is_dr_mode = av1_is_directional_mode(mode);
+ const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
+ int p_angle = 0;
+ int need_top_right = extend_modes[mode] & NEED_ABOVERIGHT;
+ int need_bottom_left = extend_modes[mode] & NEED_BOTTOMLEFT;
+
+ if (use_filter_intra) {
+ need_top_right = 0;
+ need_bottom_left = 0;
+ }
+ if (is_dr_mode) {
+ p_angle = mode_to_angle_map[mode] + angle_delta;
+ need_top_right = p_angle < 90;
+ need_bottom_left = p_angle > 180;
+ }
+
+ // Possible states for have_top_right(TR) and have_bottom_left(BL)
+ // -1 : TR and BL are not needed
+ // 0 : TR and BL are needed but not available
+ // > 0 : TR and BL are needed and pixels are available
const int have_top_right =
- has_top_right(sb_size, bsize, mi_row, mi_col, have_top, right_available,
- partition, tx_size, row_off, col_off, ss_x, ss_y);
- const int have_bottom_left = has_bottom_left(
- sb_size, bsize, mi_row, mi_col, bottom_available, have_left, partition,
- tx_size, row_off, col_off, ss_x, ss_y);
+ need_top_right ? has_top_right(sb_size, bsize, mi_row, mi_col, have_top,
+ right_available, partition, tx_size,
+ row_off, col_off, ss_x, ss_y)
+ : -1;
+ const int have_bottom_left =
+ need_bottom_left ? has_bottom_left(sb_size, bsize, mi_row, mi_col,
+ bottom_available, have_left, partition,
+ tx_size, row_off, col_off, ss_x, ss_y)
+ : -1;
const int disable_edge_filter = !enable_intra_edge_filter;
const int intra_edge_filter_type = get_intra_edge_filter_type(xd, plane);
#if CONFIG_AV1_HIGHBITDEPTH
if (is_cur_buf_hbd(xd)) {
build_intra_predictors_high(
- ref, ref_stride, dst, dst_stride, mode, angle_delta, filter_intra_mode,
+ ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
tx_size, disable_edge_filter, have_top ? AOMMIN(txwpx, xr + txwpx) : 0,
- have_top_right ? AOMMIN(txwpx, xr) : 0,
+ have_top_right > 0 ? AOMMIN(txwpx, xr) : have_top_right,
have_left ? AOMMIN(txhpx, yd + txhpx) : 0,
- have_bottom_left ? AOMMIN(txhpx, yd) : 0, intra_edge_filter_type,
- xd->bd);
+ have_bottom_left > 0 ? AOMMIN(txhpx, yd) : have_bottom_left,
+ intra_edge_filter_type, xd->bd);
return;
}
#endif
build_intra_predictors(
- ref, ref_stride, dst, dst_stride, mode, angle_delta, filter_intra_mode,
+ ref, ref_stride, dst, dst_stride, mode, p_angle, filter_intra_mode,
tx_size, disable_edge_filter, have_top ? AOMMIN(txwpx, xr + txwpx) : 0,
- have_top_right ? AOMMIN(txwpx, xr) : 0,
+ have_top_right > 0 ? AOMMIN(txwpx, xr) : have_top_right,
have_left ? AOMMIN(txhpx, yd + txhpx) : 0,
- have_bottom_left ? AOMMIN(txhpx, yd) : 0, intra_edge_filter_type);
+ have_bottom_left > 0 ? AOMMIN(txhpx, yd) : have_bottom_left,
+ intra_edge_filter_type);
}
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
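A condensed sketch of the tri-state plumbing above, assuming the semantics stated in the new comment (-1 = edge not needed, 0 = needed but unavailable, >0 = pixels available); top_right_px is a hypothetical helper, not a libaom function:

    static int top_right_px(int have_top_right, int txwpx, int xr) {
      /* Clamp only when pixels exist; otherwise forward the sentinel so
       * the predictor builder can tell "not needed" from "unavailable". */
      if (have_top_right > 0) return txwpx < xr ? txwpx : xr; /* AOMMIN */
      return have_top_right; /* -1 or 0 passed through */
    }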
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/resize.c b/chromium/third_party/libaom/source/libaom/av1/common/resize.c
index 322363fa1e1..22629458398 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/resize.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/resize.c
@@ -12,6 +12,7 @@
#include <assert.h>
#include <limits.h>
#include <math.h>
+#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -700,7 +701,7 @@ Error:
aom_free(arrbuf2);
}
-static void upscale_normative_rect(const uint8_t *const input, int height,
+static bool upscale_normative_rect(const uint8_t *const input, int height,
int width, int in_stride, uint8_t *output,
int height2, int width2, int out_stride,
int x_step_qn, int x0_qn, int pad_left,
@@ -725,6 +726,7 @@ static void upscale_normative_rect(const uint8_t *const input, int height,
uint8_t *const in_tr = (uint8_t *)(input + width);
if (pad_left) {
tmp_left = (uint8_t *)aom_malloc(sizeof(*tmp_left) * border_cols * height);
+ if (!tmp_left) return false;
for (int i = 0; i < height; i++) {
memcpy(tmp_left + i * border_cols, in_tl + i * in_stride, border_cols);
memset(in_tl + i * in_stride, input[i * in_stride], border_cols);
@@ -733,6 +735,10 @@ static void upscale_normative_rect(const uint8_t *const input, int height,
if (pad_right) {
tmp_right =
(uint8_t *)aom_malloc(sizeof(*tmp_right) * border_cols * height);
+ if (!tmp_right) {
+ aom_free(tmp_left);
+ return false;
+ }
for (int i = 0; i < height; i++) {
memcpy(tmp_right + i * border_cols, in_tr + i * in_stride, border_cols);
memset(in_tr + i * in_stride, input[i * in_stride + width - 1],
@@ -757,6 +763,7 @@ static void upscale_normative_rect(const uint8_t *const input, int height,
}
aom_free(tmp_right);
}
+ return true;
}
#if CONFIG_AV1_HIGHBITDEPTH
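The error path added here is the usual paired-allocation pattern: if the second allocation fails, the first buffer must be released before bailing out, or the early return leaks it. A minimal generic sketch with plain malloc:

    #include <stdbool.h>
    #include <stdlib.h>

    static bool alloc_pair(size_t n, char **left, char **right) {
      *left = malloc(n);
      if (!*left) return false;
      *right = malloc(n);
      if (!*right) {
        free(*left); /* avoid leaking the first buffer */
        *left = NULL;
        return false;
      }
      return true;
    }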
@@ -1045,7 +1052,7 @@ Error:
aom_free(arrbuf2);
}
-static void highbd_upscale_normative_rect(const uint8_t *const input,
+static bool highbd_upscale_normative_rect(const uint8_t *const input,
int height, int width, int in_stride,
uint8_t *output, int height2,
int width2, int out_stride,
@@ -1073,6 +1080,7 @@ static void highbd_upscale_normative_rect(const uint8_t *const input,
uint16_t *const in_tr = input16 + width;
if (pad_left) {
tmp_left = (uint16_t *)aom_malloc(sizeof(*tmp_left) * border_cols * height);
+ if (!tmp_left) return false;
for (int i = 0; i < height; i++) {
memcpy(tmp_left + i * border_cols, in_tl + i * in_stride, border_size);
aom_memset16(in_tl + i * in_stride, input16[i * in_stride], border_cols);
@@ -1081,6 +1089,10 @@ static void highbd_upscale_normative_rect(const uint8_t *const input,
if (pad_right) {
tmp_right =
(uint16_t *)aom_malloc(sizeof(*tmp_right) * border_cols * height);
+ if (!tmp_right) {
+ aom_free(tmp_left);
+ return false;
+ }
for (int i = 0; i < height; i++) {
memcpy(tmp_right + i * border_cols, in_tr + i * in_stride, border_size);
aom_memset16(in_tr + i * in_stride, input16[i * in_stride + width - 1],
@@ -1106,6 +1118,7 @@ static void highbd_upscale_normative_rect(const uint8_t *const input,
}
aom_free(tmp_right);
}
+ return true;
}
#endif // CONFIG_AV1_HIGHBITDEPTH
@@ -1304,21 +1317,26 @@ void av1_upscale_normative_rows(const AV1_COMMON *cm, const uint8_t *src,
const int pad_left = (j == 0);
const int pad_right = (j == cm->tiles.cols - 1);
+ bool success;
#if CONFIG_AV1_HIGHBITDEPTH
if (cm->seq_params->use_highbitdepth)
- highbd_upscale_normative_rect(src_ptr, rows, src_width, src_stride,
- dst_ptr, rows, dst_width, dst_stride,
- x_step_qn, x0_qn, pad_left, pad_right,
- cm->seq_params->bit_depth);
+ success = highbd_upscale_normative_rect(
+ src_ptr, rows, src_width, src_stride, dst_ptr, rows, dst_width,
+ dst_stride, x_step_qn, x0_qn, pad_left, pad_right,
+ cm->seq_params->bit_depth);
else
- upscale_normative_rect(src_ptr, rows, src_width, src_stride, dst_ptr,
- rows, dst_width, dst_stride, x_step_qn, x0_qn,
- pad_left, pad_right);
+ success = upscale_normative_rect(src_ptr, rows, src_width, src_stride,
+ dst_ptr, rows, dst_width, dst_stride,
+ x_step_qn, x0_qn, pad_left, pad_right);
#else
- upscale_normative_rect(src_ptr, rows, src_width, src_stride, dst_ptr, rows,
- dst_width, dst_stride, x_step_qn, x0_qn, pad_left,
- pad_right);
+ success = upscale_normative_rect(src_ptr, rows, src_width, src_stride,
+ dst_ptr, rows, dst_width, dst_stride,
+ x_step_qn, x0_qn, pad_left, pad_right);
#endif
+ if (!success) {
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
+ "Error upscaling frame");
+ }
// Update the fractional pixel offset to prepare for the next tile column.
x0_qn += (dst_width * x_step_qn) - (src_width << RS_SCALE_SUBPEL_BITS);
}
@@ -1362,7 +1380,7 @@ YV12_BUFFER_CONFIG *av1_realloc_and_scale_if_required(
scaled, scaled_width, scaled_height, seq_params->subsampling_x,
seq_params->subsampling_y, seq_params->use_highbitdepth,
border_in_pixels, cm->features.byte_alignment, NULL, NULL, NULL,
- alloc_y_buffer_8bit))
+ alloc_y_buffer_8bit, 0))
aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate scaled buffer");
@@ -1462,7 +1480,7 @@ void av1_superres_upscale(AV1_COMMON *cm, BufferPool *const pool) {
if (aom_alloc_frame_buffer(
&copy_buffer, aligned_width, cm->height, seq_params->subsampling_x,
seq_params->subsampling_y, seq_params->use_highbitdepth,
- AOM_BORDER_IN_PIXELS, byte_alignment))
+ AOM_BORDER_IN_PIXELS, byte_alignment, 0))
aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate copy buffer for superres upscaling");
@@ -1494,7 +1512,7 @@ void av1_superres_upscale(AV1_COMMON *cm, BufferPool *const pool) {
frame_to_show, cm->superres_upscaled_width,
cm->superres_upscaled_height, seq_params->subsampling_x,
seq_params->subsampling_y, seq_params->use_highbitdepth,
- AOM_BORDER_IN_PIXELS, byte_alignment, fb, cb, cb_priv, 0)) {
+ AOM_BORDER_IN_PIXELS, byte_alignment, fb, cb, cb_priv, 0, 0)) {
unlock_buffer_pool(pool);
aom_internal_error(
cm->error, AOM_CODEC_MEM_ERROR,
@@ -1511,7 +1529,7 @@ void av1_superres_upscale(AV1_COMMON *cm, BufferPool *const pool) {
frame_to_show, cm->superres_upscaled_width,
cm->superres_upscaled_height, seq_params->subsampling_x,
seq_params->subsampling_y, seq_params->use_highbitdepth,
- AOM_BORDER_IN_PIXELS, byte_alignment))
+ AOM_BORDER_IN_PIXELS, byte_alignment, 0))
aom_internal_error(
cm->error, AOM_CODEC_MEM_ERROR,
"Failed to reallocate current frame buffer for superres upscaling");
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/resize.h b/chromium/third_party/libaom/source/libaom/av1/common/resize.h
index 9bc23b3ffac..f3a0ed5b6a3 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/resize.h
+++ b/chromium/third_party/libaom/source/libaom/av1/common/resize.h
@@ -102,7 +102,7 @@ static INLINE int av1_superres_scaled(const AV1_COMMON *cm) {
// Note: for some corner cases (e.g. cm->width of 1), there may be no scaling
// required even though cm->superres_scale_denominator != SCALE_NUMERATOR.
// So, the following check is more accurate.
- return !(cm->width == cm->superres_upscaled_width);
+ return (cm->width != cm->superres_upscaled_width);
}
// There are SIMD optimizations for 1/4, 1/2 and 3/4 downscaling.
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/restoration.c b/chromium/third_party/libaom/source/libaom/av1/common/restoration.c
index 202953c8899..dbfd1cc0dea 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/restoration.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/restoration.c
@@ -1117,7 +1117,7 @@ void av1_loop_restoration_filter_frame_init(AV1LrStruct *lr_ctxt,
if (aom_realloc_frame_buffer(
lr_ctxt->dst, frame_width, frame_height, seq_params->subsampling_x,
seq_params->subsampling_y, highbd, AOM_RESTORATION_FRAME_BORDER,
- cm->features.byte_alignment, NULL, NULL, NULL, 0) < 0)
+ cm->features.byte_alignment, NULL, NULL, NULL, 0, 0) < 0)
aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate restoration dst buffer");
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/restoration.h b/chromium/third_party/libaom/source/libaom/av1/common/restoration.h
index 65ccd0900c0..a87b9ba7b8e 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/restoration.h
+++ b/chromium/third_party/libaom/source/libaom/av1/common/restoration.h
@@ -397,7 +397,7 @@ void av1_decode_xq(const int *xqd, int *xq, const sgr_params_type *params);
* should be at least SGRPROJ_TMPBUF_SIZE big.
* \param[in] optimized_lr Whether to use fast optimized Loop Restoration
*
- * \return Nothing is returned. Instead, the filtered unit is output in
+ * \remark Nothing is returned. Instead, the filtered unit is output in
* \c dst8 at the proper restoration unit offset.
*/
void av1_loop_restoration_filter_unit(
@@ -417,7 +417,7 @@ void av1_loop_restoration_filter_unit(
* \param[in] optimized_lr Whether to use fast optimized Loop Restoration
* \param[in] lr_ctxt Loop restoration context
*
- * \return Nothing is returned. Instead, the filtered frame is output in
+ * \remark Nothing is returned. Instead, the filtered frame is output in
* \c frame.
*/
void av1_loop_restoration_filter_frame(YV12_BUFFER_CONFIG *frame,
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/scale.h b/chromium/third_party/libaom/source/libaom/av1/common/scale.h
index fd30416dfa7..28bcec4936e 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/scale.h
+++ b/chromium/third_party/libaom/source/libaom/av1/common/scale.h
@@ -52,6 +52,7 @@ static INLINE int av1_is_scaled(const struct scale_factors *sf) {
(sf->x_scale_fp != REF_NO_SCALE || sf->y_scale_fp != REF_NO_SCALE);
}
+// See AV1 spec, Section 6.8.6. Frame size with refs semantics.
static INLINE int valid_ref_frame_size(int ref_width, int ref_height,
int this_width, int this_height) {
return 2 * this_width >= ref_width && 2 * this_height >= ref_height &&
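Worked example for the visible half of that spec rule (the full condition continues past this hunk): each reference dimension may be at most twice the current frame's. Hypothetical helper for illustration only:

    static int ref_not_too_large(int ref_w, int ref_h, int cur_w, int cur_h) {
      return 2 * cur_w >= ref_w && 2 * cur_h >= ref_h;
    }
    /* ref_not_too_large(1280, 720, 640, 360) -> 1 (exactly 2x is allowed);
     * ref_not_too_large(1281, 720, 640, 360) -> 0. */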
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/thread_common.c b/chromium/third_party/libaom/source/libaom/av1/common/thread_common.c
index b1b1a7fcd2b..b951ad3201d 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/thread_common.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/thread_common.c
@@ -19,6 +19,7 @@
#include "av1/common/entropymode.h"
#include "av1/common/thread_common.h"
#include "av1/common/reconinter.h"
+#include "av1/common/reconintra.h"
// Set up nsync by width.
static INLINE int get_sync_range(int width) {
@@ -34,7 +35,6 @@ static INLINE int get_sync_range(int width) {
return 8;
}
-#if !CONFIG_REALTIME_ONLY
static INLINE int get_lr_sync_range(int width) {
#if 0
// nsync numbers are picked by testing. For example, for 4k
@@ -52,7 +52,6 @@ static INLINE int get_lr_sync_range(int width) {
return 1;
#endif
}
-#endif
// Allocate memory for lf row synchronization
void av1_loop_filter_alloc(AV1LfSync *lf_sync, AV1_COMMON *cm, int rows,
@@ -264,9 +263,10 @@ static INLINE void sync_write(AV1LfSync *const lf_sync, int r, int c,
static AOM_FORCE_INLINE bool skip_loop_filter_plane(const int planes_to_lf[3],
int plane,
- bool is_realtime) {
- // In realtime mode, we have the option to filter both chroma planes together
- if (is_realtime) {
+ int lpf_opt_level) {
+ // If LPF_PICK_METHOD is LPF_PICK_FROM_Q, we have the option to filter both
+ // chroma planes together
+ if (lpf_opt_level == 2) {
if (plane == AOM_PLANE_Y) {
return !planes_to_lf[plane];
}
@@ -286,7 +286,7 @@ static AOM_FORCE_INLINE bool skip_loop_filter_plane(const int planes_to_lf[3],
}
static void enqueue_lf_jobs(AV1LfSync *lf_sync, int start, int stop,
- const int planes_to_lf[3], int is_realtime) {
+ const int planes_to_lf[3], int lpf_opt_level) {
int mi_row, plane, dir;
AV1LfMTInfo *lf_job_queue = lf_sync->job_queue;
lf_sync->jobs_enqueued = 0;
@@ -298,14 +298,14 @@ static void enqueue_lf_jobs(AV1LfSync *lf_sync, int start, int stop,
for (dir = 0; dir < 2; ++dir) {
for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
for (plane = 0; plane < 3; ++plane) {
- if (skip_loop_filter_plane(planes_to_lf, plane, is_realtime)) {
+ if (skip_loop_filter_plane(planes_to_lf, plane, lpf_opt_level)) {
continue;
}
if (!planes_to_lf[plane]) continue;
lf_job_queue->mi_row = mi_row;
lf_job_queue->plane = plane;
lf_job_queue->dir = dir;
- lf_job_queue->is_realtime = is_realtime;
+ lf_job_queue->lpf_opt_level = lpf_opt_level;
lf_job_queue++;
lf_sync->jobs_enqueued++;
}
@@ -336,15 +336,14 @@ static AV1LfMTInfo *get_lf_job_info(AV1LfSync *lf_sync) {
static INLINE void thread_loop_filter_rows(
const YV12_BUFFER_CONFIG *const frame_buffer, AV1_COMMON *const cm,
struct macroblockd_plane *planes, MACROBLOCKD *xd, int mi_row, int plane,
- int dir, int is_realtime, AV1LfSync *const lf_sync,
+ int dir, int lpf_opt_level, AV1LfSync *const lf_sync,
AV1_DEBLOCKING_PARAMETERS *params_buf, TX_SIZE *tx_buf) {
const int sb_cols =
- ALIGN_POWER_OF_TWO(cm->mi_params.mi_cols, MAX_MIB_SIZE_LOG2) >>
- MAX_MIB_SIZE_LOG2;
+ CEIL_POWER_OF_TWO(cm->mi_params.mi_cols, MAX_MIB_SIZE_LOG2);
const int r = mi_row >> MAX_MIB_SIZE_LOG2;
int mi_col, c;
- const bool joint_filter_chroma = is_realtime && plane > AOM_PLANE_Y;
+ const bool joint_filter_chroma = (lpf_opt_level == 2) && plane > AOM_PLANE_Y;
const int num_planes = joint_filter_chroma ? 2 : 1;
assert(IMPLIES(joint_filter_chroma, plane == AOM_PLANE_U));
@@ -354,13 +353,14 @@ static INLINE void thread_loop_filter_rows(
av1_setup_dst_planes(planes, cm->seq_params->sb_size, frame_buffer,
mi_row, mi_col, plane, plane + num_planes);
- if (is_realtime) {
+ if (lpf_opt_level) {
if (plane == AOM_PLANE_Y) {
- av1_filter_block_plane_vert_rt(cm, xd, &planes[plane], mi_row, mi_col,
- params_buf, tx_buf);
+ av1_filter_block_plane_vert_opt(cm, xd, &planes[plane], mi_row,
+ mi_col, params_buf, tx_buf);
} else {
- av1_filter_block_plane_vert_rt_chroma(cm, xd, &planes[plane], mi_row,
- mi_col, params_buf, tx_buf);
+ av1_filter_block_plane_vert_opt_chroma(cm, xd, &planes[plane], mi_row,
+ mi_col, params_buf, tx_buf,
+ plane, joint_filter_chroma);
}
} else {
av1_filter_block_plane_vert(cm, xd, plane, &planes[plane], mi_row,
@@ -385,13 +385,14 @@ static INLINE void thread_loop_filter_rows(
av1_setup_dst_planes(planes, cm->seq_params->sb_size, frame_buffer,
mi_row, mi_col, plane, plane + num_planes);
- if (is_realtime) {
+ if (lpf_opt_level) {
if (plane == AOM_PLANE_Y) {
- av1_filter_block_plane_horz_rt(cm, xd, &planes[plane], mi_row, mi_col,
- params_buf, tx_buf);
+ av1_filter_block_plane_horz_opt(cm, xd, &planes[plane], mi_row,
+ mi_col, params_buf, tx_buf);
} else {
- av1_filter_block_plane_horz_rt_chroma(cm, xd, &planes[plane], mi_row,
- mi_col, params_buf, tx_buf);
+ av1_filter_block_plane_horz_opt_chroma(cm, xd, &planes[plane], mi_row,
+ mi_col, params_buf, tx_buf,
+ plane, joint_filter_chroma);
}
} else {
av1_filter_block_plane_horz(cm, xd, plane, &planes[plane], mi_row,
@@ -407,11 +408,11 @@ static int loop_filter_row_worker(void *arg1, void *arg2) {
LFWorkerData *const lf_data = (LFWorkerData *)arg2;
AV1LfMTInfo *cur_job_info;
while ((cur_job_info = get_lf_job_info(lf_sync)) != NULL) {
- const int is_realtime = cur_job_info->is_realtime;
- thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
- lf_data->xd, cur_job_info->mi_row,
- cur_job_info->plane, cur_job_info->dir, is_realtime,
- lf_sync, lf_data->params_buf, lf_data->tx_buf);
+ const int lpf_opt_level = cur_job_info->lpf_opt_level;
+ thread_loop_filter_rows(
+ lf_data->frame_buffer, lf_data->cm, lf_data->planes, lf_data->xd,
+ cur_job_info->mi_row, cur_job_info->plane, cur_job_info->dir,
+ lpf_opt_level, lf_sync, lf_data->params_buf, lf_data->tx_buf);
}
return 1;
}
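The lpf_opt_level encoding assumed throughout these hunks, inferred from the call sites above: 0 selects the generic per-plane filters, any nonzero level the optimized paths, and level 2 additionally filters both chroma planes in one pass. As a one-line sketch (hypothetical helper):

    static int use_joint_chroma(int lpf_opt_level, int plane /* 0 == Y */) {
      return lpf_opt_level == 2 && plane > 0;
    }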
@@ -420,12 +421,11 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
MACROBLOCKD *xd, int start, int stop,
const int planes_to_lf[3], AVxWorker *workers,
int num_workers, AV1LfSync *lf_sync,
- int is_realtime) {
+ int lpf_opt_level) {
const AVxWorkerInterface *const winterface = aom_get_worker_interface();
// Number of superblock rows and cols
const int sb_rows =
- ALIGN_POWER_OF_TWO(cm->mi_params.mi_rows, MAX_MIB_SIZE_LOG2) >>
- MAX_MIB_SIZE_LOG2;
+ CEIL_POWER_OF_TWO(cm->mi_params.mi_rows, MAX_MIB_SIZE_LOG2);
int i;
if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
@@ -440,7 +440,7 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
sizeof(*(lf_sync->cur_sb_col[i])) * sb_rows);
}
- enqueue_lf_jobs(lf_sync, start, stop, planes_to_lf, is_realtime);
+ enqueue_lf_jobs(lf_sync, start, stop, planes_to_lf, lpf_opt_level);
// Set up loopfilter thread data.
for (i = num_workers - 1; i >= 0; --i) {
@@ -470,7 +470,7 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
static void loop_filter_rows(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
MACROBLOCKD *xd, int start, int stop,
- const int planes_to_lf[3], int is_realtime) {
+ const int planes_to_lf[3], int lpf_opt_level) {
// Filter top rows of all planes first, in case the output can be partially
// reconstructed row by row.
int mi_row, plane, dir;
@@ -479,13 +479,13 @@ static void loop_filter_rows(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
TX_SIZE tx_buf[MAX_MIB_SIZE];
for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
for (plane = 0; plane < 3; ++plane) {
- if (skip_loop_filter_plane(planes_to_lf, plane, is_realtime)) {
+ if (skip_loop_filter_plane(planes_to_lf, plane, lpf_opt_level)) {
continue;
}
for (dir = 0; dir < 2; ++dir) {
thread_loop_filter_rows(frame, cm, xd->plane, xd, mi_row, plane, dir,
- is_realtime, /*lf_sync=*/NULL, params_buf,
+ lpf_opt_level, /*lf_sync=*/NULL, params_buf,
tx_buf);
}
}
@@ -496,7 +496,7 @@ void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
MACROBLOCKD *xd, int plane_start, int plane_end,
int partial_frame, AVxWorker *workers,
int num_workers, AV1LfSync *lf_sync,
- int is_realtime) {
+ int lpf_opt_level) {
int start_mi_row, end_mi_row, mi_rows_to_filter;
int planes_to_lf[3];
@@ -523,15 +523,14 @@ void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
if (num_workers > 1) {
// Enqueue and execute loopfiltering jobs.
loop_filter_rows_mt(frame, cm, xd, start_mi_row, end_mi_row, planes_to_lf,
- workers, num_workers, lf_sync, is_realtime);
+ workers, num_workers, lf_sync, lpf_opt_level);
} else {
// Directly filter in the main thread.
loop_filter_rows(frame, cm, xd, start_mi_row, end_mi_row, planes_to_lf,
- is_realtime);
+ lpf_opt_level);
}
}
-#if !CONFIG_REALTIME_ONLY
static INLINE void lr_sync_read(void *const lr_sync, int r, int c, int plane) {
#if CONFIG_MULTITHREAD
AV1LrSync *const loop_res_sync = (AV1LrSync *)lr_sync;
@@ -846,6 +845,12 @@ static int loop_restoration_row_worker(void *arg1, void *arg2) {
copy_funs[plane](lr_ctxt->dst, lr_ctxt->frame, ctxt[plane].tile_rect.left,
ctxt[plane].tile_rect.right, cur_job_info->v_copy_start,
cur_job_info->v_copy_end);
+
+ if (lrworkerdata->do_extend_border) {
+ aom_extend_frame_borders_plane_row(lr_ctxt->frame, plane,
+ cur_job_info->v_copy_start,
+ cur_job_info->v_copy_end);
+ }
} else {
break;
}
@@ -855,7 +860,8 @@ static int loop_restoration_row_worker(void *arg1, void *arg2) {
static void foreach_rest_unit_in_planes_mt(AV1LrStruct *lr_ctxt,
AVxWorker *workers, int nworkers,
- AV1LrSync *lr_sync, AV1_COMMON *cm) {
+ AV1LrSync *lr_sync, AV1_COMMON *cm,
+ int do_extend_border) {
FilterFrameCtxt *ctxt = lr_ctxt->ctxt;
const int num_planes = av1_num_planes(cm);
@@ -898,6 +904,7 @@ static void foreach_rest_unit_in_planes_mt(AV1LrStruct *lr_ctxt,
for (i = num_workers - 1; i >= 0; --i) {
AVxWorker *const worker = &workers[i];
lr_sync->lrworkerdata[i].lr_ctxt = (void *)lr_ctxt;
+ lr_sync->lrworkerdata[i].do_extend_border = do_extend_border;
worker->hook = loop_restoration_row_worker;
worker->data1 = lr_sync;
worker->data2 = &lr_sync->lrworkerdata[i];
@@ -919,7 +926,8 @@ static void foreach_rest_unit_in_planes_mt(AV1LrStruct *lr_ctxt,
void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
AV1_COMMON *cm, int optimized_lr,
AVxWorker *workers, int num_workers,
- AV1LrSync *lr_sync, void *lr_ctxt) {
+ AV1LrSync *lr_sync, void *lr_ctxt,
+ int do_extend_border) {
assert(!cm->features.all_lossless);
const int num_planes = av1_num_planes(cm);
@@ -930,9 +938,8 @@ void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
optimized_lr, num_planes);
foreach_rest_unit_in_planes_mt(loop_rest_ctxt, workers, num_workers, lr_sync,
- cm);
+ cm, do_extend_border);
}
-#endif
// Initializes cdef_sync parameters.
static AOM_INLINE void reset_cdef_job_info(AV1CdefSync *const cdef_sync) {
@@ -1004,13 +1011,27 @@ static AOM_INLINE int get_cdef_row_next_job(AV1CdefSync *const cdef_sync,
static int cdef_sb_row_worker_hook(void *arg1, void *arg2) {
AV1CdefSync *const cdef_sync = (AV1CdefSync *)arg1;
AV1CdefWorkerData *const cdef_worker = (AV1CdefWorkerData *)arg2;
- const int nvfb =
- (cdef_worker->cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+ AV1_COMMON *cm = cdef_worker->cm;
+ const int nvfb = (cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
int cur_fbr;
+ const int num_planes = av1_num_planes(cm);
while (get_cdef_row_next_job(cdef_sync, &cur_fbr, nvfb)) {
- av1_cdef_fb_row(cdef_worker->cm, cdef_worker->xd, cdef_worker->linebuf,
- cdef_worker->colbuf, cdef_worker->srcbuf, cur_fbr,
+ MACROBLOCKD *xd = cdef_worker->xd;
+ av1_cdef_fb_row(cm, xd, cdef_worker->linebuf, cdef_worker->colbuf,
+ cdef_worker->srcbuf, cur_fbr,
cdef_worker->cdef_init_fb_row_fn, cdef_sync);
+ if (cdef_worker->do_extend_border) {
+ for (int plane = 0; plane < num_planes; ++plane) {
+ const YV12_BUFFER_CONFIG *ybf = &cm->cur_frame->buf;
+ const int is_uv = plane > 0;
+ const int mi_high = MI_SIZE_LOG2 - xd->plane[plane].subsampling_y;
+ const int unit_height = MI_SIZE_64X64 << mi_high;
+ const int v_start = cur_fbr * unit_height;
+ const int v_end =
+ AOMMIN(v_start + unit_height, ybf->crop_heights[is_uv]);
+ aom_extend_frame_borders_plane_row(ybf, plane, v_start, v_end);
+ }
+ }
}
return 1;
}
@@ -1019,7 +1040,8 @@ static int cdef_sb_row_worker_hook(void *arg1, void *arg2) {
static void prepare_cdef_frame_workers(
AV1_COMMON *const cm, MACROBLOCKD *xd, AV1CdefWorkerData *const cdef_worker,
AVxWorkerHook hook, AVxWorker *const workers, AV1CdefSync *const cdef_sync,
- int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn) {
+ int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn,
+ int do_extend_border) {
const int num_planes = av1_num_planes(cm);
cdef_worker[0].srcbuf = cm->cdef_info.srcbuf;
@@ -1030,6 +1052,7 @@ static void prepare_cdef_frame_workers(
cdef_worker[i].cm = cm;
cdef_worker[i].xd = xd;
cdef_worker[i].cdef_init_fb_row_fn = cdef_init_fb_row_fn;
+ cdef_worker[i].do_extend_border = do_extend_border;
for (int plane = 0; plane < num_planes; plane++)
cdef_worker[i].linebuf[plane] = cm->cdef_info.linebuf[plane];
@@ -1113,8 +1136,8 @@ void av1_cdef_init_fb_row_mt(const AV1_COMMON *const cm,
void av1_cdef_frame_mt(AV1_COMMON *const cm, MACROBLOCKD *const xd,
AV1CdefWorkerData *const cdef_worker,
AVxWorker *const workers, AV1CdefSync *const cdef_sync,
- int num_workers,
- cdef_init_fb_row_t cdef_init_fb_row_fn) {
+ int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn,
+ int do_extend_border) {
YV12_BUFFER_CONFIG *frame = &cm->cur_frame->buf;
const int num_planes = av1_num_planes(cm);
@@ -1124,7 +1147,18 @@ void av1_cdef_frame_mt(AV1_COMMON *const cm, MACROBLOCKD *const xd,
reset_cdef_job_info(cdef_sync);
prepare_cdef_frame_workers(cm, xd, cdef_worker, cdef_sb_row_worker_hook,
workers, cdef_sync, num_workers,
- cdef_init_fb_row_fn);
+ cdef_init_fb_row_fn, do_extend_border);
launch_cdef_workers(workers, num_workers);
sync_cdef_workers(workers, cm, num_workers);
}
+
+int av1_get_intrabc_extra_top_right_sb_delay(const AV1_COMMON *cm) {
+ // No additional top-right delay when intraBC tool is not enabled.
+ if (!av1_allow_intrabc(cm)) return 0;
+ // Due to the hardware constraints on processing the intraBC tool with row
+ // multithreading, a top-right delay of 3 superblocks of size 128x128 or 5
+ // superblocks of size 64x64 is mandated. However, a minimum top-right delay
+ // of 1 superblock is assured with 'sync_range'. Hence return only the
+ // additional superblock delay when the intraBC tool is enabled.
+ return cm->seq_params->sb_size == BLOCK_128X128 ? 2 : 4;
+}
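How the new helper's return value composes with the baseline (assumed usage): row synchronization already guarantees a one-superblock top-right lag, so the total intraBC delay works out to the mandated 3 (128x128 superblocks) or 5 (64x64). Sketch:

    static int total_top_right_delay_sb(int sb_is_128, int allow_intrabc) {
      const int base = 1; /* minimum assured by sync_range */
      const int extra = allow_intrabc ? (sb_is_128 ? 2 : 4) : 0;
      return base + extra;
    }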
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/thread_common.h b/chromium/third_party/libaom/source/libaom/av1/common/thread_common.h
index ab6e4df9122..b1e622f9953 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/thread_common.h
+++ b/chromium/third_party/libaom/source/libaom/av1/common/thread_common.h
@@ -28,7 +28,7 @@ typedef struct AV1LfMTInfo {
int mi_row;
int plane;
int dir;
- int is_realtime;
+ int lpf_opt_level;
} AV1LfMTInfo;
// Loopfilter row synchronization
@@ -70,6 +70,7 @@ typedef struct LoopRestorationWorkerData {
int32_t *rst_tmpbuf;
void *rlbs;
void *lr_ctxt;
+ int do_extend_border;
} LRWorkerData;
// Looprestoration row synchronization
@@ -106,6 +107,7 @@ typedef struct AV1CdefWorker {
uint16_t *srcbuf;
uint16_t *linebuf[MAX_MB_PLANE];
cdef_init_fb_row_t cdef_init_fb_row_fn;
+ int do_extend_border;
} AV1CdefWorkerData;
typedef struct AV1CdefRowSync {
@@ -135,7 +137,8 @@ typedef struct AV1CdefSyncData {
void av1_cdef_frame_mt(AV1_COMMON *const cm, MACROBLOCKD *const xd,
AV1CdefWorkerData *const cdef_worker,
AVxWorker *const workers, AV1CdefSync *const cdef_sync,
- int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn);
+ int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn,
+ int do_extend_border);
void av1_cdef_init_fb_row_mt(const AV1_COMMON *const cm,
const MACROBLOCKD *const xd,
CdefBlockInfo *const fb_info,
@@ -157,19 +160,18 @@ void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
struct macroblockd *xd, int plane_start,
int plane_end, int partial_frame,
AVxWorker *workers, int num_workers,
- AV1LfSync *lf_sync, int is_realtime);
+ AV1LfSync *lf_sync, int lpf_opt_level);
-#if !CONFIG_REALTIME_ONLY
void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
struct AV1Common *cm,
int optimized_lr, AVxWorker *workers,
int num_workers, AV1LrSync *lr_sync,
- void *lr_ctxt);
+ void *lr_ctxt, int do_extend_border);
void av1_loop_restoration_dealloc(AV1LrSync *lr_sync, int num_workers);
void av1_loop_restoration_alloc(AV1LrSync *lr_sync, AV1_COMMON *cm,
int num_workers, int num_rows_lr,
int num_planes, int width);
-#endif
+int av1_get_intrabc_extra_top_right_sb_delay(const AV1_COMMON *cm);
#ifdef __cplusplus
} // extern "C"
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/tile_common.c b/chromium/third_party/libaom/source/libaom/av1/common/tile_common.c
index 33226c973a2..6ecead8183b 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/tile_common.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/tile_common.c
@@ -30,12 +30,10 @@ static int tile_log2(int blk_size, int target) {
void av1_get_tile_limits(AV1_COMMON *const cm) {
const SequenceHeader *const seq_params = cm->seq_params;
CommonTileParams *const tiles = &cm->tiles;
- const int mi_cols =
- ALIGN_POWER_OF_TWO(cm->mi_params.mi_cols, seq_params->mib_size_log2);
- const int mi_rows =
- ALIGN_POWER_OF_TWO(cm->mi_params.mi_rows, seq_params->mib_size_log2);
- const int sb_cols = mi_cols >> seq_params->mib_size_log2;
- const int sb_rows = mi_rows >> seq_params->mib_size_log2;
+ const int sb_cols =
+ CEIL_POWER_OF_TWO(cm->mi_params.mi_cols, seq_params->mib_size_log2);
+ const int sb_rows =
+ CEIL_POWER_OF_TWO(cm->mi_params.mi_rows, seq_params->mib_size_log2);
const int sb_size_log2 = seq_params->mib_size_log2 + MI_SIZE_LOG2;
tiles->max_width_sb = MAX_TILE_WIDTH >> sb_size_log2;
@@ -51,10 +49,8 @@ void av1_get_tile_limits(AV1_COMMON *const cm) {
void av1_calculate_tile_cols(const SequenceHeader *const seq_params,
int cm_mi_rows, int cm_mi_cols,
CommonTileParams *const tiles) {
- int mi_cols = ALIGN_POWER_OF_TWO(cm_mi_cols, seq_params->mib_size_log2);
- int mi_rows = ALIGN_POWER_OF_TWO(cm_mi_rows, seq_params->mib_size_log2);
- int sb_cols = mi_cols >> seq_params->mib_size_log2;
- int sb_rows = mi_rows >> seq_params->mib_size_log2;
+ int sb_cols = CEIL_POWER_OF_TWO(cm_mi_cols, seq_params->mib_size_log2);
+ int sb_rows = CEIL_POWER_OF_TWO(cm_mi_rows, seq_params->mib_size_log2);
int i;
// This will be overridden if there are at least two columns of tiles
@@ -63,8 +59,7 @@ void av1_calculate_tile_cols(const SequenceHeader *const seq_params,
if (tiles->uniform_spacing) {
int start_sb;
- int size_sb = ALIGN_POWER_OF_TWO(sb_cols, tiles->log2_cols);
- size_sb >>= tiles->log2_cols;
+ int size_sb = CEIL_POWER_OF_TWO(sb_cols, tiles->log2_cols);
assert(size_sb > 0);
for (i = 0, start_sb = 0; start_sb < sb_cols; i++) {
tiles->col_start_sb[i] = start_sb;
@@ -105,13 +100,11 @@ void av1_calculate_tile_cols(const SequenceHeader *const seq_params,
void av1_calculate_tile_rows(const SequenceHeader *const seq_params,
int cm_mi_rows, CommonTileParams *const tiles) {
- int mi_rows = ALIGN_POWER_OF_TWO(cm_mi_rows, seq_params->mib_size_log2);
- int sb_rows = mi_rows >> seq_params->mib_size_log2;
+ int sb_rows = CEIL_POWER_OF_TWO(cm_mi_rows, seq_params->mib_size_log2);
int start_sb, size_sb, i;
if (tiles->uniform_spacing) {
- size_sb = ALIGN_POWER_OF_TWO(sb_rows, tiles->log2_rows);
- size_sb >>= tiles->log2_rows;
+ size_sb = CEIL_POWER_OF_TWO(sb_rows, tiles->log2_rows);
assert(size_sb > 0);
for (i = 0, start_sb = 0; start_sb < sb_rows; i++) {
tiles->row_start_sb[i] = start_sb;
@@ -152,19 +145,13 @@ void av1_tile_set_col(TileInfo *tile, const AV1_COMMON *cm, int col) {
}
int av1_get_sb_rows_in_tile(AV1_COMMON *cm, const TileInfo *tile) {
- int mi_rows_aligned_to_sb = ALIGN_POWER_OF_TWO(
- tile->mi_row_end - tile->mi_row_start, cm->seq_params->mib_size_log2);
- int sb_rows = mi_rows_aligned_to_sb >> cm->seq_params->mib_size_log2;
-
- return sb_rows;
+ return CEIL_POWER_OF_TWO(tile->mi_row_end - tile->mi_row_start,
+ cm->seq_params->mib_size_log2);
}
int av1_get_sb_cols_in_tile(AV1_COMMON *cm, const TileInfo *tile) {
- int mi_cols_aligned_to_sb = ALIGN_POWER_OF_TWO(
- tile->mi_col_end - tile->mi_col_start, cm->seq_params->mib_size_log2);
- int sb_cols = mi_cols_aligned_to_sb >> cm->seq_params->mib_size_log2;
-
- return sb_cols;
+ return CEIL_POWER_OF_TWO(tile->mi_col_end - tile->mi_col_start,
+ cm->seq_params->mib_size_log2);
}
AV1PixelRect av1_get_tile_rect(const TileInfo *tile_info, const AV1_COMMON *cm,
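
The tile_common.c hunks above all collapse the same two-step idiom — align mi units up to a superblock multiple, then shift down — into a single ceiling shift. A minimal standalone check of the equivalence, assuming the usual libaom macro definitions rather than quoting them from this patch:

#include <assert.h>

#define ALIGN_POWER_OF_TWO(value, n) \
  (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
#define CEIL_POWER_OF_TWO(value, n) (((value) + (1 << (n)) - 1) >> (n))

int main(void) {
  /* align-then-shift and ceiling-shift agree over the ranges mi_cols and
     mi_rows take in these functions */
  for (int v = 0; v < 4096; ++v)
    for (int n = 0; n < 8; ++n)
      assert(CEIL_POWER_OF_TWO(v, n) == ALIGN_POWER_OF_TWO(v, n) >> n);
  return 0;
}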
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/txb_common.h b/chromium/third_party/libaom/source/libaom/av1/common/txb_common.h
index 5ba3951e8bb..40fcffca999 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/txb_common.h
+++ b/chromium/third_party/libaom/source/libaom/av1/common/txb_common.h
@@ -78,71 +78,6 @@ static INLINE int get_padded_idx(const int idx, const int bwl) {
return idx + ((idx >> bwl) << TX_PAD_HOR_LOG2);
}
-static INLINE int get_base_ctx_from_count_mag(int row, int col, int count,
- int sig_mag) {
- const int ctx = base_level_count_to_index[count];
- int ctx_idx = -1;
-
- if (row == 0 && col == 0) {
- if (sig_mag >= 2) return 0;
-
- if (sig_mag == 1) {
- if (count >= 2)
- ctx_idx = 1;
- else
- ctx_idx = 2;
-
- return ctx_idx;
- }
-
- ctx_idx = 3 + ctx;
- assert(ctx_idx <= 6);
- return ctx_idx;
- } else if (row == 0) {
- if (sig_mag >= 2) return 6;
- if (sig_mag == 1) {
- if (count >= 2)
- ctx_idx = 7;
- else
- ctx_idx = 8;
- return ctx_idx;
- }
-
- ctx_idx = 9 + ctx;
- assert(ctx_idx <= 11);
- return ctx_idx;
- } else if (col == 0) {
- if (sig_mag >= 2) return 12;
- if (sig_mag == 1) {
- if (count >= 2)
- ctx_idx = 13;
- else
- ctx_idx = 14;
-
- return ctx_idx;
- }
-
- ctx_idx = 15 + ctx;
- assert(ctx_idx <= 17);
- // TODO(angiebird): turn this on once the optimization is finalized
- // assert(ctx_idx < 28);
- } else {
- if (sig_mag >= 2) return 18;
- if (sig_mag == 1) {
- if (count >= 2)
- ctx_idx = 19;
- else
- ctx_idx = 20;
- return ctx_idx;
- }
-
- ctx_idx = 21 + ctx;
-
- assert(ctx_idx <= 24);
- }
- return ctx_idx;
-}
-
static INLINE int get_br_ctx_2d(const uint8_t *const levels,
const int c, // raster order
const int bwl) {
@@ -351,11 +286,11 @@ static INLINE void set_dc_sign(int *cul_level, int dc_val) {
*cul_level += 2 << COEFF_CONTEXT_BITS;
}
-static INLINE void get_txb_ctx(const BLOCK_SIZE plane_bsize,
- const TX_SIZE tx_size, const int plane,
- const ENTROPY_CONTEXT *const a,
- const ENTROPY_CONTEXT *const l,
- TXB_CTX *const txb_ctx) {
+static void get_txb_ctx_general(const BLOCK_SIZE plane_bsize,
+ const TX_SIZE tx_size, const int plane,
+ const ENTROPY_CONTEXT *const a,
+ const ENTROPY_CONTEXT *const l,
+ TXB_CTX *const txb_ctx) {
#define MAX_TX_SIZE_UNIT 16
static const int8_t signs[3] = { 0, -1, 1 };
static const int8_t dc_sign_contexts[4 * MAX_TX_SIZE_UNIT + 1] = {
@@ -437,7 +372,100 @@ static INLINE void get_txb_ctx(const BLOCK_SIZE plane_bsize,
: 7;
txb_ctx->txb_skip_ctx = ctx_base + ctx_offset;
}
-#undef MAX_TX_SIZE_UNIT
}
+#define SPECIALIZE_GET_TXB_CTX(w, h) \
+ static void get_txb_ctx_##w##x##h( \
+ const BLOCK_SIZE plane_bsize, const int plane, \
+ const ENTROPY_CONTEXT *const a, const ENTROPY_CONTEXT *const l, \
+ TXB_CTX *const txb_ctx) { \
+ static const int8_t signs[3] = { 0, -1, 1 }; \
+ static const int8_t dc_sign_contexts[4 * MAX_TX_SIZE_UNIT + 1] = { \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, \
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 \
+ }; \
+ const TX_SIZE tx_size = TX_##w##X##h; \
+ const int txb_w_unit = tx_size_wide_unit[tx_size]; \
+ const int txb_h_unit = tx_size_high_unit[tx_size]; \
+ int dc_sign = 0; \
+ int k = 0; \
+ \
+ do { \
+ const unsigned int sign = ((uint8_t)a[k]) >> COEFF_CONTEXT_BITS; \
+ assert(sign <= 2); \
+ dc_sign += signs[sign]; \
+ } while (++k < txb_w_unit); \
+ \
+ k = 0; \
+ do { \
+ const unsigned int sign = ((uint8_t)l[k]) >> COEFF_CONTEXT_BITS; \
+ assert(sign <= 2); \
+ dc_sign += signs[sign]; \
+ } while (++k < txb_h_unit); \
+ \
+ txb_ctx->dc_sign_ctx = dc_sign_contexts[dc_sign + 2 * MAX_TX_SIZE_UNIT]; \
+ \
+ if (plane == 0) { \
+ if (plane_bsize == txsize_to_bsize[tx_size]) { \
+ txb_ctx->txb_skip_ctx = 0; \
+ } else { \
+ static const uint8_t skip_contexts[5][5] = { { 1, 2, 2, 2, 3 }, \
+ { 2, 4, 4, 4, 5 }, \
+ { 2, 4, 4, 4, 5 }, \
+ { 2, 4, 4, 4, 5 }, \
+ { 3, 5, 5, 5, 6 } }; \
+ int top = 0; \
+ int left = 0; \
+ \
+ k = 0; \
+ do { \
+ top |= a[k]; \
+ } while (++k < txb_w_unit); \
+ top &= COEFF_CONTEXT_MASK; \
+ top = AOMMIN(top, 4); \
+ \
+ k = 0; \
+ do { \
+ left |= l[k]; \
+ } while (++k < txb_h_unit); \
+ left &= COEFF_CONTEXT_MASK; \
+ left = AOMMIN(left, 4); \
+ \
+ txb_ctx->txb_skip_ctx = skip_contexts[top][left]; \
+ } \
+ } else { \
+ const int ctx_base = get_entropy_context(tx_size, a, l); \
+ const int ctx_offset = (num_pels_log2_lookup[plane_bsize] > \
+ num_pels_log2_lookup[txsize_to_bsize[tx_size]]) \
+ ? 10 \
+ : 7; \
+ txb_ctx->txb_skip_ctx = ctx_base + ctx_offset; \
+ } \
+ }
+
+SPECIALIZE_GET_TXB_CTX(4, 4)
+SPECIALIZE_GET_TXB_CTX(8, 8)
+SPECIALIZE_GET_TXB_CTX(16, 16)
+SPECIALIZE_GET_TXB_CTX(32, 32)
+
+// Wrapper for get_txb_ctx that calls the specialized version of get_txb_ctx_*
+// so that the compiler can compile away the while loops.
+static INLINE void get_txb_ctx(const BLOCK_SIZE plane_bsize,
+ const TX_SIZE tx_size, const int plane,
+ const ENTROPY_CONTEXT *const a,
+ const ENTROPY_CONTEXT *const l,
+ TXB_CTX *const txb_ctx) {
+ switch (tx_size) {
+ case TX_4X4: get_txb_ctx_4x4(plane_bsize, plane, a, l, txb_ctx); break;
+ case TX_8X8: get_txb_ctx_8x8(plane_bsize, plane, a, l, txb_ctx); break;
+ case TX_16X16: get_txb_ctx_16x16(plane_bsize, plane, a, l, txb_ctx); break;
+ case TX_32X32: get_txb_ctx_32x32(plane_bsize, plane, a, l, txb_ctx); break;
+ default:
+ get_txb_ctx_general(plane_bsize, tx_size, plane, a, l, txb_ctx);
+ break;
+ }
+}
+#undef MAX_TX_SIZE_UNIT
+
#endif // AOM_AV1_COMMON_TXB_COMMON_H_
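
The SPECIALIZE_GET_TXB_CTX macro plus the switch in get_txb_ctx is a specialize-and-dispatch pattern: stamping out fixed-size variants makes txb_w_unit and txb_h_unit compile-time constants, so the do/while loops can be fully unrolled for the common square transform sizes. A toy sketch of the same technique (not libaom code):

#include <stdio.h>

static int sum_general(const int *v, int n) {
  int s = 0;
  for (int i = 0; i < n; ++i) s += v[i];
  return s;
}

/* Stamp out variants whose trip count is a compile-time constant. */
#define SPECIALIZE_SUM(n)                    \
  static int sum_##n(const int *v) {         \
    int s = 0;                               \
    for (int i = 0; i < (n); ++i) s += v[i]; \
    return s;                                \
  }

SPECIALIZE_SUM(4)
SPECIALIZE_SUM(8)

static int sum(const int *v, int n) {
  switch (n) {
    case 4: return sum_4(v); /* trip count known: loop unrolls away */
    case 8: return sum_8(v);
    default: return sum_general(v, n);
  }
}

int main(void) {
  const int v[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  printf("%d %d\n", sum(v, 4), sum(v, 8)); /* 10 36 */
  return 0;
}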
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/x86/av1_convolve_scale_sse4.c b/chromium/third_party/libaom/source/libaom/av1/common/x86/av1_convolve_scale_sse4.c
index 19661817607..67b28bc290f 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/x86/av1_convolve_scale_sse4.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/x86/av1_convolve_scale_sse4.c
@@ -22,7 +22,7 @@
// av1_convolve_2d_scale_sse4_1. This version only supports 8 tap filters.
static void hfilter8(const uint8_t *src, int src_stride, int16_t *dst, int w,
int h, int subpel_x_qn, int x_step_qn,
- const InterpFilterParams *filter_params, unsigned round) {
+ const InterpFilterParams *filter_params, int round) {
const int bd = 8;
const int ntaps = 8;
@@ -168,11 +168,11 @@ static void vfilter8(const int16_t *src, int src_stride, uint8_t *dst,
_mm_sra_epi32(_mm_add_epi32(conv, round_shift_add), round_shift);
uint8_t *dst_x = dst + y * dst_stride + x;
- CONV_BUF_TYPE *dst_16_x = dst16 + y * dst16_stride + x;
__m128i result;
__m128i shifted_16 = _mm_packus_epi32(shifted, shifted);
if (conv_params->is_compound) {
+ CONV_BUF_TYPE *dst_16_x = dst16 + y * dst16_stride + x;
if (conv_params->do_average) {
const __m128i p_16 = _mm_loadl_epi64((__m128i *)dst_16_x);
if (conv_params->use_dist_wtd_comp_avg) {
@@ -187,7 +187,7 @@ static void vfilter8(const int16_t *src, int src_stride, uint8_t *dst,
const __m128i subbed = _mm_sub_epi16(shifted_16, sub);
result = _mm_sra_epi16(_mm_add_epi16(subbed, bits_const), bits_shift);
const __m128i result_8 = _mm_packus_epi16(result, result);
- *(uint32_t *)dst_x = _mm_cvtsi128_si32(result_8);
+ *(int *)dst_x = _mm_cvtsi128_si32(result_8);
} else {
_mm_storel_epi64((__m128i *)dst_16_x, shifted_16);
}
@@ -195,7 +195,7 @@ static void vfilter8(const int16_t *src, int src_stride, uint8_t *dst,
const __m128i subbed = _mm_sub_epi16(shifted_16, sub);
result = _mm_sra_epi16(_mm_add_epi16(subbed, bits_const), bits_shift);
const __m128i result_8 = _mm_packus_epi16(result, result);
- *(uint32_t *)dst_x = _mm_cvtsi128_si32(result_8);
+ *(int *)dst_x = _mm_cvtsi128_si32(result_8);
}
}
for (; x < w; ++x) {
@@ -260,8 +260,8 @@ void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride,
// filters.
static void highbd_hfilter8(const uint16_t *src, int src_stride, int16_t *dst,
int w, int h, int subpel_x_qn, int x_step_qn,
- const InterpFilterParams *filter_params,
- unsigned round, int bd) {
+ const InterpFilterParams *filter_params, int round,
+ int bd) {
const int ntaps = 8;
src -= ntaps / 2 - 1;
@@ -399,10 +399,10 @@ static void highbd_vfilter8(const int16_t *src, int src_stride, uint16_t *dst,
_mm_sra_epi32(_mm_add_epi32(conv, round_shift_add), round_shift);
uint16_t *dst_x = dst + y * dst_stride + x;
- CONV_BUF_TYPE *dst_16_x = dst16 + y * dst16_stride + x;
__m128i result;
if (conv_params->is_compound) {
+ CONV_BUF_TYPE *dst_16_x = dst16 + y * dst16_stride + x;
if (conv_params->do_average) {
__m128i p_32 =
_mm_cvtepu16_epi32(_mm_loadl_epi64((__m128i *)dst_16_x));
@@ -414,20 +414,20 @@ static void highbd_vfilter8(const int16_t *src, int src_stride, uint16_t *dst,
} else {
shifted = _mm_srai_epi32(_mm_add_epi32(p_32, shifted), 1);
}
- __m128i res32 = _mm_sub_epi32(shifted, sub);
- res32 = _mm_sra_epi32(_mm_add_epi32(res32, round_bits_const),
- round_bits_shift);
+ result = _mm_sub_epi32(shifted, sub);
+ result = _mm_sra_epi32(_mm_add_epi32(result, round_bits_const),
+ round_bits_shift);
- __m128i res16 = _mm_packus_epi32(res32, res32);
- res16 = _mm_min_epi16(res16, clip_pixel_);
- _mm_storel_epi64((__m128i *)dst_x, res16);
+ result = _mm_packus_epi32(result, result);
+ result = _mm_min_epi16(result, clip_pixel_);
+ _mm_storel_epi64((__m128i *)dst_x, result);
} else {
__m128i shifted_16 = _mm_packus_epi32(shifted, shifted);
_mm_storel_epi64((__m128i *)dst_16_x, shifted_16);
}
} else {
- const __m128i subbed = _mm_sub_epi32(shifted, sub);
- result = _mm_sra_epi16(_mm_add_epi32(subbed, bits_const), bits_shift);
+ result = _mm_sub_epi32(shifted, sub);
+ result = _mm_sra_epi16(_mm_add_epi32(result, bits_const), bits_shift);
result = _mm_packus_epi32(result, result);
result = _mm_min_epi16(result, clip_pixel_);
_mm_storel_epi64((__m128i *)dst_x, result);
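
This file also begins a pattern repeated throughout the patch: stores of _mm_cvtsi128_si32 results switch from *(uint32_t *) to *(int *). The intrinsic returns int, so the new form stores through a type that matches the value; the same four bytes land in memory either way. A small sketch, assuming SSE2:

#include <emmintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  uint8_t dst[4];
  const __m128i res =
      _mm_setr_epi8(1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
  *(int *)dst = _mm_cvtsi128_si32(res); /* writes the low 4 bytes */
  printf("%u %u %u %u\n", dst[0], dst[1], dst[2], dst[3]); /* 1 2 3 4 */
  return 0;
}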
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/x86/av1_inv_txfm_ssse3.c b/chromium/third_party/libaom/source/libaom/av1/common/x86/av1_inv_txfm_ssse3.c
index a2a43f8d8df..738cc9848f0 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/x86/av1_inv_txfm_ssse3.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/x86/av1_inv_txfm_ssse3.c
@@ -2246,10 +2246,10 @@ static INLINE void lowbd_write_buffer_4xn_sse2(__m128i *in, uint8_t *output,
const int step = flipud ? -1 : 1;
const __m128i zero = _mm_setzero_si128();
for (int i = 0; i < height; ++i, j += step) {
- const __m128i v = _mm_cvtsi32_si128(*((uint32_t *)(output + i * stride)));
+ const __m128i v = _mm_cvtsi32_si128(*((int *)(output + i * stride)));
__m128i u = _mm_adds_epi16(in[j], _mm_unpacklo_epi8(v, zero));
u = _mm_packus_epi16(u, zero);
- *((uint32_t *)(output + i * stride)) = _mm_cvtsi128_si32(u);
+ *((int *)(output + i * stride)) = _mm_cvtsi128_si32(u);
}
}
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/x86/convolve_2d_avx2.c b/chromium/third_party/libaom/source/libaom/av1/common/x86/convolve_2d_avx2.c
index 04112ff9b90..1b39a0a8d5b 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/x86/convolve_2d_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/x86/convolve_2d_avx2.c
@@ -13,19 +13,21 @@
#include "config/av1_rtcd.h"
+#include "third_party/SVT-AV1/convolve_2d_avx2.h"
+
#include "aom_dsp/x86/convolve_avx2.h"
-#include "aom_dsp/x86/convolve_common_intrin.h"
-#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/aom_filter.h"
#include "aom_dsp/x86/synonyms.h"
+
#include "av1/common/convolve.h"
-void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const InterpFilterParams *filter_params_y,
- const int subpel_x_qn, const int subpel_y_qn,
- ConvolveParams *conv_params) {
+void av1_convolve_2d_sr_general_avx2(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride, int w, int h,
+ const InterpFilterParams *filter_params_x,
+ const InterpFilterParams *filter_params_y,
+ const int subpel_x_qn,
+ const int subpel_y_qn,
+ ConvolveParams *conv_params) {
if (filter_params_x->taps > 8) {
const int bd = 8;
int im_stride = 8, i;
@@ -92,29 +94,11 @@ void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
__m256i filt[4], coeffs_h[4], coeffs_v[4];
- filt[0] = _mm256_load_si256((__m256i const *)(filt_global_avx2));
- filt[1] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32));
-
prepare_coeffs_lowbd(filter_params_x, subpel_x_qn, coeffs_h);
prepare_coeffs(filter_params_y, subpel_y_qn, coeffs_v);
- const int16_t *const filter_x = av1_get_interp_filter_subpel_kernel(
- filter_params_x, subpel_x_qn & SUBPEL_MASK);
- const int16_t *const filter_y = av1_get_interp_filter_subpel_kernel(
- filter_params_y, subpel_y_qn & SUBPEL_MASK);
-
- int horiz_tap = SUBPEL_TAPS;
- int vert_tap = SUBPEL_TAPS;
-
- if (!(filter_x[0] | filter_x[1] | filter_x[6] | filter_x[7]))
- horiz_tap = 4;
- else if (!(filter_x[0] | filter_x[7]))
- horiz_tap = 6;
-
- if (!(filter_y[0] | filter_y[1] | filter_y[6] | filter_y[7]))
- vert_tap = 4;
- else if (!(filter_y[0] | filter_y[7]))
- vert_tap = 6;
+ int horiz_tap = get_filter_tap(filter_params_x, subpel_x_qn);
+ int vert_tap = get_filter_tap(filter_params_y, subpel_y_qn);
if (horiz_tap == 6)
prepare_coeffs_6t_lowbd(filter_params_x, subpel_x_qn, coeffs_h);
@@ -131,8 +115,10 @@ void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
const int fo_horiz = horiz_tap / 2 - 1;
const uint8_t *const src_ptr = src - fo_vert * src_stride - fo_horiz;
- filt[2] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 2));
- filt[3] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 3));
+ filt[0] = _mm256_load_si256((__m256i const *)filt1_global_avx2);
+ filt[1] = _mm256_load_si256((__m256i const *)filt2_global_avx2);
+ filt[2] = _mm256_load_si256((__m256i const *)filt3_global_avx2);
+ filt[3] = _mm256_load_si256((__m256i const *)filt4_global_avx2);
for (int j = 0; j < w; j += 8) {
if (horiz_tap == 4) {
@@ -153,3 +139,23 @@ void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
}
}
}
+
+void av1_convolve_2d_sr_avx2(
+ const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride,
+ int32_t w, int32_t h, const InterpFilterParams *filter_params_x,
+ const InterpFilterParams *filter_params_y, const int32_t subpel_x_q4,
+ const int32_t subpel_y_q4, ConvolveParams *conv_params) {
+ const int32_t tap_x = get_filter_tap(filter_params_x, subpel_x_q4);
+ const int32_t tap_y = get_filter_tap(filter_params_y, subpel_y_q4);
+
+ const bool use_general = (tap_x == 12 || tap_y == 12);
+ if (use_general) {
+ av1_convolve_2d_sr_general_avx2(src, src_stride, dst, dst_stride, w, h,
+ filter_params_x, filter_params_y,
+ subpel_x_q4, subpel_y_q4, conv_params);
+ } else {
+ av1_convolve_2d_sr_specialized_avx2(src, src_stride, dst, dst_stride, w, h,
+ filter_params_x, filter_params_y,
+ subpel_x_q4, subpel_y_q4, conv_params);
+ }
+}
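
The refactor above replaces open-coded tap detection with a shared get_filter_tap helper and routes only the 12-tap case to the general path. Inferred from the inline logic being deleted (the real helper lives elsewhere in the tree and may differ), a hypothetical restatement of what it computes:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical sketch: the effective tap count shrinks when the outer
   filter coefficients are zero; 12-tap filters are reported as-is. */
static int get_filter_tap_sketch(const int16_t *filter, int taps) {
  if (taps == 12) return 12;
  if (!(filter[0] | filter[1] | filter[6] | filter[7])) return 4;
  if (!(filter[0] | filter[7])) return 6;
  return 8; /* SUBPEL_TAPS */
}

int main(void) {
  const int16_t bilinear[8] = { 0, 0, 0, 64, 64, 0, 0, 0 };
  printf("%d\n", get_filter_tap_sketch(bilinear, 8)); /* prints 4 */
  return 0;
}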
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/x86/convolve_2d_sse2.c b/chromium/third_party/libaom/source/libaom/av1/common/x86/convolve_2d_sse2.c
index ca88bd7d5df..1b85f372949 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/x86/convolve_2d_sse2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/x86/convolve_2d_sse2.c
@@ -426,7 +426,7 @@ void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst,
if (w == 2) {
*(uint16_t *)p = (uint16_t)_mm_cvtsi128_si32(res);
} else if (w == 4) {
- *(uint32_t *)p = _mm_cvtsi128_si32(res);
+ *(int *)p = _mm_cvtsi128_si32(res);
} else {
_mm_storel_epi64(p, res);
}
@@ -534,7 +534,7 @@ void av1_dist_wtd_convolve_2d_copy_sse2(const uint8_t *src, int src_stride,
if (w > 4)
_mm_storel_epi64((__m128i *)(&dst0[j]), res_8);
else
- *(uint32_t *)(&dst0[j]) = _mm_cvtsi128_si32(res_8);
+ *(int *)(&dst0[j]) = _mm_cvtsi128_si32(res_8);
} else {
_mm_store_si128((__m128i *)(&dst[j]), res_unsigned);
}
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/x86/convolve_avx2.c b/chromium/third_party/libaom/source/libaom/av1/common/x86/convolve_avx2.c
index c7d1141a63d..30de9823250 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/x86/convolve_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/x86/convolve_avx2.c
@@ -13,16 +13,16 @@
#include "config/av1_rtcd.h"
+#include "third_party/SVT-AV1/convolve_avx2.h"
+
#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/x86/convolve_common_intrin.h"
#include "aom_dsp/x86/convolve_avx2.h"
+#include "aom_dsp/x86/convolve_common_intrin.h"
#include "aom_dsp/x86/synonyms.h"
-void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_y,
- const int subpel_y_qn) {
- int i, j, vert_tap = SUBPEL_TAPS;
+static AOM_INLINE void av1_convolve_y_sr_general_avx2(
+ const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w,
+ int h, const InterpFilterParams *filter_params_y, const int subpel_y_qn) {
// right shift is F-1 because we are already dividing
// filter coefficients by 2
const int right_shift_bits = (FILTER_BITS - 1);
@@ -32,16 +32,7 @@ void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
__m256i coeffs[6], s[12];
__m128i d[10];
- // Condition for checking valid vert_filt taps
- const int16_t *const filter = av1_get_interp_filter_subpel_kernel(
- filter_params_y, subpel_y_qn & SUBPEL_MASK);
- if (filter_params_y->taps == 12) {
- vert_tap = 12;
- } else if (!(filter[0] | filter[1] | filter[6] | filter[7])) {
- vert_tap = 4;
- } else if (!(filter[0] | filter[7])) {
- vert_tap = 6;
- }
+ int i, vert_tap = get_filter_tap(filter_params_y, subpel_y_qn);
if (vert_tap == 6)
prepare_coeffs_6t_lowbd(filter_params_y, subpel_y_qn, coeffs);
@@ -55,7 +46,7 @@ void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
if (vert_tap == 4) {
const int fo_vert = 1;
const uint8_t *const src_ptr = src - fo_vert * src_stride;
- for (j = 0; j < w; j += 16) {
+ for (int j = 0; j < w; j += 16) {
const uint8_t *data = &src_ptr[j];
d[0] = _mm_loadu_si128((__m128i *)(data + 0 * src_stride));
d[1] = _mm_loadu_si128((__m128i *)(data + 1 * src_stride));
@@ -150,7 +141,7 @@ void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
const int fo_vert = vert_tap / 2 - 1;
const uint8_t *const src_ptr = src - fo_vert * src_stride;
- for (j = 0; j < w; j += 16) {
+ for (int j = 0; j < w; j += 16) {
const uint8_t *data = &src_ptr[j];
__m256i src6;
@@ -255,7 +246,7 @@ void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
right_shift = _mm_cvtsi32_si128(FILTER_BITS);
right_shift_const = _mm256_set1_epi32((1 << FILTER_BITS) >> 1);
- for (j = 0; j < w; j += 8) {
+ for (int j = 0; j < w; j += 8) {
const uint8_t *data = &src_ptr[j];
__m256i src10;
@@ -376,10 +367,9 @@ void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
const __m128i res_0 = _mm256_extracti128_si256(res_8b_lo, 0);
const __m128i res_1 = _mm256_extracti128_si256(res_8b_lo, 1);
if (w - j > 2) {
- *(uint32_t *)&dst[i * dst_stride + j] =
- (uint32_t)_mm_cvtsi128_si32(res_0);
- *(uint32_t *)&dst[i * dst_stride + j + dst_stride] =
- (uint32_t)_mm_cvtsi128_si32(res_1);
+ *(int *)&dst[i * dst_stride + j] = _mm_cvtsi128_si32(res_0);
+ *(int *)&dst[i * dst_stride + j + dst_stride] =
+ _mm_cvtsi128_si32(res_1);
} else {
*(uint16_t *)&dst[i * dst_stride + j] =
(uint16_t)_mm_cvtsi128_si32(res_0);
@@ -404,7 +394,7 @@ void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
const int fo_vert = filter_params_y->taps / 2 - 1;
const uint8_t *const src_ptr = src - fo_vert * src_stride;
- for (j = 0; j < w; j += 16) {
+ for (int j = 0; j < w; j += 16) {
const uint8_t *data = &src_ptr[j];
__m256i src6;
@@ -518,18 +508,33 @@ void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
}
}
-void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, int w, int h,
- const InterpFilterParams *filter_params_x,
- const int subpel_x_qn,
- ConvolveParams *conv_params) {
+void av1_convolve_y_sr_avx2(const uint8_t *src, int32_t src_stride,
+ uint8_t *dst, int32_t dst_stride, int32_t w,
+ int32_t h,
+ const InterpFilterParams *filter_params_y,
+ const int32_t subpel_y_q4) {
+ const int vert_tap = get_filter_tap(filter_params_y, subpel_y_q4);
+
+ if (vert_tap == 12) {
+ av1_convolve_y_sr_general_avx2(src, src_stride, dst, dst_stride, w, h,
+ filter_params_y, subpel_y_q4);
+ } else {
+ av1_convolve_y_sr_specialized_avx2(src, src_stride, dst, dst_stride, w, h,
+ filter_params_y, subpel_y_q4);
+ }
+}
+
+static AOM_INLINE void av1_convolve_x_sr_general_avx2(
+ const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w,
+ int h, const InterpFilterParams *filter_params_x, const int subpel_x_qn,
+ ConvolveParams *conv_params) {
const int bits = FILTER_BITS - conv_params->round_0;
const __m128i round_shift = _mm_cvtsi32_si128(bits);
__m256i round_0_const =
_mm256_set1_epi16((1 << (conv_params->round_0 - 1)) >> 1);
__m128i round_0_shift = _mm_cvtsi32_si128(conv_params->round_0 - 1);
__m256i round_const = _mm256_set1_epi16((1 << bits) >> 1);
- int i, horiz_tap = SUBPEL_TAPS;
+ int i, horiz_tap = get_filter_tap(filter_params_x, subpel_x_qn);
assert(bits >= 0);
assert((FILTER_BITS - conv_params->round_1) >= 0 ||
@@ -540,16 +545,6 @@ void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
filt[0] = _mm256_load_si256((__m256i const *)(filt_global_avx2));
filt[1] = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32));
- const int16_t *const filter = av1_get_interp_filter_subpel_kernel(
- filter_params_x, subpel_x_qn & SUBPEL_MASK);
- if (filter_params_x->taps == 12) {
- horiz_tap = 12;
- } else if (!(filter[0] | filter[1] | filter[6] | filter[7])) {
- horiz_tap = 4;
- } else if (!(filter[0] | filter[7])) {
- horiz_tap = 6;
- }
-
if (horiz_tap == 6)
prepare_coeffs_6t_lowbd(filter_params_x, subpel_x_qn, coeffs);
else if (horiz_tap == 12) {
@@ -767,11 +762,9 @@ void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
const __m128i res_1 = _mm256_extracti128_si256(res_8b_lo, 1);
if (w > 2) {
// 00 01 02 03
- *(uint32_t *)&dst[i * dst_stride] =
- (uint32_t)_mm_cvtsi128_si32(res_0);
+ *(int *)&dst[i * dst_stride] = _mm_cvtsi128_si32(res_0);
// 10 11 12 13
- *(uint32_t *)&dst[i * dst_stride + dst_stride] =
- (uint32_t)_mm_cvtsi128_si32(res_1);
+ *(int *)&dst[i * dst_stride + dst_stride] = _mm_cvtsi128_si32(res_1);
} else {
// 00 01
*(uint16_t *)&dst[i * dst_stride] =
@@ -824,10 +817,8 @@ void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
__m256i res_8b_lo = _mm256_packus_epi16(res_16b_lo, res_16b_lo);
const __m128i res_0 = _mm256_extracti128_si256(res_8b_lo, 0);
const __m128i res_1 = _mm256_extracti128_si256(res_8b_lo, 1);
- *(uint32_t *)&dst[i * dst_stride + j] =
- (uint32_t)_mm_cvtsi128_si32(res_0);
- *(uint32_t *)&dst[i * dst_stride + j + 4] =
- (uint32_t)_mm_cvtsi128_si32(res_1);
+ *(int *)&dst[i * dst_stride + j] = _mm_cvtsi128_si32(res_0);
+ *(int *)&dst[i * dst_stride + j + 4] = _mm_cvtsi128_si32(res_1);
}
}
}
@@ -869,8 +860,8 @@ void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
} else {
__m128i *const p_0 = (__m128i *)&dst[i * dst_stride];
__m128i *const p_1 = (__m128i *)&dst[i * dst_stride + dst_stride];
- *(uint16_t *)p_0 = _mm_cvtsi128_si32(res_0);
- *(uint16_t *)p_1 = _mm_cvtsi128_si32(res_1);
+ *(uint16_t *)p_0 = (uint16_t)_mm_cvtsi128_si32(res_0);
+ *(uint16_t *)p_1 = (uint16_t)_mm_cvtsi128_si32(res_1);
}
}
} else {
@@ -905,3 +896,21 @@ void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst,
}
}
}
+
+void av1_convolve_x_sr_avx2(const uint8_t *src, int32_t src_stride,
+ uint8_t *dst, int32_t dst_stride, int32_t w,
+ int32_t h,
+ const InterpFilterParams *filter_params_x,
+ const int32_t subpel_x_q4,
+ ConvolveParams *conv_params) {
+ const int horz_tap = get_filter_tap(filter_params_x, subpel_x_q4);
+
+ if (horz_tap == 12) {
+ av1_convolve_x_sr_general_avx2(src, src_stride, dst, dst_stride, w, h,
+ filter_params_x, subpel_x_q4, conv_params);
+ } else {
+ av1_convolve_x_sr_specialized_avx2(src, src_stride, dst, dst_stride, w, h,
+ filter_params_x, subpel_x_q4,
+ conv_params);
+ }
+}
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/x86/convolve_sse2.c b/chromium/third_party/libaom/source/libaom/av1/common/x86/convolve_sse2.c
index cd5521e333f..012e75c1ae4 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/x86/convolve_sse2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/x86/convolve_sse2.c
@@ -199,32 +199,32 @@ void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst,
if (w <= 4) {
__m128i s[8], src6, res, res_round, res16;
- uint32_t res_int;
- src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 6 * src_stride));
+ int res_int;
+ src6 = _mm_cvtsi32_si128(*(int *)(src_ptr + 6 * src_stride));
s[0] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 0 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride)));
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 0 * src_stride)),
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 1 * src_stride)));
s[1] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride)));
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 1 * src_stride)),
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 2 * src_stride)));
s[2] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride)));
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 2 * src_stride)),
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 3 * src_stride)));
s[3] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride)));
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 3 * src_stride)),
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 4 * src_stride)));
s[4] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride)));
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 4 * src_stride)),
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 5 * src_stride)));
s[5] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride)), src6);
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 5 * src_stride)), src6);
do {
s[6] = _mm_unpacklo_epi8(
- src6, _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride)));
- src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 8 * src_stride));
+ src6, _mm_cvtsi32_si128(*(int *)(src_ptr + 7 * src_stride)));
+ src6 = _mm_cvtsi32_si128(*(int *)(src_ptr + 8 * src_stride));
s[7] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride)), src6);
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 7 * src_stride)), src6);
res = convolve_lo_y(s + 0, coeffs);
res_round = _mm_sra_epi32(_mm_add_epi32(res, round_const), round_shift);
@@ -234,7 +234,7 @@ void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst,
if (w == 2)
*(uint16_t *)dst = (uint16_t)res_int;
else
- *(uint32_t *)dst = res_int;
+ *(int *)dst = res_int;
src_ptr += src_stride;
dst += dst_stride;
@@ -247,7 +247,7 @@ void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst,
if (w == 2)
*(uint16_t *)dst = (uint16_t)res_int;
else
- *(uint32_t *)dst = res_int;
+ *(int *)dst = res_int;
src_ptr += src_stride;
dst += dst_stride;
@@ -443,11 +443,11 @@ void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst,
const __m128i res16 = _mm_packs_epi32(res_lo_round, res_lo_round);
const __m128i res = _mm_packus_epi16(res16, res16);
- uint32_t r = _mm_cvtsi128_si32(res);
+ int r = _mm_cvtsi128_si32(res);
if (w == 2)
*(uint16_t *)dst = (uint16_t)r;
else
- *(uint32_t *)dst = r;
+ *(int *)dst = r;
src_ptr += src_stride;
dst += dst_stride;
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_convolve_2d_avx2.c b/chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_convolve_2d_avx2.c
index 12046e40c74..de850ee5998 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_convolve_2d_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_convolve_2d_avx2.c
@@ -80,7 +80,7 @@ void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride,
for (i = 0; i < im_h; i += 2) {
const __m256i row0 =
_mm256_loadu_si256((__m256i *)&src_ptr[i * src_stride + j]);
- __m256i row1 = _mm256_set1_epi16(0);
+ __m256i row1 = _mm256_setzero_si256();
if (i + 1 < im_h)
row1 =
_mm256_loadu_si256((__m256i *)&src_ptr[(i + 1) * src_stride + j]);
@@ -181,9 +181,9 @@ void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride,
res_a_round = _mm256_min_epi16(res_a_round, clip_pixel);
res_a_round = _mm256_max_epi16(res_a_round, zero);
- xx_storel_32((__m128i *)&dst[i * dst_stride + j],
+ xx_storel_32(&dst[i * dst_stride + j],
_mm256_castsi256_si128(res_a_round));
- xx_storel_32((__m128i *)&dst[i * dst_stride + j + dst_stride],
+ xx_storel_32(&dst[i * dst_stride + j + dst_stride],
_mm256_extracti128_si256(res_a_round, 1));
}
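
The _mm256_set1_epi16(0) to _mm256_setzero_si256() substitutions here and in the files below are behavior-preserving: setzero expresses the all-zero vector directly and maps to a single vpxor, rather than relying on the compiler to fold a broadcast of zero. A sketch, assuming AVX2:

#include <immintrin.h>

int main(void) {
  const __m256i a = _mm256_setzero_si256();
  const __m256i b = _mm256_set1_epi16(0);
  /* testz returns 1 when (x & y) == 0, so both checks pass for all-zero
     vectors */
  return (_mm256_testz_si256(a, a) && _mm256_testz_si256(b, b)) ? 0 : 1;
}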
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_convolve_2d_ssse3.c b/chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_convolve_2d_ssse3.c
index 148543f667f..832404474a2 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_convolve_2d_ssse3.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_convolve_2d_ssse3.c
@@ -211,10 +211,10 @@ void av1_highbd_convolve_2d_sr_ssse3(
res_a_round1 = _mm_min_epi16(res_a_round1, clip_pixel);
res_a_round1 = _mm_max_epi16(res_a_round1, zero);
- *((uint32_t *)(&dst[i * dst_stride + j])) =
+ *((int *)(&dst[i * dst_stride + j])) =
_mm_cvtsi128_si32(res_a_round0);
- *((uint32_t *)(&dst[i * dst_stride + j + dst_stride])) =
+ *((int *)(&dst[i * dst_stride + j + dst_stride])) =
_mm_cvtsi128_si32(res_a_round1);
}
s[0] = s[1];
@@ -384,10 +384,10 @@ void av1_highbd_convolve_2d_sr_ssse3(
res_a_round1 = _mm_min_epi16(res_a_round1, clip_pixel);
res_a_round1 = _mm_max_epi16(res_a_round1, zero);
- *((uint32_t *)(&dst[i * dst_stride + j])) =
+ *((int *)(&dst[i * dst_stride + j])) =
_mm_cvtsi128_si32(res_a_round0);
- *((uint32_t *)(&dst[i * dst_stride + j + dst_stride])) =
+ *((int *)(&dst[i * dst_stride + j + dst_stride])) =
_mm_cvtsi128_si32(res_a_round1);
}
s[0] = s[1];
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_inv_txfm_sse4.c b/chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_inv_txfm_sse4.c
index 37f8f42b298..de3af3ada92 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_inv_txfm_sse4.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_inv_txfm_sse4.c
@@ -517,7 +517,7 @@ static void idct4x4_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
static void iadst4x4_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
int bd, int out_shift) {
const int32_t *sinpi = sinpi_arr(bit);
- const __m128i zero = _mm_set1_epi32(0);
+ const __m128i zero = _mm_setzero_si128();
__m128i rnding = _mm_set1_epi32(1 << (bit + 4 - 1));
rnding = _mm_unpacklo_epi32(rnding, zero);
const __m128i mul = _mm_set1_epi32(1 << 4);
@@ -698,7 +698,7 @@ static void iidentity4_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
int bd, int out_shift) {
(void)bit;
__m128i v[4];
- __m128i zero = _mm_set1_epi32(0);
+ __m128i zero = _mm_setzero_si128();
__m128i fact = _mm_set1_epi32(NewSqrt2);
__m128i offset = _mm_set1_epi32(1 << (NewSqrt2Bits - 1));
__m128i a0_low, a1_low;
@@ -3142,7 +3142,7 @@ static void iidentity16_sse4_1(__m128i *in, __m128i *out, int bit, int do_cols,
__m128i fact = _mm_set1_epi32(2 * NewSqrt2);
__m128i offset = _mm_set1_epi32(1 << (NewSqrt2Bits - 1));
__m128i a0_low, a0_high, a1_low, a1_high;
- __m128i zero = _mm_set1_epi32(0);
+ __m128i zero = _mm_setzero_si128();
offset = _mm_unpacklo_epi32(offset, zero);
for (int i = 0; i < 16; i++) {
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_jnt_convolve_avx2.c b/chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_jnt_convolve_avx2.c
index 9cedd449a24..da52ecdc703 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_jnt_convolve_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_jnt_convolve_avx2.c
@@ -282,7 +282,7 @@ void av1_highbd_dist_wtd_convolve_2d_avx2(
for (i = 0; i < im_h; i += 2) {
const __m256i row0 =
_mm256_loadu_si256((__m256i *)&src_ptr[i * src_stride + j]);
- __m256i row1 = _mm256_set1_epi16(0);
+ __m256i row1 = _mm256_setzero_si256();
if (i + 1 < im_h)
row1 =
_mm256_loadu_si256((__m256i *)&src_ptr[(i + 1) * src_stride + j]);
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_warp_affine_avx2.c b/chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_warp_affine_avx2.c
index 87b1a66a4a1..7f6aceb88fc 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_warp_affine_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/x86/highbd_warp_affine_avx2.c
@@ -158,7 +158,7 @@ void av1_highbd_warp_affine_avx2(const int32_t *mat, const uint16_t *ref,
iy = iy * stride;
__m256i v_refl = _mm256_inserti128_si256(
- _mm256_set1_epi16(0),
+ _mm256_setzero_si256(),
_mm_loadu_si128((__m128i *)&ref[iy + ix4 - 7]), 0);
v_refl = _mm256_inserti128_si256(
v_refl, _mm_loadu_si128((__m128i *)&ref[iy + ix4 + 1]),
@@ -218,7 +218,7 @@ void av1_highbd_warp_affine_avx2(const int32_t *mat, const uint16_t *ref,
_mm_shuffle_epi32(v_01, 3)); // A7A6A7A6A7A6A7A6
__m256i v_refl = _mm256_inserti128_si256(
- _mm256_set1_epi16(0),
+ _mm256_setzero_si256(),
_mm_loadu_si128((__m128i *)&ref[iy + ix4 - 7]), 0);
v_refl = _mm256_inserti128_si256(
v_refl, _mm_loadu_si128((__m128i *)&ref[iy + ix4 + 1]),
@@ -333,7 +333,7 @@ void av1_highbd_warp_affine_avx2(const int32_t *mat, const uint16_t *ref,
iy = iy * stride;
__m256i v_refl = _mm256_inserti128_si256(
- _mm256_set1_epi16(0),
+ _mm256_setzero_si256(),
_mm_loadu_si128((__m128i *)&ref[iy + ix4 - 7]), 0);
v_refl = _mm256_inserti128_si256(
v_refl, _mm_loadu_si128((__m128i *)&ref[iy + ix4 + 1]),
@@ -454,7 +454,7 @@ void av1_highbd_warp_affine_avx2(const int32_t *mat, const uint16_t *ref,
_mm256_unpackhi_epi64(v_c0123u, v_c4567u); // H7H6 ... A7A6
__m256i v_refl = _mm256_inserti128_si256(
- _mm256_set1_epi16(0),
+ _mm256_setzero_si256(),
_mm_loadu_si128((__m128i *)&ref[iy + ix4 - 7]), 0);
v_refl = _mm256_inserti128_si256(
v_refl, _mm_loadu_si128((__m128i *)&ref[iy + ix4 + 1]),
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/x86/intra_edge_sse4.c b/chromium/third_party/libaom/source/libaom/av1/common/x86/intra_edge_sse4.c
index fc69f41d793..f025f79174a 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/x86/intra_edge_sse4.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/x86/intra_edge_sse4.c
@@ -33,7 +33,7 @@ void av1_filter_intra_edge_sse4_1(uint8_t *p, int sz, int strength) {
// Extend the first and last samples to simplify the loop for the 5-tap case
p[-1] = p[0];
- __m128i last = _mm_set1_epi8(p[sz - 1]);
+ __m128i last = _mm_set1_epi8((char)p[sz - 1]);
_mm_storeu_si128((__m128i *)&p[sz], last);
// Adjust input pointer for filter support area
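
_mm_set1_epi8 takes a char argument while p[sz - 1] is a uint8_t, so pixel values above 127 would otherwise be narrowed implicitly; the (char) cast makes the conversion explicit without changing the broadcast bytes. A sketch:

#include <emmintrin.h>
#include <stdint.h>

int main(void) {
  const uint8_t px = 0xff;
  const __m128i v = _mm_set1_epi8((char)px); /* 0xff in all 16 lanes */
  /* lane 0 read as 16 bits is 0xffff; its low byte is the pixel value */
  return ((uint8_t)_mm_extract_epi16(v, 0) == 0xff) ? 0 : 1;
}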
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/x86/jnt_convolve_avx2.c b/chromium/third_party/libaom/source/libaom/av1/common/x86/jnt_convolve_avx2.c
index 7a13d4a67be..6ec649e8e64 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/x86/jnt_convolve_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/x86/jnt_convolve_avx2.c
@@ -9,15 +9,19 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
+#include <emmintrin.h>
#include <immintrin.h>
#include "config/aom_dsp_rtcd.h"
+#include "aom_dsp/aom_dsp_common.h"
+#include "aom_dsp/aom_filter.h"
#include "aom_dsp/x86/convolve_avx2.h"
#include "aom_dsp/x86/convolve_common_intrin.h"
#include "aom_dsp/x86/convolve_sse4_1.h"
-#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/aom_filter.h"
+#include "aom_dsp/x86/mem_sse2.h"
+#include "aom_dsp/x86/synonyms_avx2.h"
+
#include "av1/common/convolve.h"
static INLINE __m256i unpack_weights_avx2(ConvolveParams *conv_params) {
@@ -110,9 +114,8 @@ void av1_dist_wtd_convolve_x_avx2(const uint8_t *src, int src_stride,
_mm_storel_epi64(
(__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1);
} else {
- *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
- _mm_cvtsi128_si32(res_0);
- *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
+ *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0);
+ *(int *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
_mm_cvtsi128_si32(res_1);
}
} else {
@@ -165,9 +168,8 @@ void av1_dist_wtd_convolve_x_avx2(const uint8_t *src, int src_stride,
_mm_storel_epi64(
(__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1);
} else {
- *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
- _mm_cvtsi128_si32(res_0);
- *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
+ *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0);
+ *(int *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
_mm_cvtsi128_si32(res_1);
}
} else {
@@ -304,9 +306,8 @@ void av1_dist_wtd_convolve_y_avx2(const uint8_t *src, int src_stride,
(__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])),
res_1);
} else {
- *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
- _mm_cvtsi128_si32(res_0);
- *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
+ *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0);
+ *(int *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
_mm_cvtsi128_si32(res_1);
}
} else {
@@ -483,9 +484,8 @@ void av1_dist_wtd_convolve_y_avx2(const uint8_t *src, int src_stride,
(__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])),
res_1);
} else {
- *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
- _mm_cvtsi128_si32(res_0);
- *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
+ *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0);
+ *(int *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
_mm_cvtsi128_si32(res_1);
}
} else {
@@ -754,9 +754,8 @@ void av1_dist_wtd_convolve_2d_avx2(const uint8_t *src, int src_stride,
const __m128i res_0 = _mm256_castsi256_si128(res_8);
const __m128i res_1 = _mm256_extracti128_si256(res_8, 1);
- *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
- _mm_cvtsi128_si32(res_0);
- *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
+ *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_0);
+ *(int *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
_mm_cvtsi128_si32(res_1);
} else {
@@ -793,16 +792,311 @@ void av1_dist_wtd_convolve_2d_avx2(const uint8_t *src, int src_stride,
}
}
+#define DO_NO_AVG_2D_COPY_4X16(r0, c0, r1, c1, r2, c2, r3, c3) \
+ do { \
+ src_0 = _mm256_cvtepu8_epi16( \
+ _mm_loadu_si128((__m128i *)(&src[r0 * src_stride + c0]))); \
+ src_1 = _mm256_cvtepu8_epi16( \
+ _mm_loadu_si128((__m128i *)(&src[r1 * src_stride + c1]))); \
+ src_2 = _mm256_cvtepu8_epi16( \
+ _mm_loadu_si128((__m128i *)(&src[r2 * src_stride + c2]))); \
+ src_3 = _mm256_cvtepu8_epi16( \
+ _mm_loadu_si128((__m128i *)(&src[r3 * src_stride + c3]))); \
+ \
+ src_0 = _mm256_slli_epi16(src_0, LEFT_SHIFT); \
+ src_1 = _mm256_slli_epi16(src_1, LEFT_SHIFT); \
+ src_2 = _mm256_slli_epi16(src_2, LEFT_SHIFT); \
+ src_3 = _mm256_slli_epi16(src_3, LEFT_SHIFT); \
+ \
+ src_0 = _mm256_add_epi16(src_0, offset_const); \
+ src_1 = _mm256_add_epi16(src_1, offset_const); \
+ src_2 = _mm256_add_epi16(src_2, offset_const); \
+ src_3 = _mm256_add_epi16(src_3, offset_const); \
+ \
+ _mm256_store_si256((__m256i *)(&dst[r0 * dst_stride + c0]), src_0); \
+ _mm256_store_si256((__m256i *)(&dst[r1 * dst_stride + c1]), src_1); \
+ _mm256_store_si256((__m256i *)(&dst[r2 * dst_stride + c2]), src_2); \
+ _mm256_store_si256((__m256i *)(&dst[r3 * dst_stride + c3]), src_3); \
+ } while (0)
+
+#define LEFT_SHIFT (2 * FILTER_BITS - 3 - 7)
+static AOM_INLINE void av1_dist_wtd_convolve_2d_no_avg_copy_avx2(
+ const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride,
+ int w, int h, const __m256i offset_const) {
+ int i = h;
+ if (w >= 16) {
+ __m256i src_0, src_1, src_2, src_3;
+ if (w == 128) {
+ do {
+ DO_NO_AVG_2D_COPY_4X16(0, 0, 0, 16, 0, 32, 0, 48);
+ DO_NO_AVG_2D_COPY_4X16(0, 64, 0, 80, 0, 96, 0, 112);
+ src += 1 * src_stride;
+ dst += 1 * dst_stride;
+ i -= 1;
+ } while (i);
+ } else if (w == 64) {
+ do {
+ DO_NO_AVG_2D_COPY_4X16(0, 0, 0, 16, 0, 32, 0, 48);
+ src += 1 * src_stride;
+ dst += 1 * dst_stride;
+ i -= 1;
+ } while (i);
+ } else if (w == 32) {
+ do {
+ DO_NO_AVG_2D_COPY_4X16(0, 0, 1, 0, 0, 16, 1, 16);
+ src += 2 * src_stride;
+ dst += 2 * dst_stride;
+ i -= 2;
+ } while (i);
+ } else if (w == 16) {
+ do {
+ DO_NO_AVG_2D_COPY_4X16(0, 0, 1, 0, 2, 0, 3, 0);
+ src += 4 * src_stride;
+ dst += 4 * dst_stride;
+ i -= 4;
+ } while (i);
+ }
+ } else {
+ const __m256i zero = _mm256_setzero_si256();
+ do {
+ const __m128i src_row_0 =
+ _mm_loadl_epi64((__m128i *)(&src[0 * src_stride]));
+ const __m128i src_row_1 =
+ _mm_loadl_epi64((__m128i *)(&src[1 * src_stride]));
+ const __m128i src_row_2 =
+ _mm_loadl_epi64((__m128i *)(&src[2 * src_stride]));
+ const __m128i src_row_3 =
+ _mm_loadl_epi64((__m128i *)(&src[3 * src_stride]));
+
+ __m256i src_10 = _mm256_insertf128_si256(
+ _mm256_castsi128_si256(src_row_0), src_row_1, 1);
+ __m256i src_32 = _mm256_insertf128_si256(
+ _mm256_castsi128_si256(src_row_2), src_row_3, 1);
+
+ src_10 = _mm256_unpacklo_epi8(src_10, zero);
+ src_32 = _mm256_unpacklo_epi8(src_32, zero);
+
+ src_10 = _mm256_slli_epi16(src_10, LEFT_SHIFT);
+ src_32 = _mm256_slli_epi16(src_32, LEFT_SHIFT);
+
+ src_10 = _mm256_add_epi16(src_10, offset_const);
+ src_32 = _mm256_add_epi16(src_32, offset_const);
+
+      // Store the shifted and offset source values into the destination buffer
+ _mm_store_si128((__m128i *)(&dst[0 * dst_stride]),
+ _mm256_castsi256_si128(src_10));
+ _mm_store_si128((__m128i *)(&dst[1 * dst_stride]),
+ _mm256_extracti128_si256(src_10, 1));
+ _mm_store_si128((__m128i *)(&dst[2 * dst_stride]),
+ _mm256_castsi256_si128(src_32));
+ _mm_store_si128((__m128i *)(&dst[3 * dst_stride]),
+ _mm256_extracti128_si256(src_32, 1));
+
+ src += 4 * src_stride;
+ dst += 4 * dst_stride;
+ i -= 4;
+ } while (i);
+ }
+}
+
+#define DO_AVG_2D_COPY_4X16(USE_DIST_WEIGHTED, r0, c0, r1, c1, r2, c2, r3, c3) \
+ do { \
+ src_0 = _mm256_cvtepu8_epi16( \
+ _mm_loadu_si128((__m128i *)(&src[r0 * src_stride + c0]))); \
+ src_1 = _mm256_cvtepu8_epi16( \
+ _mm_loadu_si128((__m128i *)(&src[r1 * src_stride + c1]))); \
+ src_2 = _mm256_cvtepu8_epi16( \
+ _mm_loadu_si128((__m128i *)(&src[r2 * src_stride + c2]))); \
+ src_3 = _mm256_cvtepu8_epi16( \
+ _mm_loadu_si128((__m128i *)(&src[r3 * src_stride + c3]))); \
+ \
+ src_0 = _mm256_slli_epi16(src_0, LEFT_SHIFT); \
+ src_1 = _mm256_slli_epi16(src_1, LEFT_SHIFT); \
+ src_2 = _mm256_slli_epi16(src_2, LEFT_SHIFT); \
+ src_3 = _mm256_slli_epi16(src_3, LEFT_SHIFT); \
+ src_0 = _mm256_add_epi16(src_0, offset_const); \
+ src_1 = _mm256_add_epi16(src_1, offset_const); \
+ src_2 = _mm256_add_epi16(src_2, offset_const); \
+ src_3 = _mm256_add_epi16(src_3, offset_const); \
+ \
+ ref_0 = _mm256_loadu_si256((__m256i *)(&dst[r0 * dst_stride + c0])); \
+ ref_1 = _mm256_loadu_si256((__m256i *)(&dst[r1 * dst_stride + c1])); \
+ ref_2 = _mm256_loadu_si256((__m256i *)(&dst[r2 * dst_stride + c2])); \
+ ref_3 = _mm256_loadu_si256((__m256i *)(&dst[r3 * dst_stride + c3])); \
+ \
+ res_0 = comp_avg(&ref_0, &src_0, &wt, USE_DIST_WEIGHTED); \
+ res_1 = comp_avg(&ref_1, &src_1, &wt, USE_DIST_WEIGHTED); \
+ res_2 = comp_avg(&ref_2, &src_2, &wt, USE_DIST_WEIGHTED); \
+ res_3 = comp_avg(&ref_3, &src_3, &wt, USE_DIST_WEIGHTED); \
+ \
+ res_0 = convolve_rounding(&res_0, &offset_const, &rounding_const, \
+ rounding_shift); \
+ res_1 = convolve_rounding(&res_1, &offset_const, &rounding_const, \
+ rounding_shift); \
+ res_2 = convolve_rounding(&res_2, &offset_const, &rounding_const, \
+ rounding_shift); \
+ res_3 = convolve_rounding(&res_3, &offset_const, &rounding_const, \
+ rounding_shift); \
+ \
+ res_10 = _mm256_packus_epi16(res_0, res_1); \
+ res_32 = _mm256_packus_epi16(res_2, res_3); \
+ res_10 = _mm256_permute4x64_epi64(res_10, 0xD8); \
+ res_32 = _mm256_permute4x64_epi64(res_32, 0xD8); \
+ \
+ _mm_store_si128((__m128i *)(&dst0[r0 * dst_stride0 + c0]), \
+ _mm256_castsi256_si128(res_10)); \
+ _mm_store_si128((__m128i *)(&dst0[r1 * dst_stride0 + c1]), \
+ _mm256_extracti128_si256(res_10, 1)); \
+ _mm_store_si128((__m128i *)(&dst0[r2 * dst_stride0 + c2]), \
+ _mm256_castsi256_si128(res_32)); \
+ _mm_store_si128((__m128i *)(&dst0[r3 * dst_stride0 + c3]), \
+ _mm256_extracti128_si256(res_32, 1)); \
+ } while (0)
+
+#define DO_AVG_2D_COPY(USE_DIST_WEIGHTED) \
+ int i = h; \
+ if (w >= 16) { \
+ __m256i src_0, src_1, src_2, src_3; \
+ __m256i ref_0, ref_1, ref_2, ref_3; \
+ __m256i res_0, res_1, res_2, res_3; \
+ __m256i res_10, res_32; \
+ if (w == 128) { \
+ do { \
+ DO_AVG_2D_COPY_4X16(USE_DIST_WEIGHTED, 0, 0, 0, 16, 0, 32, 0, 48); \
+ DO_AVG_2D_COPY_4X16(USE_DIST_WEIGHTED, 0, 64, 0, 80, 0, 96, 0, 112); \
+ i -= 1; \
+ src += 1 * src_stride; \
+ dst += 1 * dst_stride; \
+ dst0 += 1 * dst_stride0; \
+ } while (i); \
+ } else if (w == 64) { \
+ do { \
+ DO_AVG_2D_COPY_4X16(USE_DIST_WEIGHTED, 0, 0, 0, 16, 0, 32, 0, 48); \
+ \
+ i -= 1; \
+ src += 1 * src_stride; \
+ dst += 1 * dst_stride; \
+ dst0 += 1 * dst_stride0; \
+ } while (i); \
+ } else if (w == 32) { \
+ do { \
+ DO_AVG_2D_COPY_4X16(USE_DIST_WEIGHTED, 0, 0, 1, 0, 0, 16, 1, 16); \
+ \
+ i -= 2; \
+ src += 2 * src_stride; \
+ dst += 2 * dst_stride; \
+ dst0 += 2 * dst_stride0; \
+ } while (i); \
+ } else { \
+ assert(w == 16); \
+ do { \
+ DO_AVG_2D_COPY_4X16(USE_DIST_WEIGHTED, 0, 0, 1, 0, 2, 0, 3, 0); \
+ \
+ i -= 4; \
+ src += 4 * src_stride; \
+ dst += 4 * dst_stride; \
+ dst0 += 4 * dst_stride0; \
+ } while (i); \
+ } \
+ } else if (w == 8) { \
+ do { \
+ const __m128i src_0 = \
+ _mm_loadl_epi64((__m128i *)(&src[0 * src_stride])); \
+ const __m128i src_1 = \
+ _mm_loadl_epi64((__m128i *)(&src[1 * src_stride])); \
+ const __m128i src_2 = \
+ _mm_loadl_epi64((__m128i *)(&src[2 * src_stride])); \
+ const __m128i src_3 = \
+ _mm_loadl_epi64((__m128i *)(&src[3 * src_stride])); \
+ __m256i src_10 = \
+ _mm256_insertf128_si256(_mm256_castsi128_si256(src_0), src_1, 1); \
+ __m256i src_32 = \
+ _mm256_insertf128_si256(_mm256_castsi128_si256(src_2), src_3, 1); \
+ \
+ src_10 = _mm256_unpacklo_epi8(src_10, zero); \
+ src_32 = _mm256_unpacklo_epi8(src_32, zero); \
+ \
+ src_10 = _mm256_slli_epi16(src_10, LEFT_SHIFT); \
+ src_32 = _mm256_slli_epi16(src_32, LEFT_SHIFT); \
+ \
+ src_10 = _mm256_add_epi16(src_10, offset_const); \
+ src_32 = _mm256_add_epi16(src_32, offset_const); \
+ \
+ const __m256i ref_10 = \
+ load_line2_avx2(&dst[0 * dst_stride], &dst[1 * dst_stride]); \
+ const __m256i ref_32 = \
+ load_line2_avx2(&dst[2 * dst_stride], &dst[3 * dst_stride]); \
+ __m256i res_10 = comp_avg(&ref_10, &src_10, &wt, USE_DIST_WEIGHTED); \
+ __m256i res_32 = comp_avg(&ref_32, &src_32, &wt, USE_DIST_WEIGHTED); \
+ \
+ res_10 = convolve_rounding(&res_10, &offset_const, &rounding_const, \
+ rounding_shift); \
+ res_32 = convolve_rounding(&res_32, &offset_const, &rounding_const, \
+ rounding_shift); \
+ \
+ __m256i res = _mm256_packus_epi16(res_10, res_32); \
+ const __m128i res_20 = _mm256_castsi256_si128(res); \
+ const __m128i res_31 = _mm256_extracti128_si256(res, 1); \
+ \
+ _mm_storel_epi64((__m128i *)(&dst0[0 * dst_stride0]), res_20); \
+ _mm_storel_epi64((__m128i *)((&dst0[1 * dst_stride0])), res_31); \
+ _mm_storeh_epi64((__m128i *)(&dst0[2 * dst_stride0]), res_20); \
+ _mm_storeh_epi64((__m128i *)((&dst0[3 * dst_stride0])), res_31); \
+ i -= 4; \
+ src += 4 * src_stride; \
+ dst += 4 * dst_stride; \
+ dst0 += 4 * dst_stride0; \
+ } while (i); \
+ } else { \
+ assert(w == 4); \
+ do { \
+ __m256i src_3210_8bit = \
+ _mm256_setr_epi32(*(int32_t *)(src + 0 * src_stride), \
+ *(int32_t *)(src + 1 * src_stride), 0, 0, \
+ *(int32_t *)(src + 2 * src_stride), \
+ *(int32_t *)(src + 3 * src_stride), 0, 0); \
+ \
+ __m256i src_3210 = _mm256_unpacklo_epi8(src_3210_8bit, zero); \
+ src_3210 = _mm256_slli_epi16(src_3210, LEFT_SHIFT); \
+ src_3210 = _mm256_add_epi16(src_3210, offset_const); \
+ \
+ __m256i ref_3210 = \
+ _mm256_setr_epi64x(*(int64_t *)(dst + 0 * dst_stride), \
+ *(int64_t *)(dst + 1 * dst_stride), \
+ *(int64_t *)(dst + 2 * dst_stride), \
+ *(int64_t *)(dst + 3 * dst_stride)); \
+ __m256i res_3210 = \
+ comp_avg(&ref_3210, &src_3210, &wt, USE_DIST_WEIGHTED); \
+ \
+ res_3210 = convolve_rounding(&res_3210, &offset_const, &rounding_const, \
+ rounding_shift); \
+ \
+ res_3210 = _mm256_packus_epi16(res_3210, res_3210); \
+ const __m128i res_10 = _mm256_castsi256_si128(res_3210); \
+ const __m128i res_32 = _mm256_extracti128_si256(res_3210, 1); \
+ \
+ *(int *)(&dst0[0 * dst_stride0]) = _mm_cvtsi128_si32(res_10); \
+ *(int *)(&dst0[2 * dst_stride0]) = _mm_cvtsi128_si32(res_32); \
+ *(int *)(&dst0[1 * dst_stride0]) = _mm_extract_epi32(res_10, 1); \
+ *(int *)(&dst0[3 * dst_stride0]) = _mm_extract_epi32(res_32, 1); \
+ i -= 4; \
+ src += 4 * src_stride; \
+ dst += 4 * dst_stride; \
+ dst0 += 4 * dst_stride0; \
+ } while (i); \
+ }
+
void av1_dist_wtd_convolve_2d_copy_avx2(const uint8_t *src, int src_stride,
uint8_t *dst0, int dst_stride0, int w,
int h, ConvolveParams *conv_params) {
const int bd = 8;
CONV_BUF_TYPE *dst = conv_params->dst;
int dst_stride = conv_params->dst_stride;
+ assert(conv_params->round_0 == 3);
+ assert(conv_params->round_1 == 7);
+ assert(w % 4 == 0);
+ assert(h % 4 == 0);
- const int bits =
- FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
- const __m128i left_shift = _mm_cvtsi32_si128(bits);
const int do_average = conv_params->do_average;
const int use_dist_wtd_comp_avg = conv_params->use_dist_wtd_comp_avg;
const __m256i wt = unpack_weights_avx2(conv_params);
@@ -815,88 +1109,16 @@ void av1_dist_wtd_convolve_2d_copy_avx2(const uint8_t *src, int src_stride,
const int rounding_shift =
2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
const __m256i rounding_const = _mm256_set1_epi16((1 << rounding_shift) >> 1);
- int i, j;
-
- if (!(w % 16)) {
- for (i = 0; i < h; i += 1) {
- for (j = 0; j < w; j += 16) {
- const __m256i src_16bit = _mm256_cvtepu8_epi16(
- _mm_loadu_si128((__m128i *)(&src[i * src_stride + j])));
-
- const __m256i res = _mm256_sll_epi16(src_16bit, left_shift);
- const __m256i res_unsigned = _mm256_add_epi16(res, offset_const);
-
- if (do_average) {
- const __m256i data_ref_0 =
- _mm256_loadu_si256((__m256i *)(&dst[i * dst_stride + j]));
-
- const __m256i comp_avg_res =
- comp_avg(&data_ref_0, &res_unsigned, &wt, use_dist_wtd_comp_avg);
-
- const __m256i round_result = convolve_rounding(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m256i res_8 = _mm256_packus_epi16(round_result, round_result);
- const __m256i res_0 = _mm256_permute4x64_epi64(res_8, 0xD8);
-
- _mm_store_si128((__m128i *)(&dst0[i * dst_stride0 + j]),
- _mm256_castsi256_si128(res_0));
- } else {
- _mm256_store_si256((__m256i *)(&dst[i * dst_stride + j]),
- res_unsigned);
- }
- }
- }
- } else if (!(w % 4)) {
- for (i = 0; i < h; i += 2) {
- for (j = 0; j < w; j += 8) {
- const __m128i src_row_0 =
- _mm_loadl_epi64((__m128i *)(&src[i * src_stride + j]));
- const __m128i src_row_1 =
- _mm_loadl_epi64((__m128i *)(&src[i * src_stride + j + src_stride]));
- // since not all compilers yet support _mm256_set_m128i()
- const __m256i src_10 = _mm256_insertf128_si256(
- _mm256_castsi128_si256(src_row_0), src_row_1, 1);
- const __m256i src_16bit = _mm256_unpacklo_epi8(src_10, zero);
-
- const __m256i res = _mm256_sll_epi16(src_16bit, left_shift);
-
- const __m256i res_unsigned = _mm256_add_epi16(res, offset_const);
-
- // Accumulate values into the destination buffer
- if (do_average) {
- const __m256i data_ref_0 = load_line2_avx2(
- &dst[i * dst_stride + j], &dst[i * dst_stride + j + dst_stride]);
- const __m256i comp_avg_res =
- comp_avg(&data_ref_0, &res_unsigned, &wt, use_dist_wtd_comp_avg);
-
- const __m256i round_result = convolve_rounding(
- &comp_avg_res, &offset_const, &rounding_const, rounding_shift);
-
- const __m256i res_8 = _mm256_packus_epi16(round_result, round_result);
- const __m128i res_0 = _mm256_castsi256_si128(res_8);
- const __m128i res_1 = _mm256_extracti128_si256(res_8, 1);
-
- if (w > 4) {
- _mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_0);
- _mm_storel_epi64(
- (__m128i *)((&dst0[i * dst_stride0 + j + dst_stride0])), res_1);
- } else {
- *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
- _mm_cvtsi128_si32(res_0);
- *(uint32_t *)(&dst0[i * dst_stride0 + j + dst_stride0]) =
- _mm_cvtsi128_si32(res_1);
- }
- } else {
- const __m128i res_0 = _mm256_castsi256_si128(res_unsigned);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_0);
-
- const __m128i res_1 = _mm256_extracti128_si256(res_unsigned, 1);
- _mm_store_si128((__m128i *)(&dst[i * dst_stride + j + dst_stride]),
- res_1);
- }
- }
+ if (do_average) {
+ if (use_dist_wtd_comp_avg) {
+ DO_AVG_2D_COPY(1)
+ } else {
+ DO_AVG_2D_COPY(0)
}
+ } else {
+ av1_dist_wtd_convolve_2d_no_avg_copy_avx2(src, src_stride, dst, dst_stride,
+ w, h, offset_const);
}
}
+#undef LEFT_SHIFT
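
The asserts added to av1_dist_wtd_convolve_2d_copy_avx2 pin round_0 == 3 and round_1 == 7, which is what allows the runtime left_shift computation to be replaced by the compile-time LEFT_SHIFT constant. Assuming libaom's FILTER_BITS == 7 (not restated in this patch), the shift works out to 14 - 3 - 7 = 4:

#include <assert.h>

#define FILTER_BITS 7 /* assumed libaom value */
#define LEFT_SHIFT (2 * FILTER_BITS - 3 - 7)

int main(void) {
  const int round_0 = 3, round_1 = 7; /* pinned by the new asserts */
  assert(LEFT_SHIFT == 2 * FILTER_BITS - round_0 - round_1); /* == 4 */
  return 0;
}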
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/x86/jnt_convolve_sse2.c b/chromium/third_party/libaom/source/libaom/av1/common/x86/jnt_convolve_sse2.c
index b8400c062d5..ab937f92d88 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/x86/jnt_convolve_sse2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/x86/jnt_convolve_sse2.c
@@ -79,7 +79,7 @@ void av1_dist_wtd_convolve_x_sse2(const uint8_t *src, int src_stride,
&comp_avg_res, &offset_const, &rounding_const, rounding_shift);
const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
- *(uint32_t *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
+ *(int *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
} else {
_mm_store_si128((__m128i *)(&dst[0]), res_unsigned);
}
@@ -178,31 +178,31 @@ void av1_dist_wtd_convolve_y_sse2(const uint8_t *src, int src_stride,
if (w == 4) {
__m128i s[8], src6, res, res_shift;
- src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 6 * src_stride));
+ src6 = _mm_cvtsi32_si128(*(int *)(src_ptr + 6 * src_stride));
s[0] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 0 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride)));
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 0 * src_stride)),
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 1 * src_stride)));
s[1] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride)));
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 1 * src_stride)),
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 2 * src_stride)));
s[2] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride)));
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 2 * src_stride)),
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 3 * src_stride)));
s[3] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride)));
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 3 * src_stride)),
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 4 * src_stride)));
s[4] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride)));
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 4 * src_stride)),
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 5 * src_stride)));
s[5] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride)), src6);
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 5 * src_stride)), src6);
do {
s[6] = _mm_unpacklo_epi8(
- src6, _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride)));
- src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 8 * src_stride));
+ src6, _mm_cvtsi32_si128(*(int *)(src_ptr + 7 * src_stride)));
+ src6 = _mm_cvtsi32_si128(*(int *)(src_ptr + 8 * src_stride));
s[7] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride)), src6);
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 7 * src_stride)), src6);
res = convolve_lo_y(s + 0, coeffs);
res_shift = _mm_sll_epi32(res, left_shift);
@@ -223,7 +223,7 @@ void av1_dist_wtd_convolve_y_sse2(const uint8_t *src, int src_stride,
&comp_avg_res, &offset_const, &rounding_const, rounding_shift);
const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
- *(uint32_t *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
+ *(int *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
} else {
_mm_store_si128((__m128i *)dst, res_unsigned);
@@ -252,7 +252,7 @@ void av1_dist_wtd_convolve_y_sse2(const uint8_t *src, int src_stride,
&comp_avg_res, &offset_const, &rounding_const, rounding_shift);
const __m128i res_8 = _mm_packus_epi16(round_result, round_result);
- *(uint32_t *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
+ *(int *)(&dst0[0]) = _mm_cvtsi128_si32(res_8);
} else {
_mm_store_si128((__m128i *)dst, res_unsigned);
@@ -596,8 +596,7 @@ void av1_dist_wtd_convolve_2d_sse2(const uint8_t *src, int src_stride,
if (w > 4)
_mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_8);
else
- *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
- _mm_cvtsi128_si32(res_8);
+ *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_8);
} else {
_mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_unsigned);
}
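The stores in these hunks are retyped from *(uint32_t *) to *(int *) because _mm_cvtsi128_si32() returns a signed int; the four bytes written are bit-identical either way, and the new cast simply matches the intrinsic's return type, silencing signed/unsigned conversion warnings. A minimal sketch of the same idiom (store_u8x4 is a hypothetical helper, not a libaom function; the memcpy keeps it strict-aliasing clean):

    #include <emmintrin.h>
    #include <stdint.h>
    #include <string.h>

    static inline void store_u8x4(uint8_t *dst, __m128i v) {
      const int lo32 = _mm_cvtsi128_si32(v);  // low 32 bits, returned as int
      memcpy(dst, &lo32, sizeof(lo32));       // write the same four bytes
    }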
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/x86/jnt_convolve_ssse3.c b/chromium/third_party/libaom/source/libaom/av1/common/x86/jnt_convolve_ssse3.c
index f45e3b2671b..d0cf76391d1 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/x86/jnt_convolve_ssse3.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/x86/jnt_convolve_ssse3.c
@@ -220,8 +220,7 @@ void av1_dist_wtd_convolve_2d_ssse3(
if (w > 4)
_mm_storel_epi64((__m128i *)(&dst0[i * dst_stride0 + j]), res_8);
else
- *(uint32_t *)(&dst0[i * dst_stride0 + j]) =
- _mm_cvtsi128_si32(res_8);
+ *(int *)(&dst0[i * dst_stride0 + j]) = _mm_cvtsi128_si32(res_8);
} else {
_mm_store_si128((__m128i *)(&dst[i * dst_stride + j]), res_unsigned);
}
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/x86/reconinter_sse4.c b/chromium/third_party/libaom/source/libaom/av1/common/x86/reconinter_sse4.c
index 5171ca49346..95814b48089 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/x86/reconinter_sse4.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/x86/reconinter_sse4.c
@@ -33,21 +33,21 @@ void av1_build_compound_diffwtd_mask_sse4_1(uint8_t *mask,
int i = 0;
if (4 == w) {
do {
- const __m128i s0A = _mm_cvtsi32_si128(*(uint32_t *)src0);
- const __m128i s0B = _mm_cvtsi32_si128(*(uint32_t *)(src0 + stride0));
+ const __m128i s0A = _mm_cvtsi32_si128(*(int *)src0);
+ const __m128i s0B = _mm_cvtsi32_si128(*(int *)(src0 + stride0));
const __m128i s0AB = _mm_unpacklo_epi32(s0A, s0B);
const __m128i s0 = _mm_cvtepu8_epi16(s0AB);
- const __m128i s1A = _mm_cvtsi32_si128(*(uint32_t *)src1);
- const __m128i s1B = _mm_cvtsi32_si128(*(uint32_t *)(src1 + stride1));
+ const __m128i s1A = _mm_cvtsi32_si128(*(int *)src1);
+ const __m128i s1B = _mm_cvtsi32_si128(*(int *)(src1 + stride1));
const __m128i s1AB = _mm_unpacklo_epi32(s1A, s1B);
const __m128i s1 = _mm_cvtepu8_epi16(s1AB);
const __m128i m16 = calc_mask(mask_base, s0, s1);
const __m128i m8 = _mm_packus_epi16(m16, m16);
- *(uint32_t *)mask = _mm_cvtsi128_si32(m8);
- *(uint32_t *)(mask + w) = _mm_extract_epi32(m8, 1);
+ *(int *)mask = _mm_cvtsi128_si32(m8);
+ *(int *)(mask + w) = _mm_extract_epi32(m8, 1);
src0 += (stride0 << 1);
src1 += (stride1 << 1);
mask += 8;
@@ -146,7 +146,7 @@ void av1_build_compound_diffwtd_mask_d16_sse4_1(
if ((w - j) > 4) {
_mm_storel_epi64(dst, res_8);
} else { // w==4
- *(uint32_t *)dst = _mm_cvtsi128_si32(res_8);
+ *(int *)dst = _mm_cvtsi128_si32(res_8);
}
}
}
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/x86/selfguided_avx2.c b/chromium/third_party/libaom/source/libaom/av1/common/x86/selfguided_avx2.c
index 3c5558dda72..4ab35e808b4 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/x86/selfguided_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/x86/selfguided_avx2.c
@@ -230,7 +230,7 @@ static void calc_ab(int32_t *A, int32_t *B, const int32_t *C, const int32_t *D,
const __m256i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS);
// Set up masks
- const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
+ const __m128i ones32 = _mm_set_epi32(0, 0, ~0, ~0);
__m256i mask[8];
for (int idx = 0; idx < 8; idx++) {
const __m128i shift = _mm_cvtsi32_si128(8 * (8 - idx));
@@ -367,7 +367,7 @@ static void calc_ab_fast(int32_t *A, int32_t *B, const int32_t *C,
const __m256i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS);
// Set up masks
- const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
+ const __m128i ones32 = _mm_set_epi32(0, 0, ~0, ~0);
__m256i mask[8];
for (int idx = 0; idx < 8; idx++) {
const __m128i shift = _mm_cvtsi32_si128(8 * (8 - idx));
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/x86/selfguided_sse4.c b/chromium/third_party/libaom/source/libaom/av1/common/x86/selfguided_sse4.c
index 72c7708f199..948bbfbf0f7 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/x86/selfguided_sse4.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/x86/selfguided_sse4.c
@@ -181,7 +181,7 @@ static void calc_ab(int32_t *A, int32_t *B, const int32_t *C, const int32_t *D,
const __m128i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS);
// Set up masks
- const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
+ const __m128i ones32 = _mm_set_epi32(0, 0, ~0, ~0);
__m128i mask[4];
for (int idx = 0; idx < 4; idx++) {
const __m128i shift = _mm_cvtsi32_si128(8 * (4 - idx));
@@ -322,7 +322,7 @@ static void calc_ab_fast(int32_t *A, int32_t *B, const int32_t *C,
const __m128i rnd_res = round_for_shift(SGRPROJ_RECIP_BITS);
// Set up masks
- const __m128i ones32 = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
+ const __m128i ones32 = _mm_set_epi32(0, 0, ~0, ~0);
__m128i mask[4];
for (int idx = 0; idx < 4; idx++) {
const __m128i shift = _mm_cvtsi32_si128(8 * (4 - idx));
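In C, the literal 0xffffffff has type unsigned int, so handing it to _mm_set_epi32(), whose parameters are int, forces a narrowing conversion and draws conversion warnings; ~0 is already an all-bits-set int. A small sketch of the equivalence (demo code, not from libaom):

    #include <emmintrin.h>
    #include <assert.h>

    static void ones32_demo(void) {
      const __m128i a = _mm_set_epi32(0, 0, ~0, ~0);  // ~0 == -1 as an int
      const __m128i b = _mm_set_epi32(0, 0, -1, -1);
      // Both vectors carry the same bit pattern: low 64 bits all set.
      assert(_mm_movemask_epi8(_mm_cmpeq_epi8(a, b)) == 0xFFFF);
    }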
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/x86/warp_plane_avx2.c b/chromium/third_party/libaom/source/libaom/av1/common/x86/warp_plane_avx2.c
index f6aaa8887b9..ceb836ea6cf 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/x86/warp_plane_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/x86/warp_plane_avx2.c
@@ -699,8 +699,8 @@ static INLINE void store_vertical_filter_output_avx2(
const __m256i res_8_lo = _mm256_packus_epi16(res_lo_16, res_lo_16);
const __m128i res_8_lo_0 = _mm256_castsi256_si128(res_8_lo);
const __m128i res_8_lo_1 = _mm256_extracti128_si256(res_8_lo, 1);
- *(uint32_t *)dst8_0 = _mm_cvtsi128_si32(res_8_lo_0);
- *(uint32_t *)dst8_1 = _mm_cvtsi128_si32(res_8_lo_1);
+ *(int *)dst8_0 = _mm_cvtsi128_si32(res_8_lo_0);
+ *(int *)dst8_1 = _mm_cvtsi128_si32(res_8_lo_1);
} else {
const __m128i temp_lo_16_0 = _mm256_castsi256_si128(temp_lo_16);
const __m128i temp_lo_16_1 = _mm256_extracti128_si256(temp_lo_16, 1);
@@ -742,8 +742,8 @@ static INLINE void store_vertical_filter_output_avx2(
__m256i res_8_hi = _mm256_packus_epi16(res_hi_16, res_hi_16);
const __m128i res_8_hi_0 = _mm256_castsi256_si128(res_8_hi);
const __m128i res_8_hi_1 = _mm256_extracti128_si256(res_8_hi, 1);
- *(uint32_t *)dst8_4_0 = _mm_cvtsi128_si32(res_8_hi_0);
- *(uint32_t *)dst8_4_1 = _mm_cvtsi128_si32(res_8_hi_1);
+ *(int *)dst8_4_0 = _mm_cvtsi128_si32(res_8_hi_0);
+ *(int *)dst8_4_1 = _mm_cvtsi128_si32(res_8_hi_1);
} else {
const __m128i temp_hi_16_0 = _mm256_castsi256_si128(temp_hi_16);
const __m128i temp_hi_16_1 = _mm256_extracti128_si256(temp_hi_16, 1);
@@ -767,8 +767,8 @@ static INLINE void store_vertical_filter_output_avx2(
__m128i *const p1 = (__m128i *)&pred[(i + (k + 1) + 4) * p_stride + j];
if (p_width == 4) {
- *(uint32_t *)p = _mm_cvtsi128_si32(res_8bit0);
- *(uint32_t *)p1 = _mm_cvtsi128_si32(res_8bit1);
+ *(int *)p = _mm_cvtsi128_si32(res_8bit0);
+ *(int *)p1 = _mm_cvtsi128_si32(res_8bit1);
} else {
_mm_storel_epi64(p, res_8bit0);
_mm_storel_epi64(p1, res_8bit1);
@@ -1028,12 +1028,12 @@ int64_t av1_calc_frame_error_avx2(const uint8_t *const ref, int ref_stride,
int64_t sum_error = 0;
int i, j;
__m256i row_error, col_error;
- __m256i zero = _mm256_set1_epi16(0);
+ __m256i zero = _mm256_setzero_si256();
__m256i dup_255 = _mm256_set1_epi16(255);
col_error = zero;
for (i = 0; i < (p_height / 4); i++) {
- row_error = _mm256_set1_epi16(0);
+ row_error = _mm256_setzero_si256();
for (j = 0; j < (p_width / 16); j++) {
__m256i ref_1_16 = _mm256_cvtepu8_epi16(_mm_load_si128(
(__m128i *)(ref + (j * 16) + (((i * 4) + 0) * ref_stride))));
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/x86/warp_plane_sse2.c b/chromium/third_party/libaom/source/libaom/av1/common/x86/warp_plane_sse2.c
index 6ff666518d9..f8fe578e952 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/x86/warp_plane_sse2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/x86/warp_plane_sse2.c
@@ -21,7 +21,7 @@ int64_t av1_calc_frame_error_sse2(const uint8_t *const ref, int ref_stride,
int64_t sum_error = 0;
int i, j;
__m128i row_error, col_error;
- __m128i zero = _mm_set1_epi16(0);
+ __m128i zero = _mm_setzero_si128();
__m128i dup_255 = _mm_set1_epi16(255);
col_error = zero;
for (i = 0; i < (p_height); i++) {
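_mm_setzero_si128() is the canonical zero idiom: it states "all-zero vector" directly and compiles to a register self-XOR (pxor), while _mm_set1_epi16(0) describes a broadcast of zero and leans on the optimizer to produce the same code. Illustrative use (sketch, not libaom code):

    #include <emmintrin.h>

    static __m128i sum_abs_diff(const __m128i *a, const __m128i *b, int n) {
      __m128i acc = _mm_setzero_si128();  // pxor xmm, xmm
      for (int i = 0; i < n; ++i)
        acc = _mm_add_epi64(acc, _mm_sad_epu8(a[i], b[i]));
      return acc;  // two 64-bit partial sums, one per 8-byte half
    }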
diff --git a/chromium/third_party/libaom/source/libaom/av1/common/x86/warp_plane_sse4.c b/chromium/third_party/libaom/source/libaom/av1/common/x86/warp_plane_sse4.c
index b1df486f47a..e35b55756e2 100644
--- a/chromium/third_party/libaom/source/libaom/av1/common/x86/warp_plane_sse4.c
+++ b/chromium/third_party/libaom/source/libaom/av1/common/x86/warp_plane_sse4.c
@@ -613,7 +613,7 @@ static INLINE void store_vertical_filter_output(
res_lo_16 = _mm_srai_epi16(_mm_add_epi16(res_lo_16, *round_bits_const),
round_bits);
__m128i res_8_lo = _mm_packus_epi16(res_lo_16, res_lo_16);
- *(uint32_t *)dst8 = _mm_cvtsi128_si32(res_8_lo);
+ *(int *)dst8 = _mm_cvtsi128_si32(res_8_lo);
} else {
_mm_storel_epi64(p, temp_lo_16);
}
@@ -645,7 +645,7 @@ static INLINE void store_vertical_filter_output(
res_hi_16 = _mm_srai_epi16(_mm_add_epi16(res_hi_16, *round_bits_const),
round_bits);
__m128i res_8_hi = _mm_packus_epi16(res_hi_16, res_hi_16);
- *(uint32_t *)dst8_4 = _mm_cvtsi128_si32(res_8_hi);
+ *(int *)dst8_4 = _mm_cvtsi128_si32(res_8_hi);
} else {
_mm_storel_epi64(p4, temp_hi_16);
@@ -667,7 +667,7 @@ static INLINE void store_vertical_filter_output(
// to only output 4 pixels at this point, to avoid encode/decode
// mismatches when encoding with multiple threads.
if (p_width == 4) {
- *(uint32_t *)p = _mm_cvtsi128_si32(res_8bit);
+ *(int *)p = _mm_cvtsi128_si32(res_8bit);
} else {
_mm_storel_epi64(p, res_8bit);
}
diff --git a/chromium/third_party/libaom/source/libaom/av1/decoder/accounting.c b/chromium/third_party/libaom/source/libaom/av1/decoder/accounting.c
index 2e58d09e0db..1ded380ec38 100644
--- a/chromium/third_party/libaom/source/libaom/av1/decoder/accounting.c
+++ b/chromium/third_party/libaom/source/libaom/av1/decoder/accounting.c
@@ -47,6 +47,7 @@ int aom_accounting_dictionary_lookup(Accounting *accounting, const char *str) {
accounting->hash_dictionary[hash] = dictionary->num_strs;
len = strlen(str);
dictionary->strs[dictionary->num_strs] = malloc(len + 1);
+ if (!dictionary->strs[dictionary->num_strs]) abort();
snprintf(dictionary->strs[dictionary->num_strs], len + 1, "%s", str);
dictionary->num_strs++;
return dictionary->num_strs - 1;
@@ -57,6 +58,7 @@ void aom_accounting_init(Accounting *accounting) {
accounting->num_syms_allocated = 1000;
accounting->syms.syms =
malloc(sizeof(AccountingSymbol) * accounting->num_syms_allocated);
+ if (!accounting->syms.syms) abort();
accounting->syms.dictionary.num_strs = 0;
assert(AOM_ACCOUNTING_HASH_SIZE > 2 * MAX_SYMBOL_TYPES);
for (i = 0; i < AOM_ACCOUNTING_HASH_SIZE; i++)
@@ -116,7 +118,7 @@ void aom_accounting_record(Accounting *accounting, const char *str,
accounting->syms.syms =
realloc(accounting->syms.syms,
sizeof(AccountingSymbol) * accounting->num_syms_allocated);
- assert(accounting->syms.syms != NULL);
+ if (!accounting->syms.syms) abort();
}
accounting->syms.syms[accounting->syms.num_syms++] = sym;
}
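The assert-to-abort change is not cosmetic: assert() compiles away when NDEBUG is defined, the usual release configuration, so a failed realloc previously went unnoticed until the NULL pointer was dereferenced. Overwriting the pointer with realloc's result would ordinarily leak the old block on failure, but aborting makes that moot. A hypothetical wrapper expressing the same policy:

    #include <stdlib.h>

    static void *xrealloc(void *ptr, size_t size) {
      void *p = realloc(ptr, size);
      if (!p) abort();  // out of memory: fail loudly in every build mode
      return p;
    }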
diff --git a/chromium/third_party/libaom/source/libaom/av1/decoder/decodeframe.c b/chromium/third_party/libaom/source/libaom/av1/decoder/decodeframe.c
index 77b036fde8b..34dd4389324 100644
--- a/chromium/third_party/libaom/source/libaom/av1/decoder/decodeframe.c
+++ b/chromium/third_party/libaom/source/libaom/av1/decoder/decodeframe.c
@@ -116,11 +116,9 @@ static AOM_INLINE void set_planes_to_neutral_grey(
}
}
-#if !CONFIG_REALTIME_ONLY
static AOM_INLINE void loop_restoration_read_sb_coeffs(
const AV1_COMMON *const cm, MACROBLOCKD *xd, aom_reader *const r, int plane,
int runit_idx);
-#endif
static int read_is_valid(const uint8_t *start, size_t len, const uint8_t *end) {
return len != 0 && len <= (size_t)(end - start);
@@ -915,14 +913,6 @@ static AOM_INLINE void decode_token_recon_block(AV1Decoder *const pbi,
if (plane && !xd->is_chroma_ref) break;
const struct macroblockd_plane *const pd = &xd->plane[plane];
const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
-#if CONFIG_REALTIME_ONLY
- // Realtime only build doesn't support 4x rectangular txfm sizes.
- if (tx_size >= TX_4X16) {
- aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_FEATURE,
- "Realtime only build doesn't support 4x "
- "rectangular txfm sizes");
- }
-#endif
const int stepr = tx_size_high_unit[tx_size];
const int stepc = tx_size_wide_unit[tx_size];
@@ -1282,9 +1272,6 @@ static AOM_INLINE void decode_partition(AV1Decoder *const pbi,
if (parse_decode_flag & 1) {
const int num_planes = av1_num_planes(cm);
for (int plane = 0; plane < num_planes; ++plane) {
-#if CONFIG_REALTIME_ONLY
- assert(cm->rst_info[plane].frame_restoration_type == RESTORE_NONE);
-#else
int rcol0, rcol1, rrow0, rrow1;
if (av1_loop_restoration_corners_in_sb(cm, plane, mi_row, mi_col, bsize,
&rcol0, &rcol1, &rrow0, &rrow1)) {
@@ -1296,7 +1283,6 @@ static AOM_INLINE void decode_partition(AV1Decoder *const pbi,
}
}
}
-#endif
}
partition = (bsize < BLOCK_8X8) ? PARTITION_NONE
@@ -1525,10 +1511,6 @@ static AOM_INLINE void decode_restoration_mode(AV1_COMMON *cm,
}
}
if (!all_none) {
-#if CONFIG_REALTIME_ONLY
- aom_internal_error(cm->error, AOM_CODEC_UNSUP_FEATURE,
- "Realtime only build doesn't support loop restoration");
-#endif
assert(cm->seq_params->sb_size == BLOCK_64X64 ||
cm->seq_params->sb_size == BLOCK_128X128);
const int sb_size = cm->seq_params->sb_size == BLOCK_128X128 ? 128 : 64;
@@ -1565,7 +1547,6 @@ static AOM_INLINE void decode_restoration_mode(AV1_COMMON *cm,
}
}
-#if !CONFIG_REALTIME_ONLY
static AOM_INLINE void read_wiener_filter(int wiener_win,
WienerInfo *wiener_info,
WienerInfo *ref_wiener_info,
@@ -1706,7 +1687,6 @@ static AOM_INLINE void loop_restoration_read_sb_coeffs(
}
}
}
-#endif // !CONFIG_REALTIME_ONLY
static AOM_INLINE void setup_loopfilter(AV1_COMMON *cm,
struct aom_read_bit_buffer *rb) {
@@ -1911,10 +1891,8 @@ static AOM_INLINE void resize_context_buffers(AV1_COMMON *cm, int width,
width, height, DECODE_WIDTH_LIMIT, DECODE_HEIGHT_LIMIT);
#endif
if (cm->width != width || cm->height != height) {
- const int new_mi_rows =
- ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2) >> MI_SIZE_LOG2;
- const int new_mi_cols =
- ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2) >> MI_SIZE_LOG2;
+ const int new_mi_rows = CEIL_POWER_OF_TWO(height, MI_SIZE_LOG2);
+ const int new_mi_cols = CEIL_POWER_OF_TWO(width, MI_SIZE_LOG2);
// Allocations in av1_alloc_context_buffers() depend on individual
// dimensions as well as the overall size.
@@ -1951,7 +1929,7 @@ static AOM_INLINE void setup_buffer_pool(AV1_COMMON *cm) {
&cm->cur_frame->buf, cm->width, cm->height, seq_params->subsampling_x,
seq_params->subsampling_y, seq_params->use_highbitdepth,
AOM_DEC_BORDER_IN_PIXELS, cm->features.byte_alignment,
- &cm->cur_frame->raw_frame_buffer, pool->get_fb_cb, pool->cb_priv,
+ &cm->cur_frame->raw_frame_buffer, pool->get_fb_cb, pool->cb_priv, 0,
0)) {
unlock_buffer_pool(pool);
aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
@@ -2095,12 +2073,10 @@ static AOM_INLINE void read_tile_info_max_tile(
AV1_COMMON *const cm, struct aom_read_bit_buffer *const rb) {
const SequenceHeader *const seq_params = cm->seq_params;
CommonTileParams *const tiles = &cm->tiles;
- int width_mi =
- ALIGN_POWER_OF_TWO(cm->mi_params.mi_cols, seq_params->mib_size_log2);
- int height_mi =
- ALIGN_POWER_OF_TWO(cm->mi_params.mi_rows, seq_params->mib_size_log2);
- int width_sb = width_mi >> seq_params->mib_size_log2;
- int height_sb = height_mi >> seq_params->mib_size_log2;
+ int width_sb =
+ CEIL_POWER_OF_TWO(cm->mi_params.mi_cols, seq_params->mib_size_log2);
+ int height_sb =
+ CEIL_POWER_OF_TWO(cm->mi_params.mi_rows, seq_params->mib_size_log2);
av1_get_tile_limits(cm);
tiles->uniform_spacing = aom_rb_read_bit(rb);
@@ -2554,7 +2530,8 @@ static INLINE void sync_read(AV1DecRowMTSync *const dec_row_mt_sync, int r,
pthread_mutex_t *const mutex = &dec_row_mt_sync->mutex_[r - 1];
pthread_mutex_lock(mutex);
- while (c > dec_row_mt_sync->cur_sb_col[r - 1] - nsync) {
+ while (c > dec_row_mt_sync->cur_sb_col[r - 1] - nsync -
+ dec_row_mt_sync->intrabc_extra_top_right_sb_delay) {
pthread_cond_wait(&dec_row_mt_sync->cond_[r - 1], mutex);
}
pthread_mutex_unlock(mutex);
@@ -2577,7 +2554,7 @@ static INLINE void sync_write(AV1DecRowMTSync *const dec_row_mt_sync, int r,
cur = c;
if (c % nsync) sig = 0;
} else {
- cur = sb_cols + nsync;
+ cur = sb_cols + nsync + dec_row_mt_sync->intrabc_extra_top_right_sb_delay;
}
if (sig) {
@@ -3699,6 +3676,8 @@ static AOM_INLINE void row_mt_frame_init(AV1Decoder *pbi, int tile_rows_start,
tile_data->dec_row_mt_sync.mi_cols =
ALIGN_POWER_OF_TWO(tile_info->mi_col_end - tile_info->mi_col_start,
cm->seq_params->mib_size_log2);
+ tile_data->dec_row_mt_sync.intrabc_extra_top_right_sb_delay =
+ av1_get_intrabc_extra_top_right_sb_delay(cm);
frame_row_mt_info->mi_rows_to_decode +=
tile_data->dec_row_mt_sync.mi_rows;
@@ -4345,14 +4324,10 @@ static int read_global_motion_params(WarpedMotionParams *params,
trans_dec_factor;
}
-#if !CONFIG_REALTIME_ONLY
- // For realtime only build, warped motion is disabled, so this section is not
- // needed.
if (params->wmtype <= AFFINE) {
int good_shear_params = av1_get_shear_params(params);
if (!good_shear_params) return 0;
}
-#endif
return 1;
}
@@ -4790,7 +4765,8 @@ static int read_uncompressed_header(AV1Decoder *pbi,
seq_params->max_frame_height, seq_params->subsampling_x,
seq_params->subsampling_y, seq_params->use_highbitdepth,
AOM_BORDER_IN_PIXELS, features->byte_alignment,
- &buf->raw_frame_buffer, pool->get_fb_cb, pool->cb_priv, 0)) {
+ &buf->raw_frame_buffer, pool->get_fb_cb, pool->cb_priv, 0,
+ 0)) {
decrease_ref_count(buf, pool);
unlock_buffer_pool(pool);
aom_internal_error(&pbi->error, AOM_CODEC_MEM_ERROR,
@@ -5144,7 +5120,6 @@ BITSTREAM_PROFILE av1_read_profile(struct aom_read_bit_buffer *rb) {
return (BITSTREAM_PROFILE)profile;
}
-#if !CONFIG_REALTIME_ONLY
static AOM_INLINE void superres_post_decode(AV1Decoder *pbi) {
AV1_COMMON *const cm = &pbi->common;
BufferPool *const pool = cm->buffer_pool;
@@ -5154,7 +5129,6 @@ static AOM_INLINE void superres_post_decode(AV1Decoder *pbi) {
av1_superres_upscale(cm, pool);
}
-#endif
uint32_t av1_decode_frame_headers_and_setup(AV1Decoder *pbi,
struct aom_read_bit_buffer *rb,
@@ -5240,13 +5214,12 @@ uint32_t av1_decode_frame_headers_and_setup(AV1Decoder *pbi,
static AOM_INLINE void setup_frame_info(AV1Decoder *pbi) {
AV1_COMMON *const cm = &pbi->common;
-#if !CONFIG_REALTIME_ONLY
if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
cm->rst_info[2].frame_restoration_type != RESTORE_NONE) {
av1_alloc_restoration_buffers(cm);
}
-#endif
+
const int use_highbd = cm->seq_params->use_highbitdepth;
const int buf_size = MC_TEMP_BUF_PELS << use_highbd;
if (pbi->td.mc_buf_size != buf_size) {
@@ -5303,12 +5276,13 @@ void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data,
cm->cdef_info.cdef_uv_strengths[0]);
const int do_superres = av1_superres_scaled(cm);
const int optimized_loop_restoration = !do_cdef && !do_superres;
-
-#if !CONFIG_REALTIME_ONLY
const int do_loop_restoration =
cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
cm->rst_info[2].frame_restoration_type != RESTORE_NONE;
+ // Frame border extension is not required in the decoder
+ // as it happens in extend_mc_border().
+ int do_extend_border_mt = 0;
if (!optimized_loop_restoration) {
if (do_loop_restoration)
av1_loop_restoration_save_boundary_lines(&pbi->common.cur_frame->buf,
@@ -5318,7 +5292,8 @@ void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data,
if (pbi->num_workers > 1) {
av1_cdef_frame_mt(cm, &pbi->dcb.xd, pbi->cdef_worker,
pbi->tile_workers, &pbi->cdef_sync,
- pbi->num_workers, av1_cdef_init_fb_row_mt);
+ pbi->num_workers, av1_cdef_init_fb_row_mt,
+ do_extend_border_mt);
} else {
av1_cdef_frame(&pbi->common.cur_frame->buf, cm, &pbi->dcb.xd,
av1_cdef_init_fb_row);
@@ -5334,7 +5309,7 @@ void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data,
av1_loop_restoration_filter_frame_mt(
(YV12_BUFFER_CONFIG *)xd->cur_buf, cm, optimized_loop_restoration,
pbi->tile_workers, pbi->num_workers, &pbi->lr_row_sync,
- &pbi->lr_ctxt);
+ &pbi->lr_ctxt, do_extend_border_mt);
} else {
av1_loop_restoration_filter_frame((YV12_BUFFER_CONFIG *)xd->cur_buf,
cm, optimized_loop_restoration,
@@ -5349,7 +5324,7 @@ void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data,
av1_loop_restoration_filter_frame_mt(
(YV12_BUFFER_CONFIG *)xd->cur_buf, cm, optimized_loop_restoration,
pbi->tile_workers, pbi->num_workers, &pbi->lr_row_sync,
- &pbi->lr_ctxt);
+ &pbi->lr_ctxt, do_extend_border_mt);
} else {
av1_loop_restoration_filter_frame((YV12_BUFFER_CONFIG *)xd->cur_buf,
cm, optimized_loop_restoration,
@@ -5357,20 +5332,6 @@ void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data,
}
}
}
-#else
- if (!optimized_loop_restoration) {
- if (do_cdef) {
- if (pbi->num_workers > 1) {
- av1_cdef_frame_mt(cm, &pbi->dcb.xd, pbi->cdef_worker,
- pbi->tile_workers, &pbi->cdef_sync,
- pbi->num_workers, av1_cdef_init_fb_row_mt);
- } else {
- av1_cdef_frame(&pbi->common.cur_frame->buf, cm, &pbi->dcb.xd,
- av1_cdef_init_fb_row);
- }
- }
- }
-#endif // !CONFIG_REALTIME_ONLY
}
if (!pbi->dcb.corrupted) {
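The mode-info hunks above fold ALIGN_POWER_OF_TWO(v, n) >> n into CEIL_POWER_OF_TWO(v, n): rounding v up to a multiple of 2^n and then shifting right by n is exactly ceil(v / 2^n), so the change is behavior-preserving. A quick check of the identity, assuming the usual macro definitions (sketch only):

    #include <assert.h>

    /* Assumed definitions, mirroring the libaom helpers. */
    #define ALIGN_POWER_OF_TWO(v, n) \
      (((v) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
    #define CEIL_POWER_OF_TWO(v, n) (((v) + (1 << (n)) - 1) >> (n))

    static void mi_units_check(void) {
      enum { MI_SIZE_LOG2 = 2 };  // 4x4 mode-info units
      for (int v = 1; v <= 4096; ++v)
        assert((ALIGN_POWER_OF_TWO(v, MI_SIZE_LOG2) >> MI_SIZE_LOG2) ==
               CEIL_POWER_OF_TWO(v, MI_SIZE_LOG2));
    }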
diff --git a/chromium/third_party/libaom/source/libaom/av1/decoder/decodemv.c b/chromium/third_party/libaom/source/libaom/av1/decoder/decodemv.c
index 839bda2be66..1988eb2bffa 100644
--- a/chromium/third_party/libaom/source/libaom/av1/decoder/decodemv.c
+++ b/chromium/third_party/libaom/source/libaom/av1/decoder/decodemv.c
@@ -279,7 +279,7 @@ int av1_neg_deinterleave(int diff, int ref, int max) {
static int read_segment_id(AV1_COMMON *const cm, const MACROBLOCKD *const xd,
aom_reader *r, int skip) {
int cdf_num;
- const int pred = av1_get_spatial_seg_pred(cm, xd, &cdf_num);
+ const int pred = av1_get_spatial_seg_pred(cm, xd, &cdf_num, 0);
if (skip) return pred;
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
@@ -1204,7 +1204,9 @@ static INLINE int assign_mv(AV1_COMMON *cm, MACROBLOCKD *xd,
.as_int;
break;
}
- default: { return 0; }
+ default: {
+ return 0;
+ }
}
int ret = is_mv_valid(&mv[0].as_mv);
@@ -1475,7 +1477,6 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
read_mb_interp_filter(xd, features->interp_filter,
cm->seq_params->enable_dual_filter, mbmi, r);
-#if !CONFIG_REALTIME_ONLY
if (mbmi->motion_mode == WARPED_CAUSAL) {
const int mi_row = xd->mi_row;
const int mi_col = xd->mi_col;
@@ -1496,7 +1497,6 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
mbmi->wm_params.invalid = 1;
}
}
-#endif
xd->cfl.store_y = store_cfl_required(cm, xd);
diff --git a/chromium/third_party/libaom/source/libaom/av1/decoder/decoder.c b/chromium/third_party/libaom/source/libaom/av1/decoder/decoder.c
index d0d69a37981..f1ffaa45e83 100644
--- a/chromium/third_party/libaom/source/libaom/av1/decoder/decoder.c
+++ b/chromium/third_party/libaom/source/libaom/av1/decoder/decoder.c
@@ -58,8 +58,8 @@ static void dec_set_mb_mi(CommonModeInfoParams *mi_params, int width,
mi_params->mi_rows = aligned_height >> MI_SIZE_LOG2;
mi_params->mi_stride = calc_mi_size(mi_params->mi_cols);
- mi_params->mb_cols = (mi_params->mi_cols + 2) >> 2;
- mi_params->mb_rows = (mi_params->mi_rows + 2) >> 2;
+ mi_params->mb_cols = ROUND_POWER_OF_TWO(mi_params->mi_cols, 2);
+ mi_params->mb_rows = ROUND_POWER_OF_TWO(mi_params->mi_rows, 2);
mi_params->MBs = mi_params->mb_rows * mi_params->mb_cols;
mi_params->mi_alloc_bsize = BLOCK_4X4;
@@ -135,9 +135,8 @@ AV1Decoder *av1_decoder_create(BufferPool *const pool) {
av1_loop_filter_init(cm);
av1_qm_init(&cm->quant_params, av1_num_planes(cm));
-#if !CONFIG_REALTIME_ONLY
av1_loop_restoration_precal();
-#endif
+
#if CONFIG_ACCOUNTING
pbi->acct_enabled = 1;
aom_accounting_init(&pbi->accounting);
@@ -216,9 +215,7 @@ void av1_decoder_remove(AV1Decoder *pbi) {
if (pbi->num_workers > 0) {
av1_loop_filter_dealloc(&pbi->lf_row_sync);
-#if !CONFIG_REALTIME_ONLY
av1_loop_restoration_dealloc(&pbi->lr_row_sync, pbi->num_workers);
-#endif
av1_dealloc_dec_jobs(&pbi->tile_mt_info);
}
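The mb_cols/mb_rows rewrite in dec_set_mb_mi() is purely for readability: assuming the standard definition below, ROUND_POWER_OF_TWO(v, 2) expands to (v + 2) >> 2, the literal expression it replaces (a 16x16 macroblock covers four 4x4 mode-info units, and the +2 rounds to nearest). Sketch:

    #include <assert.h>

    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

    static void mb_units_check(void) {
      for (int mi = 0; mi <= 4096; ++mi)
        assert(ROUND_POWER_OF_TWO(mi, 2) == ((mi + 2) >> 2));
    }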
diff --git a/chromium/third_party/libaom/source/libaom/av1/decoder/decoder.h b/chromium/third_party/libaom/source/libaom/av1/decoder/decoder.h
index 226b9dca855..560b1d9f24b 100644
--- a/chromium/third_party/libaom/source/libaom/av1/decoder/decoder.h
+++ b/chromium/third_party/libaom/source/libaom/av1/decoder/decoder.h
@@ -144,7 +144,16 @@ typedef struct AV1DecRowMTSyncData {
#endif
int allocated_sb_rows;
int *cur_sb_col;
+ // Denotes the superblock interval at which conditional signalling should
+ // happen. Also denotes the minimum number of extra superblocks of the top row
+ // to be complete to start decoding the current superblock. A value of 1
+ // indicates top-right dependency.
int sync_range;
+ // Denotes the additional number of superblocks in the previous row to be
+ // complete to start decoding the current superblock when intraBC tool is
+ // enabled. This additional top-right delay is required to satisfy the
+ // hardware constraints for intraBC tool when row multithreading is enabled.
+ int intrabc_extra_top_right_sb_delay;
int mi_rows;
int mi_cols;
int mi_rows_parse_done;
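These comments document the contract that the sync_read()/sync_write() hunks in decodeframe.c enforce: a superblock at column c of row r may start decoding only after row r - 1 has progressed sync_range plus intrabc_extra_top_right_sb_delay superblocks past c. A hedged sketch of the readiness predicate (the helper name is illustrative, not libaom API):

    // Returns nonzero when column c of the current row may start decoding,
    // given how far the row above has progressed (cur_sb_col_above).
    static int top_row_ready(int cur_sb_col_above, int c, int sync_range,
                             int intrabc_extra_delay) {
      return c <= cur_sb_col_above - sync_range - intrabc_extra_delay;
    }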
diff --git a/chromium/third_party/libaom/source/libaom/av1/decoder/grain_synthesis.c b/chromium/third_party/libaom/source/libaom/av1/decoder/grain_synthesis.c
index 60b458b21a4..d276f6f90e5 100644
--- a/chromium/third_party/libaom/source/libaom/av1/decoder/grain_synthesis.c
+++ b/chromium/third_party/libaom/source/libaom/av1/decoder/grain_synthesis.c
@@ -14,6 +14,7 @@
*
*/
+#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -238,7 +239,61 @@ static int grain_max;
static uint16_t random_register = 0; // random number generator register
-static void init_arrays(const aom_film_grain_t *params, int luma_stride,
+static void dealloc_arrays(const aom_film_grain_t *params, int ***pred_pos_luma,
+ int ***pred_pos_chroma, int **luma_grain_block,
+ int **cb_grain_block, int **cr_grain_block,
+ int **y_line_buf, int **cb_line_buf,
+ int **cr_line_buf, int **y_col_buf, int **cb_col_buf,
+ int **cr_col_buf) {
+ int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
+ int num_pos_chroma = num_pos_luma;
+ if (params->num_y_points > 0) ++num_pos_chroma;
+
+ if (*pred_pos_luma) {
+ for (int row = 0; row < num_pos_luma; row++) {
+ aom_free((*pred_pos_luma)[row]);
+ }
+ aom_free(*pred_pos_luma);
+ *pred_pos_luma = NULL;
+ }
+
+ if (*pred_pos_chroma) {
+ for (int row = 0; row < num_pos_chroma; row++) {
+ aom_free((*pred_pos_chroma)[row]);
+ }
+ aom_free(*pred_pos_chroma);
+ *pred_pos_chroma = NULL;
+ }
+
+ aom_free(*y_line_buf);
+ *y_line_buf = NULL;
+
+ aom_free(*cb_line_buf);
+ *cb_line_buf = NULL;
+
+ aom_free(*cr_line_buf);
+ *cr_line_buf = NULL;
+
+ aom_free(*y_col_buf);
+ *y_col_buf = NULL;
+
+ aom_free(*cb_col_buf);
+ *cb_col_buf = NULL;
+
+ aom_free(*cr_col_buf);
+ *cr_col_buf = NULL;
+
+ aom_free(*luma_grain_block);
+ *luma_grain_block = NULL;
+
+ aom_free(*cb_grain_block);
+ *cb_grain_block = NULL;
+
+ aom_free(*cr_grain_block);
+ *cr_grain_block = NULL;
+}
+
+static bool init_arrays(const aom_film_grain_t *params, int luma_stride,
int chroma_stride, int ***pred_pos_luma_p,
int ***pred_pos_chroma_p, int **luma_grain_block,
int **cb_grain_block, int **cr_grain_block,
@@ -246,6 +301,18 @@ static void init_arrays(const aom_film_grain_t *params, int luma_stride,
int **y_col_buf, int **cb_col_buf, int **cr_col_buf,
int luma_grain_samples, int chroma_grain_samples,
int chroma_subsamp_y, int chroma_subsamp_x) {
+ *pred_pos_luma_p = NULL;
+ *pred_pos_chroma_p = NULL;
+ *luma_grain_block = NULL;
+ *cb_grain_block = NULL;
+ *cr_grain_block = NULL;
+ *y_line_buf = NULL;
+ *cb_line_buf = NULL;
+ *cr_line_buf = NULL;
+ *y_col_buf = NULL;
+ *cb_col_buf = NULL;
+ *cr_col_buf = NULL;
+
memset(scaling_lut_y, 0, sizeof(*scaling_lut_y) * 256);
memset(scaling_lut_cb, 0, sizeof(*scaling_lut_cb) * 256);
memset(scaling_lut_cr, 0, sizeof(*scaling_lut_cr) * 256);
@@ -257,17 +324,38 @@ static void init_arrays(const aom_film_grain_t *params, int luma_stride,
int **pred_pos_luma;
int **pred_pos_chroma;
- pred_pos_luma = (int **)aom_malloc(sizeof(*pred_pos_luma) * num_pos_luma);
+ pred_pos_luma = (int **)aom_calloc(num_pos_luma, sizeof(*pred_pos_luma));
+ if (!pred_pos_luma) return false;
for (int row = 0; row < num_pos_luma; row++) {
pred_pos_luma[row] = (int *)aom_malloc(sizeof(**pred_pos_luma) * 3);
+ if (!pred_pos_luma[row]) {
+ dealloc_arrays(params, pred_pos_luma_p, pred_pos_chroma_p,
+ luma_grain_block, cb_grain_block, cr_grain_block,
+ y_line_buf, cb_line_buf, cr_line_buf, y_col_buf,
+ cb_col_buf, cr_col_buf);
+ return false;
+ }
}
pred_pos_chroma =
- (int **)aom_malloc(sizeof(*pred_pos_chroma) * num_pos_chroma);
+ (int **)aom_calloc(num_pos_chroma, sizeof(*pred_pos_chroma));
+ if (!pred_pos_chroma) {
+ dealloc_arrays(params, pred_pos_luma_p, pred_pos_chroma_p, luma_grain_block,
+ cb_grain_block, cr_grain_block, y_line_buf, cb_line_buf,
+ cr_line_buf, y_col_buf, cb_col_buf, cr_col_buf);
+ return false;
+ }
for (int row = 0; row < num_pos_chroma; row++) {
pred_pos_chroma[row] = (int *)aom_malloc(sizeof(**pred_pos_chroma) * 3);
+ if (!pred_pos_chroma[row]) {
+ dealloc_arrays(params, pred_pos_luma_p, pred_pos_chroma_p,
+ luma_grain_block, cb_grain_block, cr_grain_block,
+ y_line_buf, cb_line_buf, cr_line_buf, y_col_buf,
+ cb_col_buf, cr_col_buf);
+ return false;
+ }
}
int pos_ar_index = 0;
@@ -330,45 +418,15 @@ static void init_arrays(const aom_film_grain_t *params, int luma_stride,
(int *)aom_malloc(sizeof(**cb_grain_block) * chroma_grain_samples);
*cr_grain_block =
(int *)aom_malloc(sizeof(**cr_grain_block) * chroma_grain_samples);
-}
-
-static void dealloc_arrays(const aom_film_grain_t *params, int ***pred_pos_luma,
- int ***pred_pos_chroma, int **luma_grain_block,
- int **cb_grain_block, int **cr_grain_block,
- int **y_line_buf, int **cb_line_buf,
- int **cr_line_buf, int **y_col_buf, int **cb_col_buf,
- int **cr_col_buf) {
- int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
- int num_pos_chroma = num_pos_luma;
- if (params->num_y_points > 0) ++num_pos_chroma;
-
- for (int row = 0; row < num_pos_luma; row++) {
- aom_free((*pred_pos_luma)[row]);
+ if (!(*pred_pos_luma_p && *pred_pos_chroma_p && *y_line_buf && *cb_line_buf &&
+ *cr_line_buf && *y_col_buf && *cb_col_buf && *cr_col_buf &&
+ *luma_grain_block && *cb_grain_block && *cr_grain_block)) {
+ dealloc_arrays(params, pred_pos_luma_p, pred_pos_chroma_p, luma_grain_block,
+ cb_grain_block, cr_grain_block, y_line_buf, cb_line_buf,
+ cr_line_buf, y_col_buf, cb_col_buf, cr_col_buf);
+ return false;
}
- aom_free(*pred_pos_luma);
-
- for (int row = 0; row < num_pos_chroma; row++) {
- aom_free((*pred_pos_chroma)[row]);
- }
- aom_free((*pred_pos_chroma));
-
- aom_free(*y_line_buf);
-
- aom_free(*cb_line_buf);
-
- aom_free(*cr_line_buf);
-
- aom_free(*y_col_buf);
-
- aom_free(*cb_col_buf);
-
- aom_free(*cr_col_buf);
-
- aom_free(*luma_grain_block);
-
- aom_free(*cb_grain_block);
-
- aom_free(*cr_grain_block);
+ return true;
}
// get a number between 0 and 2^bits - 1
@@ -396,15 +454,14 @@ static void init_random_generator(int luma_line, uint16_t seed) {
random_register ^= ((luma_num * 173 + 105) & 255);
}
-// Return 0 for success, -1 for failure
-static int generate_luma_grain_block(
+static void generate_luma_grain_block(
const aom_film_grain_t *params, int **pred_pos_luma, int *luma_grain_block,
int luma_block_size_y, int luma_block_size_x, int luma_grain_stride,
int left_pad, int top_pad, int right_pad, int bottom_pad) {
if (params->num_y_points == 0) {
memset(luma_grain_block, 0,
sizeof(*luma_grain_block) * luma_block_size_y * luma_grain_stride);
- return 0;
+ return;
}
int bit_depth = params->bit_depth;
@@ -434,17 +491,14 @@ static int generate_luma_grain_block(
((wsum + rounding_offset) >> params->ar_coeff_shift),
grain_min, grain_max);
}
- return 0;
}
-// Return 0 for success, -1 for failure
-static int generate_chroma_grain_blocks(
- const aom_film_grain_t *params,
- // int** pred_pos_luma,
- int **pred_pos_chroma, int *luma_grain_block, int *cb_grain_block,
- int *cr_grain_block, int luma_grain_stride, int chroma_block_size_y,
- int chroma_block_size_x, int chroma_grain_stride, int left_pad, int top_pad,
- int right_pad, int bottom_pad, int chroma_subsamp_y, int chroma_subsamp_x) {
+static bool generate_chroma_grain_blocks(
+ const aom_film_grain_t *params, int **pred_pos_chroma,
+ int *luma_grain_block, int *cb_grain_block, int *cr_grain_block,
+ int luma_grain_stride, int chroma_block_size_y, int chroma_block_size_x,
+ int chroma_grain_stride, int left_pad, int top_pad, int right_pad,
+ int bottom_pad, int chroma_subsamp_y, int chroma_subsamp_x) {
int bit_depth = params->bit_depth;
int gauss_sec_shift = 12 - bit_depth + params->grain_scale_shift;
@@ -517,7 +571,7 @@ static int generate_chroma_grain_blocks(
stderr,
"Grain synthesis: prediction between two chroma components is "
"not supported!");
- return -1;
+ return false;
}
}
if (params->num_cb_points || params->chroma_scaling_from_luma)
@@ -531,7 +585,7 @@ static int generate_chroma_grain_blocks(
((wsum_cr + rounding_offset) >> params->ar_coeff_shift),
grain_min, grain_max);
}
- return 0;
+ return true;
}
static void init_scaling_function(const int scaling_points[][2], int num_points,
@@ -1081,27 +1135,25 @@ int av1_add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
grain_min = 0 - grain_center;
grain_max = grain_center - 1;
- init_arrays(params, luma_stride, chroma_stride, &pred_pos_luma,
- &pred_pos_chroma, &luma_grain_block, &cb_grain_block,
- &cr_grain_block, &y_line_buf, &cb_line_buf, &cr_line_buf,
- &y_col_buf, &cb_col_buf, &cr_col_buf,
- luma_block_size_y * luma_block_size_x,
- chroma_block_size_y * chroma_block_size_x, chroma_subsamp_y,
- chroma_subsamp_x);
-
- if (generate_luma_grain_block(params, pred_pos_luma, luma_grain_block,
- luma_block_size_y, luma_block_size_x,
- luma_grain_stride, left_pad, top_pad, right_pad,
- bottom_pad))
+ if (!init_arrays(params, luma_stride, chroma_stride, &pred_pos_luma,
+ &pred_pos_chroma, &luma_grain_block, &cb_grain_block,
+ &cr_grain_block, &y_line_buf, &cb_line_buf, &cr_line_buf,
+ &y_col_buf, &cb_col_buf, &cr_col_buf,
+ luma_block_size_y * luma_block_size_x,
+ chroma_block_size_y * chroma_block_size_x, chroma_subsamp_y,
+ chroma_subsamp_x))
return -1;
- if (generate_chroma_grain_blocks(
- params,
- // pred_pos_luma,
- pred_pos_chroma, luma_grain_block, cb_grain_block, cr_grain_block,
- luma_grain_stride, chroma_block_size_y, chroma_block_size_x,
- chroma_grain_stride, left_pad, top_pad, right_pad, bottom_pad,
- chroma_subsamp_y, chroma_subsamp_x))
+ generate_luma_grain_block(params, pred_pos_luma, luma_grain_block,
+ luma_block_size_y, luma_block_size_x,
+ luma_grain_stride, left_pad, top_pad, right_pad,
+ bottom_pad);
+
+ if (!generate_chroma_grain_blocks(
+ params, pred_pos_chroma, luma_grain_block, cb_grain_block,
+ cr_grain_block, luma_grain_stride, chroma_block_size_y,
+ chroma_block_size_x, chroma_grain_stride, left_pad, top_pad,
+ right_pad, bottom_pad, chroma_subsamp_y, chroma_subsamp_x))
return -1;
init_scaling_function(params->scaling_points_y, params->num_y_points,
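init_arrays() now NULLs every output pointer up front, and dealloc_arrays() resets each pointer to NULL after freeing it, so cleanup is safe after any partial allocation failure and idempotent if invoked twice. The core of the pattern, as a minimal sketch:

    #include <stdlib.h>

    static void free_and_clear(int **slot) {
      free(*slot);   // free(NULL) is defined to be a no-op
      *slot = NULL;  // repeated cleanup calls cannot double-free
    }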
diff --git a/chromium/third_party/libaom/source/libaom/av1/decoder/inspection.c b/chromium/third_party/libaom/source/libaom/av1/decoder/inspection.c
index b706c45fd71..288d69a224d 100644
--- a/chromium/third_party/libaom/source/libaom/av1/decoder/inspection.c
+++ b/chromium/third_party/libaom/source/libaom/av1/decoder/inspection.c
@@ -8,6 +8,10 @@
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
+
+#include <stdio.h>
+#include <stdlib.h>
+
#include "av1/decoder/decoder.h"
#include "av1/decoder/inspection.h"
#include "av1/common/enums.h"
@@ -18,6 +22,10 @@ static void ifd_init_mi_rc(insp_frame_data *fd, int mi_cols, int mi_rows) {
fd->mi_rows = mi_rows;
fd->mi_grid = (insp_mi_data *)aom_malloc(sizeof(insp_mi_data) * fd->mi_rows *
fd->mi_cols);
+ if (!fd->mi_grid) {
+ fprintf(stderr, "Error allocating inspection data\n");
+ abort();
+ }
}
void ifd_init(insp_frame_data *fd, int frame_width, int frame_height) {
diff --git a/chromium/third_party/libaom/source/libaom/av1/decoder/obu.c b/chromium/third_party/libaom/source/libaom/av1/decoder/obu.c
index 76abf6ba820..d589f000bce 100644
--- a/chromium/third_party/libaom/source/libaom/av1/decoder/obu.c
+++ b/chromium/third_party/libaom/source/libaom/av1/decoder/obu.c
@@ -396,7 +396,7 @@ static void alloc_tile_list_buffer(AV1Decoder *pbi) {
cm->seq_params->subsampling_y,
(cm->seq_params->use_highbitdepth &&
(cm->seq_params->bit_depth > AOM_BITS_8)),
- 0, cm->features.byte_alignment))
+ 0, cm->features.byte_alignment, 0))
aom_internal_error(&pbi->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate the tile list output buffer");
}
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/allintra_vis.c b/chromium/third_party/libaom/source/libaom/av1/encoder/allintra_vis.c
index ef6dd70bf80..b0a9e24d87e 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/allintra_vis.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/allintra_vis.c
@@ -305,7 +305,7 @@ static int64_t pick_norm_factor_and_block_size(AV1_COMP *const cpi,
BLOCK_SIZE last_block_size;
BLOCK_SIZE this_block_size = sb_size;
*best_block_size = sb_size;
- // Pick from block size 64x64, 32x32 and 16x16.
+ // Pick from block size 128x128, 64x64, 32x32 and 16x16.
do {
last_block_size = this_block_size;
assert(this_block_size >= BLOCK_16X16 && this_block_size <= BLOCK_128X128);
@@ -317,7 +317,7 @@ static int64_t pick_norm_factor_and_block_size(AV1_COMP *const cpi,
int64_t norm_factor = 1;
const BLOCK_SIZE norm_block_size = this_block_size;
- assert(norm_block_size >= BLOCK_16X16 && norm_block_size <= BLOCK_64X64);
+ assert(norm_block_size >= BLOCK_16X16 && norm_block_size <= BLOCK_128X128);
const int norm_step = mi_size_wide[norm_block_size];
double sb_wiener_log = 0;
double sb_count = 0;
@@ -375,14 +375,13 @@ void av1_set_mb_wiener_variance(AV1_COMP *cpi) {
memset(&mbmi, 0, sizeof(mbmi));
MB_MODE_INFO *mbmi_ptr = &mbmi;
xd->mi = &mbmi_ptr;
- xd->cur_buf = cpi->source;
const SequenceHeader *const seq_params = cm->seq_params;
if (aom_realloc_frame_buffer(
&cm->cur_frame->buf, cm->width, cm->height, seq_params->subsampling_x,
seq_params->subsampling_y, seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels, cm->features.byte_alignment, NULL, NULL,
- NULL, cpi->oxcf.tool_cfg.enable_global_motion))
+ NULL, cpi->oxcf.tool_cfg.enable_global_motion, 0))
aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate frame buffer");
@@ -783,9 +782,7 @@ void av1_set_mb_ur_variance(AV1_COMP *cpi) {
void av1_set_mb_ur_variance(AV1_COMP *cpi) {
const AV1_COMMON *cm = &cpi->common;
const CommonModeInfoParams *const mi_params = &cm->mi_params;
- ThreadData *td = &cpi->td;
- MACROBLOCK *x = &td->mb;
- MACROBLOCKD *xd = &x->e_mbd;
+ const MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
uint8_t *y_buffer = cpi->source->y_buffer;
const int y_stride = cpi->source->y_stride;
const int block_size = cpi->common.seq_params->sb_size;
@@ -794,7 +791,6 @@ void av1_set_mb_ur_variance(AV1_COMP *cpi) {
const int num_mi_h = mi_size_high[block_size];
const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h;
- const int use_hbd = cpi->source->flags & YV12_FLAG_HIGHBITDEPTH;
int *mb_delta_q[2];
CHECK_MEM_ERROR(cm, mb_delta_q[0],
@@ -832,13 +828,8 @@ void av1_set_mb_ur_variance(AV1_COMP *cpi) {
buf.stride = y_stride;
unsigned int block_variance;
- if (use_hbd) {
- block_variance = av1_high_get_sby_perpixel_variance(
- cpi, &buf, BLOCK_8X8, xd->bd);
- } else {
- block_variance =
- av1_get_sby_perpixel_variance(cpi, &buf, BLOCK_8X8);
- }
+ block_variance = av1_get_perpixel_variance_facade(
+ cpi, xd, &buf, BLOCK_8X8, AOM_PLANE_Y);
block_variance = AOMMAX(block_variance, 1);
var += log((double)block_variance);
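The AOMMAX(block_variance, 1) clamp just above the log() matters: a perfectly flat 8x8 block has zero variance, log(0) is -HUGE_VAL, and one such block would poison the accumulated average. Clamping maps flat blocks to log(1) == 0. The guard in isolation (sketch):

    #include <math.h>

    static double safe_log_variance(unsigned int var) {
      return log((double)(var < 1 ? 1 : var));  // flat block contributes 0
    }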
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/aq_cyclicrefresh.c b/chromium/third_party/libaom/source/libaom/av1/encoder/aq_cyclicrefresh.c
index d00e5011a73..133e4826947 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/aq_cyclicrefresh.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/aq_cyclicrefresh.c
@@ -26,6 +26,8 @@ CYCLIC_REFRESH *av1_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
cr->map = aom_calloc(mi_rows * mi_cols, sizeof(*cr->map));
cr->counter_encode_maxq_scene_change = 0;
+ cr->percent_refresh_adjustment = 5;
+ cr->rate_ratio_qdelta_adjustment = 0.25;
if (cr->map == NULL) {
av1_cyclic_refresh_free(cr);
return NULL;
@@ -166,8 +168,12 @@ void av1_cyclic_reset_segment_skip(const AV1_COMP *cpi, MACROBLOCK *const x,
const int bh = mi_size_high[bsize];
const int xmis = AOMMIN(cm->mi_params.mi_cols - mi_col, bw);
const int ymis = AOMMIN(cm->mi_params.mi_rows - mi_row, bh);
+
+ assert(cm->seg.enabled);
+
if (!cr->skip_over4x4) {
- mbmi->segment_id = av1_get_spatial_seg_pred(cm, xd, &cdf_num);
+ mbmi->segment_id =
+ av1_get_spatial_seg_pred(cm, xd, &cdf_num, cr->skip_over4x4);
if (prev_segment_id != mbmi->segment_id) {
const int block_index = mi_row * cm->mi_params.mi_cols + mi_col;
for (int mi_y = 0; mi_y < ymis; mi_y++) {
@@ -266,7 +272,6 @@ void av1_cyclic_refresh_update_segment(const AV1_COMP *cpi, MACROBLOCK *const x,
void av1_init_cyclic_refresh_counters(MACROBLOCK *const x) {
x->actual_num_seg1_blocks = 0;
x->actual_num_seg2_blocks = 0;
- x->cnt_zeromv = 0;
}
// Accumulate cyclic refresh counters.
@@ -274,39 +279,6 @@ void av1_accumulate_cyclic_refresh_counters(
CYCLIC_REFRESH *const cyclic_refresh, const MACROBLOCK *const x) {
cyclic_refresh->actual_num_seg1_blocks += x->actual_num_seg1_blocks;
cyclic_refresh->actual_num_seg2_blocks += x->actual_num_seg2_blocks;
- cyclic_refresh->cnt_zeromv += x->cnt_zeromv;
-}
-
-void av1_cyclic_refresh_postencode(AV1_COMP *const cpi) {
- AV1_COMMON *const cm = &cpi->common;
- const CommonModeInfoParams *const mi_params = &cm->mi_params;
- CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
- RATE_CONTROL *const rc = &cpi->rc;
- SVC *const svc = &cpi->svc;
- const int avg_cnt_zeromv =
- 100 * cr->cnt_zeromv / (mi_params->mi_rows * mi_params->mi_cols);
-
- if (!cpi->ppi->use_svc ||
- (cpi->ppi->use_svc &&
- !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
- cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {
- rc->avg_frame_low_motion =
- (rc->avg_frame_low_motion == 0)
- ? avg_cnt_zeromv
- : (3 * rc->avg_frame_low_motion + avg_cnt_zeromv) / 4;
- // For SVC: set avg_frame_low_motion (only computed on top spatial layer)
- // to all lower spatial layers.
- if (cpi->ppi->use_svc &&
- svc->spatial_layer_id == svc->number_spatial_layers - 1) {
- for (int i = 0; i < svc->number_spatial_layers - 1; ++i) {
- const int layer = LAYER_IDS_TO_IDX(i, svc->temporal_layer_id,
- svc->number_temporal_layers);
- LAYER_CONTEXT *const lc = &svc->layer_context[layer];
- RATE_CONTROL *const lrc = &lc->rc;
- lrc->avg_frame_low_motion = rc->avg_frame_low_motion;
- }
- }
- }
}
void av1_cyclic_refresh_set_golden_update(AV1_COMP *const cpi) {
@@ -341,6 +313,9 @@ static void cyclic_refresh_update_map(AV1_COMP *const cpi) {
unsigned char *const seg_map = cpi->enc_seg.map;
int i, block_count, bl_index, sb_rows, sb_cols, sbs_in_frame;
int xmis, ymis, x, y;
+ uint64_t sb_sad = 0;
+ uint64_t thresh_sad_low = 0;
+ uint64_t thresh_sad = INT64_MAX;
memset(seg_map, CR_SEGMENT_ID_BASE, mi_params->mi_rows * mi_params->mi_cols);
sb_cols = (mi_params->mi_cols + cm->seq_params->mib_size - 1) /
cm->seq_params->mib_size;
@@ -370,14 +345,24 @@ static void cyclic_refresh_update_map(AV1_COMP *const cpi) {
// Loop through all MI blocks in superblock and update map.
xmis = AOMMIN(mi_params->mi_cols - mi_col, cm->seq_params->mib_size);
ymis = AOMMIN(mi_params->mi_rows - mi_row, cm->seq_params->mib_size);
+ if (cr->use_block_sad_scene_det && cpi->rc.frames_since_key > 30 &&
+ cr->counter_encode_maxq_scene_change > 30 &&
+ cpi->src_sad_blk_64x64 != NULL) {
+ sb_sad = cpi->src_sad_blk_64x64[sb_col_index + sb_cols * sb_row_index];
+ int scale = (cm->width * cm->height < 640 * 360) ? 6 : 8;
+ int scale_low = 2;
+ thresh_sad = (scale * 64 * 64);
+ thresh_sad_low = (scale_low * 64 * 64);
+ }
// cr_map only needed at 8x8 blocks.
for (y = 0; y < ymis; y += 2) {
for (x = 0; x < xmis; x += 2) {
const int bl_index2 = bl_index + y * mi_params->mi_cols + x;
         // If the block is a candidate for cleanup, then mark it
// for possible boost/refresh (segment 1). The segment id may get
- // reset to 0 later if block gets coded anything other than GLOBALMV.
- if (cr->map[bl_index2] == 0) {
+        // reset to 0 later if the block gets coded as anything other than low motion.
+        // If the block_sad (sb_sad) is very low, label it for refresh anyway.
+ if (cr->map[bl_index2] == 0 || sb_sad < thresh_sad_low) {
sum_map += 4;
} else if (cr->map[bl_index2] < 0) {
cr->map[bl_index2]++;
@@ -386,7 +371,8 @@ static void cyclic_refresh_update_map(AV1_COMP *const cpi) {
}
// Enforce constant segment over superblock.
// If segment is at least half of superblock, set to 1.
- if (sum_map >= (xmis * ymis) >> 1) {
+ // Enforce that block sad (sb_sad) is not too high.
+ if (sum_map >= (xmis * ymis) >> 1 && sb_sad < thresh_sad) {
for (y = 0; y < ymis; y++)
for (x = 0; x < xmis; x++) {
seg_map[bl_index + y * mi_params->mi_cols + x] = CR_SEGMENT_ID_BOOST1;
@@ -399,6 +385,10 @@ static void cyclic_refresh_update_map(AV1_COMP *const cpi) {
}
} while (cr->target_num_seg_blocks < block_count && i != cr->sb_index);
cr->sb_index = i;
+ if (cr->target_num_seg_blocks == 0) {
+ // Disable segmentation, seg_map is already set to 0 above.
+ av1_disable_segmentation(&cm->seg);
+ }
}
// Set cyclic refresh parameters.
@@ -419,6 +409,14 @@ void av1_cyclic_refresh_update_parameters(AV1_COMP *const cpi) {
const int scene_change_detected =
cpi->rc.high_source_sad ||
(cpi->ppi->use_svc && cpi->svc.high_source_sad_superframe);
+
+ // Cases to reset the cyclic refresh adjustment parameters.
+ if (frame_is_intra_only(cm) || scene_change_detected) {
+ // Reset adaptive elements for intra only frames and scene changes.
+ cr->percent_refresh_adjustment = 5;
+ cr->rate_ratio_qdelta_adjustment = 0.25;
+ }
+
// Although this segment feature for RTC is only used for
// blocks >= 8X8, for more efficient coding of the seg map
// cur_frame->seg_map needs to set at 4x4 along with the
@@ -428,6 +426,8 @@ void av1_cyclic_refresh_update_parameters(AV1_COMP *const cpi) {
// Also if loop-filter deltas is applied via segment, then
// we need to set cr->skip_over4x4 = 1.
cr->skip_over4x4 = (cpi->oxcf.speed > 9) ? 1 : 0;
+
+  // Decide whether cyclic refresh should be applied to this frame.
cr->apply_cyclic_refresh = 1;
if (frame_is_intra_only(cm) || is_lossless_requested(&cpi->oxcf.rc_cfg) ||
scene_change_detected || cpi->svc.temporal_layer_id > 0 ||
@@ -441,16 +441,20 @@ void av1_cyclic_refresh_update_parameters(AV1_COMP *const cpi) {
cr->apply_cyclic_refresh = 0;
return;
}
- cr->percent_refresh = 10;
- // Increase the amount of refresh for #temporal_layers > 2, and for some
- // frames after scene change that is encoded at high Q.
+
+  // Increase the amount of refresh for #temporal_layers > 2.
if (cpi->svc.number_temporal_layers > 2)
cr->percent_refresh = 15;
- else if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
- cr->counter_encode_maxq_scene_change < 20)
- cr->percent_refresh = 15;
+ else
+ cr->percent_refresh = 10 + cr->percent_refresh_adjustment;
+
cr->max_qdelta_perc = 60;
cr->time_for_refresh = 0;
+ cr->use_block_sad_scene_det =
+ (cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN &&
+ cm->seq_params->sb_size == BLOCK_64X64)
+ ? 1
+ : 0;
cr->motion_thresh = 32;
cr->rate_boost_fac =
(cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) ? 10 : 15;
@@ -460,9 +464,9 @@ void av1_cyclic_refresh_update_parameters(AV1_COMP *const cpi) {
if (cr->percent_refresh > 0 &&
rc->frames_since_key <
(4 * cpi->svc.number_temporal_layers) * (100 / cr->percent_refresh)) {
- cr->rate_ratio_qdelta = 3.0;
+ cr->rate_ratio_qdelta = 3.0 + cr->rate_ratio_qdelta_adjustment;
} else {
- cr->rate_ratio_qdelta = 2.0;
+ cr->rate_ratio_qdelta = 2.25 + cr->rate_ratio_qdelta_adjustment;
}
// Adjust some parameters for low resolutions.
if (cm->width * cm->height <= 352 * 288) {
@@ -520,6 +524,7 @@ void av1_cyclic_refresh_setup(AV1_COMP *const cpi) {
const int resolution_change =
cm->prev_frame && (cm->width != cm->prev_frame->width ||
cm->height != cm->prev_frame->height);
+
if (resolution_change) av1_cyclic_refresh_reset_resize(cpi);
if (!cr->apply_cyclic_refresh) {
// Set segmentation map to 0 and disable.
@@ -607,6 +612,8 @@ void av1_cyclic_refresh_reset_resize(AV1_COMP *const cpi) {
cpi->refresh_frame.golden_frame = true;
cr->apply_cyclic_refresh = 0;
cr->counter_encode_maxq_scene_change = 0;
+ cr->percent_refresh_adjustment = 5;
+ cr->rate_ratio_qdelta_adjustment = 0.25;
}
int av1_cyclic_refresh_disable_lf_cdef(AV1_COMP *const cpi) {
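The new SAD gate in cyclic_refresh_update_map() expresses its thresholds as average per-pixel source SAD over a 64x64 superblock, so scale * 64 * 64 means "scale units of SAD per pixel". Near-static superblocks (below the low threshold) get refreshed even when their map entry says otherwise, and high-motion superblocks (at or above the high threshold) are never forced into the boost segment. An illustrative classifier under the same constants (not libaom code):

    #include <stdint.h>

    static int classify_sb_sad(uint64_t sb_sad, int frame_w, int frame_h) {
      const int scale = (frame_w * frame_h < 640 * 360) ? 6 : 8;
      const uint64_t thresh_sad = (uint64_t)scale * 64 * 64;
      const uint64_t thresh_sad_low = 2 * 64 * 64;
      if (sb_sad < thresh_sad_low) return -1;  // near-static: refresh anyway
      if (sb_sad >= thresh_sad) return 1;      // high motion: no forced boost
      return 0;                                // in between: map decides
    }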
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/aq_cyclicrefresh.h b/chromium/third_party/libaom/source/libaom/av1/encoder/aq_cyclicrefresh.h
index 85da647eedc..6c2566c3080 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/aq_cyclicrefresh.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/aq_cyclicrefresh.h
@@ -39,6 +39,12 @@ struct CYCLIC_REFRESH {
* for cyclic refresh.
*/
int percent_refresh;
+
+ /*!
+ * Active adjustment delta for cyclic refresh for rate control.
+ */
+ int percent_refresh_adjustment;
+
/*!
* Maximum q-delta as percentage of base q.
*/
@@ -72,10 +78,6 @@ struct CYCLIC_REFRESH {
*/
int rdmult;
/*!
- * Count of zero motion vectors
- */
- int cnt_zeromv;
- /*!
* Cyclic refresh map.
*/
int8_t *map;
@@ -98,6 +100,12 @@ struct CYCLIC_REFRESH {
* Rate target ratio to set q delta.
*/
double rate_ratio_qdelta;
+
+ /*!
+   * Active adjustment of qdelta rate ratio for enhanced rate control.
+ */
+ double rate_ratio_qdelta_adjustment;
+
/*!
* Boost factor for rate target ratio, for segment CR_SEGMENT_ID_BOOST2.
*/
@@ -109,6 +117,7 @@ struct CYCLIC_REFRESH {
int apply_cyclic_refresh;
int skip_over4x4;
int counter_encode_maxq_scene_change;
+ int use_block_sad_scene_det;
/*!\endcond */
};
@@ -175,7 +184,7 @@ int av1_cyclic_refresh_rc_bits_per_mb(const struct AV1_COMP *cpi, int i,
* \param[in] mi_col Col coordinate of the block in a step size of MI_SIZE
* \param[in] bsize Block size
*
- * \return Update the \c mbmi->segment_id, the \c cpi->cyclic_refresh and
+ * \remark Update the \c mbmi->segment_id, the \c cpi->cyclic_refresh and
* the \c cm->cpi->enc_seg.map.
*/
@@ -204,7 +213,7 @@ void av1_cyclic_reset_segment_skip(const struct AV1_COMP *cpi,
* \param[in] dry_run A code indicating whether it is part of the final
* pass for reconstructing the superblock
*
- * \return Update the \c mbmi->segment_id, the \c cpi->cyclic_refresh and
+ * \remark Update the \c mbmi->segment_id, the \c cpi->cyclic_refresh and
* the \c cm->cpi->enc_seg.map.
*/
void av1_cyclic_refresh_update_segment(const struct AV1_COMP *cpi,
@@ -224,7 +233,7 @@ void av1_cyclic_refresh_update_segment(const struct AV1_COMP *cpi,
*
* \param[in] x Pointer to MACROBLOCK structure
*
- * \return Update the \c x->cnt_zeromv, the \c x->actual_num_seg1_blocks and
+ * \remark Update the \c x->cnt_zeromv, the \c x->actual_num_seg1_blocks and
* the \c x->actual_num_seg1_blocks.
*/
void av1_init_cyclic_refresh_counters(MACROBLOCK *const x);
@@ -241,28 +250,13 @@ void av1_init_cyclic_refresh_counters(MACROBLOCK *const x);
* \param[in] cyclic_refresh Pointer to CYCLIC_REFRESH structure
* \param[in] x Pointer to MACROBLOCK structure
*
- * \return Update the \c cyclic_refresh->cnt_zeromv, the \c
+ * \remark Update the \c cyclic_refresh->cnt_zeromv, the \c
* cyclic_refresh->actual_num_seg1_blocks and the \c
* cyclic_refresh->actual_num_seg1_blocks.
*/
void av1_accumulate_cyclic_refresh_counters(
CYCLIC_REFRESH *const cyclic_refresh, const MACROBLOCK *const x);
-/*!\brief Update stats after encoding frame.
- *
- * Update the number of block encoded with segment 1 and 2,
- * and update the number of blocks encoded with small/zero motion.
- *
- * \ingroup cyclic_refresh
- * \callgraph
- * \callergraph
- *
- * \param[in] cpi Top level encoder structure
- *
- * \return Updates the \c cpi->cyclic_refresh with the new stats.
- */
-void av1_cyclic_refresh_postencode(struct AV1_COMP *const cpi);
-
/*!\brief Set golden frame update interval based on cyclic refresh.
*
* \ingroup cyclic_refresh
@@ -271,7 +265,7 @@ void av1_cyclic_refresh_postencode(struct AV1_COMP *const cpi);
*
* \param[in] cpi Top level encoder structure
*
- * \return Returns the interval in \c cpi->rc.baseline_gf_interval.
+ * \remark Returns the interval in \c cpi->rc.baseline_gf_interval.
*/
void av1_cyclic_refresh_set_golden_update(struct AV1_COMP *const cpi);
@@ -288,7 +282,7 @@ void av1_cyclic_refresh_set_golden_update(struct AV1_COMP *const cpi);
*
* \param[in] cpi Top level encoder structure
*
- * \return Updates the \c cpi->cyclic_refresh with the settings.
+ * \remark Updates the \c cpi->cyclic_refresh with the settings.
*/
void av1_cyclic_refresh_update_parameters(struct AV1_COMP *const cpi);
@@ -302,7 +296,7 @@ void av1_cyclic_refresh_update_parameters(struct AV1_COMP *const cpi);
*
* \param[in] cpi Top level encoder structure
*
- * \return Updates the \c cpi->cyclic_refresh with the cyclic refresh
+ * \remark Updates the \c cpi->cyclic_refresh with the cyclic refresh
* parameters and the \c cm->seg with the segmentation data.
*/
void av1_cyclic_refresh_setup(struct AV1_COMP *const cpi);
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/arm/crc32/hash_crc32.c b/chromium/third_party/libaom/source/libaom/av1/encoder/arm/crc32/hash_crc32.c
new file mode 100644
index 00000000000..dd8685dcf97
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/arm/crc32/hash_crc32.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <arm_acle.h>
+
+#define CRC_LOOP(op, crc, type, buf, len) \
+ while ((len) >= sizeof(type)) { \
+ (crc) = op((crc), *(type *)(buf)); \
+ (len) -= sizeof(type); \
+ buf += sizeof(type); \
+ }
+
+#define CRC_SINGLE(op, crc, type, buf, len) \
+ if ((len) >= sizeof(type)) { \
+ (crc) = op((crc), *(type *)(buf)); \
+ (len) -= sizeof(type); \
+ buf += sizeof(type); \
+ }
+
+/* Return 32-bit CRC for the input buffer.
+ * Polynomial is 0x1EDC6F41.
+ */
+
+uint32_t av1_get_crc32c_value_arm_crc32(void *crc_calculator, uint8_t *p,
+ size_t len) {
+ (void)crc_calculator;
+ const uint8_t *buf = p;
+ uint32_t crc = 0xFFFFFFFF;
+
+#if !defined(__aarch64__)
+ // Align input to 8-byte boundary (only necessary for 32-bit builds).
+ while (len && ((uintptr_t)buf & 7)) {
+ crc = __crc32cb(crc, *buf++);
+ len--;
+ }
+#endif
+
+ CRC_LOOP(__crc32cd, crc, uint64_t, buf, len)
+ CRC_SINGLE(__crc32cw, crc, uint32_t, buf, len)
+ CRC_SINGLE(__crc32ch, crc, uint16_t, buf, len)
+ CRC_SINGLE(__crc32cb, crc, uint8_t, buf, len)
+
+ return ~crc;
+}
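+
+// Illustrative scalar sketch (not part of the patch): the CRC32-C
+// instructions used above are equivalent to a bit-serial software loop over
+// the reflected polynomial 0x82F63B78 (the bit-reversal of 0x1EDC6F41);
+// crc32c_soft is a hypothetical reference helper:
+//
+//   uint32_t crc32c_soft(const uint8_t *buf, size_t len) {
+//     uint32_t crc = 0xFFFFFFFF;
+//     while (len--) {
+//       crc ^= *buf++;
+//       for (int k = 0; k < 8; k++)
+//         crc = (crc >> 1) ^ (0x82F63B78 & (0u - (crc & 1)));
+//     }
+//     return ~crc;
+//   }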
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/av1_highbd_quantize_neon.c b/chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/av1_highbd_quantize_neon.c
new file mode 100644
index 00000000000..197eae09b30
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/av1_highbd_quantize_neon.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <arm_neon.h>
+
+#include "aom_dsp/arm/mem_neon.h"
+
+#include "av1/common/quant_common.h"
+#include "av1/encoder/av1_quantize.h"
+
+static INLINE uint16x4_t quantize_4(const tran_low_t *coeff_ptr,
+ tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr,
+ int32x4_t v_quant_s32,
+ int32x4_t v_dequant_s32,
+ int32x4_t v_round_s32, int log_scale) {
+ const int32x4_t v_coeff = vld1q_s32(coeff_ptr);
+ const int32x4_t v_coeff_sign =
+ vreinterpretq_s32_u32(vcltq_s32(v_coeff, vdupq_n_s32(0)));
+ const int32x4_t v_log_scale = vdupq_n_s32(log_scale);
+ const int32x4_t v_abs_coeff = vabsq_s32(v_coeff);
+ // ((abs_coeff << (1 + log_scale)) >= dequant_ptr[rc01])
+ const int32x4_t v_abs_coeff_scaled =
+ vshlq_s32(v_abs_coeff, vdupq_n_s32(1 + log_scale));
+ const uint32x4_t v_mask = vcgeq_s32(v_abs_coeff_scaled, v_dequant_s32);
+ // const int64_t tmp = vmask ? (int64_t)abs_coeff + log_scaled_round : 0
+ const int32x4_t v_tmp = vandq_s32(vaddq_s32(v_abs_coeff, v_round_s32),
+ vreinterpretq_s32_u32(v_mask));
+ // const int abs_qcoeff = (int)((tmp * quant) >> (16 - log_scale));
+ const int32x4_t v_abs_qcoeff =
+ vqdmulhq_s32(vshlq_s32(v_tmp, v_log_scale), v_quant_s32);
+ // qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign);
+ const int32x4_t v_qcoeff =
+ vsubq_s32(veorq_s32(v_abs_qcoeff, v_coeff_sign), v_coeff_sign);
+ // vshlq_s32 will shift right if shift value is negative.
+ const int32x4_t v_abs_dqcoeff =
+ vshlq_s32(vmulq_s32(v_abs_qcoeff, v_dequant_s32), vnegq_s32(v_log_scale));
+ // dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign);
+ const int32x4_t v_dqcoeff =
+ vsubq_s32(veorq_s32(v_abs_dqcoeff, v_coeff_sign), v_coeff_sign);
+
+ vst1q_s32(qcoeff_ptr, v_qcoeff);
+ vst1q_s32(dqcoeff_ptr, v_dqcoeff);
+
+ // Used to find eob.
+ const uint32x4_t nz_qcoeff_mask = vcgtq_s32(v_abs_qcoeff, vdupq_n_s32(0));
+ return vmovn_u32(nz_qcoeff_mask);
+}
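+
+// Illustrative note: v_quant_s32 is pre-shifted left by 15 by the caller
+// (see av1_highbd_quantize_fp_neon below), so the saturating
+// doubling-multiply-high above computes, in scalar terms,
+//
+//   abs_qcoeff = (int)((2 * (int64_t)(tmp << log_scale) * (quant << 15)) >> 32)
+//              = (int)(((int64_t)tmp * quant) >> (16 - log_scale));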
+
+static INLINE int16x8_t get_max_lane_eob(const int16_t *iscan,
+ int16x8_t v_eobmax,
+ uint16x8_t v_mask) {
+ const int16x8_t v_iscan = vld1q_s16(&iscan[0]);
+ const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, vdupq_n_s16(1));
+ const int16x8_t v_nz_iscan = vbslq_s16(v_mask, v_iscan_plus1, vdupq_n_s16(0));
+ return vmaxq_s16(v_eobmax, v_nz_iscan);
+}
+
+static INLINE uint16_t get_max_eob(int16x8_t v_eobmax) {
+#ifdef __aarch64__
+ return (uint16_t)vmaxvq_s16(v_eobmax);
+#else
+ const int16x4_t v_eobmax_3210 =
+ vmax_s16(vget_low_s16(v_eobmax), vget_high_s16(v_eobmax));
+ const int64x1_t v_eobmax_xx32 =
+ vshr_n_s64(vreinterpret_s64_s16(v_eobmax_3210), 32);
+ const int16x4_t v_eobmax_tmp =
+ vmax_s16(v_eobmax_3210, vreinterpret_s16_s64(v_eobmax_xx32));
+ const int64x1_t v_eobmax_xxx3 =
+ vshr_n_s64(vreinterpret_s64_s16(v_eobmax_tmp), 16);
+ const int16x4_t v_eobmax_final =
+ vmax_s16(v_eobmax_tmp, vreinterpret_s16_s64(v_eobmax_xxx3));
+ return (uint16_t)vget_lane_s16(v_eobmax_final, 0);
+#endif
+}
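+
+// Illustrative scalar equivalent: on AArch64 vmaxvq_s16 reduces all 8 lanes
+// directly, while the armv7 path folds lanes pairwise; both return the same
+// horizontal maximum (lanes[] is a hypothetical name for the 8 halfwords of
+// v_eobmax):
+//
+//   int16_t eob = lanes[0];
+//   for (int k = 1; k < 8; k++)
+//     if (lanes[k] > eob) eob = lanes[k];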
+
+void av1_highbd_quantize_fp_neon(
+ const tran_low_t *coeff_ptr, intptr_t count, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan, int log_scale) {
+ (void)scan;
+ (void)zbin_ptr;
+ (void)quant_shift_ptr;
+
+ const int16x4_t v_quant = vld1_s16(quant_ptr);
+ const int16x4_t v_dequant = vld1_s16(dequant_ptr);
+ const int16x4_t v_zero = vdup_n_s16(0);
+ const uint16x4_t v_round_select = vcgt_s16(vdup_n_s16(log_scale), v_zero);
+ const int16x4_t v_round_no_scale = vld1_s16(round_ptr);
+ const int16x4_t v_round_log_scale =
+ vqrdmulh_n_s16(v_round_no_scale, (int16_t)(1 << (15 - log_scale)));
+ const int16x4_t v_round =
+ vbsl_s16(v_round_select, v_round_log_scale, v_round_no_scale);
+ int32x4_t v_round_s32 = vaddl_s16(v_round, v_zero);
+ int32x4_t v_quant_s32 = vshlq_n_s32(vaddl_s16(v_quant, v_zero), 15);
+ int32x4_t v_dequant_s32 = vaddl_s16(v_dequant, v_zero);
+ uint16x4_t v_mask_lo, v_mask_hi;
+ int16x8_t v_eobmax = vdupq_n_s16(-1);
+
+ // DC and first 3 AC
+ v_mask_lo = quantize_4(coeff_ptr, qcoeff_ptr, dqcoeff_ptr, v_quant_s32,
+ v_dequant_s32, v_round_s32, log_scale);
+
+ // overwrite the DC constants with AC constants
+ v_round_s32 = vdupq_lane_s32(vget_low_s32(v_round_s32), 1);
+ v_quant_s32 = vdupq_lane_s32(vget_low_s32(v_quant_s32), 1);
+ v_dequant_s32 = vdupq_lane_s32(vget_low_s32(v_dequant_s32), 1);
+
+ // 4 more AC
+ v_mask_hi = quantize_4(coeff_ptr + 4, qcoeff_ptr + 4, dqcoeff_ptr + 4,
+ v_quant_s32, v_dequant_s32, v_round_s32, log_scale);
+
+ // Find the max lane eob for the first 8 coeffs.
+ v_eobmax =
+ get_max_lane_eob(iscan, v_eobmax, vcombine_u16(v_mask_lo, v_mask_hi));
+
+ count -= 8;
+ do {
+ coeff_ptr += 8;
+ qcoeff_ptr += 8;
+ dqcoeff_ptr += 8;
+ iscan += 8;
+ v_mask_lo = quantize_4(coeff_ptr, qcoeff_ptr, dqcoeff_ptr, v_quant_s32,
+ v_dequant_s32, v_round_s32, log_scale);
+ v_mask_hi = quantize_4(coeff_ptr + 4, qcoeff_ptr + 4, dqcoeff_ptr + 4,
+ v_quant_s32, v_dequant_s32, v_round_s32, log_scale);
+ // Find the max lane eob for 8 coeffs.
+ v_eobmax =
+ get_max_lane_eob(iscan, v_eobmax, vcombine_u16(v_mask_lo, v_mask_hi));
+ count -= 8;
+ } while (count);
+
+ *eob_ptr = get_max_eob(v_eobmax);
+}
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/av1_temporal_denoiser_neon.c b/chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/av1_temporal_denoiser_neon.c
index ad81f40c3e9..3528105e501 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/av1_temporal_denoiser_neon.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/av1_temporal_denoiser_neon.c
@@ -24,6 +24,9 @@
// Compute the sum of all pixel differences of this MB.
static INLINE int horizontal_add_s8x16(const int8x16_t v_sum_diff_total) {
+#if defined(__aarch64__)
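+  // vaddlvq_s8 widens and sums all 16 signed bytes in a single instruction,
+  // replacing the pairwise-add reduction chain used on armv7 below.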
+ return vaddlvq_s8(v_sum_diff_total);
+#else
const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff_total);
const int32x4_t fedc_ba98_7654_3210 = vpaddlq_s16(fe_dc_ba_98_76_54_32_10);
const int64x2_t fedcba98_76543210 = vpaddlq_s32(fedc_ba98_7654_3210);
@@ -31,6 +34,7 @@ static INLINE int horizontal_add_s8x16(const int8x16_t v_sum_diff_total) {
vget_low_s64(fedcba98_76543210));
const int sum_diff = vget_lane_s32(vreinterpret_s32_s64(x), 0);
return sum_diff;
+#endif
}
// Denoise a 16x1 vector.
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/quantize_neon.c b/chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/quantize_neon.c
index f16d8f15814..dbfbeef183a 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/quantize_neon.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/quantize_neon.c
@@ -11,9 +11,11 @@
#include <arm_neon.h>
+#include <assert.h>
#include <math.h>
#include "aom_dsp/arm/mem_neon.h"
+#include "aom_dsp/arm/sum_neon.h"
#include "aom_mem/aom_mem.h"
#include "av1/common/quant_common.h"
@@ -23,6 +25,52 @@
#include "av1/encoder/encoder.h"
#include "av1/encoder/rd.h"
+static INLINE uint16_t get_max_eob(int16x8_t v_eobmax) {
+#ifdef __aarch64__
+ return (uint16_t)vmaxvq_s16(v_eobmax);
+#else
+ const int16x4_t v_eobmax_3210 =
+ vmax_s16(vget_low_s16(v_eobmax), vget_high_s16(v_eobmax));
+ const int64x1_t v_eobmax_xx32 =
+ vshr_n_s64(vreinterpret_s64_s16(v_eobmax_3210), 32);
+ const int16x4_t v_eobmax_tmp =
+ vmax_s16(v_eobmax_3210, vreinterpret_s16_s64(v_eobmax_xx32));
+ const int64x1_t v_eobmax_xxx3 =
+ vshr_n_s64(vreinterpret_s64_s16(v_eobmax_tmp), 16);
+ const int16x4_t v_eobmax_final =
+ vmax_s16(v_eobmax_tmp, vreinterpret_s16_s64(v_eobmax_xxx3));
+ return (uint16_t)vget_lane_s16(v_eobmax_final, 0);
+#endif
+}
+
+static INLINE int16x8_t get_max_lane_eob(const int16_t *iscan,
+ int16x8_t v_eobmax,
+ uint16x8_t v_mask) {
+ const int16x8_t v_iscan = vld1q_s16(&iscan[0]);
+ const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, vdupq_n_s16(1));
+ const int16x8_t v_nz_iscan = vbslq_s16(v_mask, v_iscan_plus1, vdupq_n_s16(0));
+ return vmaxq_s16(v_eobmax, v_nz_iscan);
+}
+
+static INLINE uint16x8_t quantize_fp_8(const tran_low_t *coeff_ptr,
+ tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr,
+ int16x8_t v_quant, int16x8_t v_dequant,
+ int16x8_t v_round, int16x8_t v_zero) {
+ const int16x8_t v_coeff = load_tran_low_to_s16q(&coeff_ptr[0]);
+ const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
+ const int16x8_t v_abs = vabsq_s16(v_coeff);
+ const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);
+ const int16x8_t v_tmp2 = vshrq_n_s16(vqdmulhq_s16(v_tmp, v_quant), 1);
+ const uint16x8_t v_nz_mask = vcgtq_s16(v_tmp2, v_zero);
+ const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
+ const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
+ const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant);
+ store_s16q_to_tran_low(&qcoeff_ptr[0], v_qcoeff);
+ store_s16q_to_tran_low(&dqcoeff_ptr[0], v_dqcoeff);
+ return v_nz_mask;
+}
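+
+// Illustrative scalar equivalent of quantize_fp_8: vqdmulhq_s16 computes
+// (2 * a * b) >> 16 with saturation, so the extra >> 1 gives a plain 16-bit
+// multiply-shift (sat16 is a hypothetical saturating-add helper matching
+// vqaddq_s16):
+//
+//   int tmp   = sat16(abs(coeff) + round);
+//   int abs_q = (tmp * quant) >> 16;
+//   qcoeff    = (abs_q ^ sign) - sign;  // re-apply the sign
+//   dqcoeff   = qcoeff * dequant;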
+
void av1_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count,
const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr,
@@ -38,95 +86,53 @@ void av1_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count,
// Quantization pass: All coefficients with index >= zero_flag are
// skippable. Note: zero_flag can be zero.
- int i;
const int16x8_t v_zero = vdupq_n_s16(0);
- const int16x8_t v_one = vdupq_n_s16(1);
+ int16x8_t v_quant = vld1q_s16(quant_ptr);
+ int16x8_t v_dequant = vld1q_s16(dequant_ptr);
+ int16x8_t v_round = vld1q_s16(round_ptr);
int16x8_t v_eobmax_76543210 = vdupq_n_s16(-1);
- int16x8_t v_round = vmovq_n_s16(round_ptr[1]);
- int16x8_t v_quant = vmovq_n_s16(quant_ptr[1]);
- int16x8_t v_dequant = vmovq_n_s16(dequant_ptr[1]);
- // adjust for dc
- v_round = vsetq_lane_s16(round_ptr[0], v_round, 0);
- v_quant = vsetq_lane_s16(quant_ptr[0], v_quant, 0);
- v_dequant = vsetq_lane_s16(dequant_ptr[0], v_dequant, 0);
+ uint16x8_t v_nz_mask;
// process dc and the first seven ac coeffs
- {
- const int16x8_t v_iscan = vld1q_s16(&iscan[0]);
- const int16x8_t v_coeff = load_tran_low_to_s16q(&coeff_ptr[0]);
- const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
- const int16x8_t v_abs = vabsq_s16(v_coeff);
- const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);
- const int32x4_t v_tmp_lo =
- vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
- const int32x4_t v_tmp_hi =
- vmull_s16(vget_high_s16(v_tmp), vget_high_s16(v_quant));
- const int16x8_t v_tmp2 =
- vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), vshrn_n_s32(v_tmp_hi, 16));
- const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero);
- const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one);
- const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
- const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
- const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
- const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant);
- v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan);
- store_s16q_to_tran_low(&qcoeff_ptr[0], v_qcoeff);
- store_s16q_to_tran_low(&dqcoeff_ptr[0], v_dqcoeff);
- v_round = vmovq_n_s16(round_ptr[1]);
- v_quant = vmovq_n_s16(quant_ptr[1]);
- v_dequant = vmovq_n_s16(dequant_ptr[1]);
- }
+ v_nz_mask = quantize_fp_8(coeff_ptr, qcoeff_ptr, dqcoeff_ptr, v_quant,
+ v_dequant, v_round, v_zero);
+ v_eobmax_76543210 = get_max_lane_eob(&iscan[0], v_eobmax_76543210, v_nz_mask);
+ // overwrite the dc constants with ac constants
+ v_quant = vdupq_lane_s16(vget_low_s16(v_quant), 1);
+ v_dequant = vdupq_lane_s16(vget_low_s16(v_dequant), 1);
+ v_round = vdupq_lane_s16(vget_low_s16(v_round), 1);
+
+ count -= 8;
// now process the rest of the ac coeffs
- for (i = 8; i < count; i += 8) {
- const int16x8_t v_iscan = vld1q_s16(&iscan[i]);
- const int16x8_t v_coeff = load_tran_low_to_s16q(&coeff_ptr[i]);
- const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
- const int16x8_t v_abs = vabsq_s16(v_coeff);
- const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);
- const int32x4_t v_tmp_lo =
- vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
- const int32x4_t v_tmp_hi =
- vmull_s16(vget_high_s16(v_tmp), vget_high_s16(v_quant));
- const int16x8_t v_tmp2 =
- vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), vshrn_n_s32(v_tmp_hi, 16));
- const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero);
- const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one);
- const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
- const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
- const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
- const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant);
- v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan);
- store_s16q_to_tran_low(&qcoeff_ptr[i], v_qcoeff);
- store_s16q_to_tran_low(&dqcoeff_ptr[i], v_dqcoeff);
- }
-#ifdef __aarch64__
- *eob_ptr = vmaxvq_s16(v_eobmax_76543210);
-#else
- {
- const int16x4_t v_eobmax_3210 = vmax_s16(vget_low_s16(v_eobmax_76543210),
- vget_high_s16(v_eobmax_76543210));
- const int64x1_t v_eobmax_xx32 =
- vshr_n_s64(vreinterpret_s64_s16(v_eobmax_3210), 32);
- const int16x4_t v_eobmax_tmp =
- vmax_s16(v_eobmax_3210, vreinterpret_s16_s64(v_eobmax_xx32));
- const int64x1_t v_eobmax_xxx3 =
- vshr_n_s64(vreinterpret_s64_s16(v_eobmax_tmp), 16);
- const int16x4_t v_eobmax_final =
- vmax_s16(v_eobmax_tmp, vreinterpret_s16_s64(v_eobmax_xxx3));
-
- *eob_ptr = (uint16_t)vget_lane_s16(v_eobmax_final, 0);
- }
-#endif // __aarch64__
+ do {
+ coeff_ptr += 8;
+ qcoeff_ptr += 8;
+ dqcoeff_ptr += 8;
+ iscan += 8;
+ v_nz_mask = quantize_fp_8(coeff_ptr, qcoeff_ptr, dqcoeff_ptr, v_quant,
+ v_dequant, v_round, v_zero);
+ v_eobmax_76543210 = get_max_lane_eob(iscan, v_eobmax_76543210, v_nz_mask);
+ count -= 8;
+ } while (count > 0);
+ *eob_ptr = get_max_eob(v_eobmax_76543210);
}
-static INLINE void calculate_dqcoeff_lp_and_store(const int16x8_t qcoeff,
- const int16x8_t dequant,
- int16_t *dqcoeff) {
- const int32x4_t dqcoeff_0 =
- vmull_s16(vget_low_s16(qcoeff), vget_low_s16(dequant));
- const int32x4_t dqcoeff_1 =
- vmull_s16(vget_high_s16(qcoeff), vget_high_s16(dequant));
-
- vst1q_s16(dqcoeff, vcombine_s16(vmovn_s32(dqcoeff_0), vmovn_s32(dqcoeff_1)));
+static INLINE uint16x8_t quantize_lp_8(const int16_t *coeff_ptr,
+ int16_t *qcoeff_ptr,
+ int16_t *dqcoeff_ptr, int16x8_t v_quant,
+ int16x8_t v_dequant, int16x8_t v_round,
+ int16x8_t v_zero) {
+ const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[0]);
+ const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
+ const int16x8_t v_abs = vabsq_s16(v_coeff);
+ const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);
+ const int16x8_t v_tmp2 = vshrq_n_s16(vqdmulhq_s16(v_tmp, v_quant), 1);
+ const uint16x8_t v_nz_mask = vcgtq_s16(v_tmp2, v_zero);
+ const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
+ const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
+ const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant);
+ vst1q_s16(qcoeff_ptr, v_qcoeff);
+ vst1q_s16(dqcoeff_ptr, v_dqcoeff);
+ return v_nz_mask;
}
void av1_quantize_lp_neon(const int16_t *coeff_ptr, intptr_t n_coeffs,
@@ -138,81 +144,177 @@ void av1_quantize_lp_neon(const int16_t *coeff_ptr, intptr_t n_coeffs,
// Quantization pass: All coefficients with index >= zero_flag are
// skippable. Note: zero_flag can be zero.
const int16x8_t v_zero = vdupq_n_s16(0);
- const int16x8_t v_one = vdupq_n_s16(1);
+ int16x8_t v_quant = vld1q_s16(quant_ptr);
+ int16x8_t v_dequant = vld1q_s16(dequant_ptr);
+ int16x8_t v_round = vld1q_s16(round_ptr);
int16x8_t v_eobmax_76543210 = vdupq_n_s16(-1);
- int16x8_t v_round = vmovq_n_s16(round_ptr[1]);
- int16x8_t v_quant = vmovq_n_s16(quant_ptr[1]);
- int16x8_t v_dequant = vmovq_n_s16(dequant_ptr[1]);
-
- // adjust for dc
- v_round = vsetq_lane_s16(round_ptr[0], v_round, 0);
- v_quant = vsetq_lane_s16(quant_ptr[0], v_quant, 0);
- v_dequant = vsetq_lane_s16(dequant_ptr[0], v_dequant, 0);
+ uint16x8_t v_nz_mask;
+ intptr_t count = n_coeffs;
+
// process dc and the first seven ac coeffs
- {
- const int16x8_t v_iscan = vld1q_s16(&iscan[0]);
- const int16x8_t v_coeff = vld1q_s16(coeff_ptr);
- const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
- const int16x8_t v_abs = vabsq_s16(v_coeff);
- const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);
- const int32x4_t v_tmp_lo =
- vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
- const int32x4_t v_tmp_hi =
- vmull_s16(vget_high_s16(v_tmp), vget_high_s16(v_quant));
- const int16x8_t v_tmp2 =
- vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), vshrn_n_s32(v_tmp_hi, 16));
- const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero);
- const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one);
- const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
- const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
- const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
- calculate_dqcoeff_lp_and_store(v_qcoeff, v_dequant, dqcoeff_ptr);
- v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan);
- vst1q_s16(qcoeff_ptr, v_qcoeff);
- v_round = vmovq_n_s16(round_ptr[1]);
- v_quant = vmovq_n_s16(quant_ptr[1]);
- v_dequant = vmovq_n_s16(dequant_ptr[1]);
- }
+ v_nz_mask = quantize_lp_8(coeff_ptr, qcoeff_ptr, dqcoeff_ptr, v_quant,
+ v_dequant, v_round, v_zero);
+ v_eobmax_76543210 = get_max_lane_eob(iscan, v_eobmax_76543210, v_nz_mask);
+ // overwrite the dc constants with ac constants
+ v_quant = vdupq_lane_s16(vget_low_s16(v_quant), 1);
+ v_dequant = vdupq_lane_s16(vget_low_s16(v_dequant), 1);
+ v_round = vdupq_lane_s16(vget_low_s16(v_round), 1);
+
+ count -= 8;
// now process the rest of the ac coeffs
- for (int i = 8; i < n_coeffs; i += 8) {
- const int16x8_t v_iscan = vld1q_s16(&iscan[i]);
- const int16x8_t v_coeff = vld1q_s16(coeff_ptr + i);
- const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
- const int16x8_t v_abs = vabsq_s16(v_coeff);
- const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);
- const int32x4_t v_tmp_lo =
- vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
- const int32x4_t v_tmp_hi =
- vmull_s16(vget_high_s16(v_tmp), vget_high_s16(v_quant));
- const int16x8_t v_tmp2 =
- vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), vshrn_n_s32(v_tmp_hi, 16));
- const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero);
- const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one);
- const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
- const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
- const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
- calculate_dqcoeff_lp_and_store(v_qcoeff, v_dequant, dqcoeff_ptr + i);
- v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan);
- vst1q_s16(qcoeff_ptr + i, v_qcoeff);
+ do {
+ coeff_ptr += 8;
+ qcoeff_ptr += 8;
+ dqcoeff_ptr += 8;
+ iscan += 8;
+ v_nz_mask = quantize_lp_8(coeff_ptr, qcoeff_ptr, dqcoeff_ptr, v_quant,
+ v_dequant, v_round, v_zero);
+ v_eobmax_76543210 = get_max_lane_eob(iscan, v_eobmax_76543210, v_nz_mask);
+ count -= 8;
+ } while (count != 0);
+ *eob_ptr = get_max_eob(v_eobmax_76543210);
+}
+
+static AOM_FORCE_INLINE uint16x8_t quantize_fp_logscale_8(
+ const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, int16x8_t v_quant, int16x8_t v_dequant,
+ int16x8_t v_round, int16x8_t v_zero, int log_scale) {
+ const int16x8_t v_log_scale_minus_1 = vdupq_n_s16(log_scale - 1);
+ const int16x8_t v_neg_log_scale_plus_1 = vdupq_n_s16(-(1 + log_scale));
+ const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr);
+ const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
+ const int16x8_t v_abs_coeff = vabsq_s16(v_coeff);
+ const uint16x8_t v_mask =
+ vcgeq_s16(v_abs_coeff, vshlq_s16(v_dequant, v_neg_log_scale_plus_1));
+ // const int64_t tmp = vmask ? (int64_t)abs_coeff + log_scaled_round : 0
+ const int16x8_t v_tmp = vandq_s16(vqaddq_s16(v_abs_coeff, v_round),
+ vreinterpretq_s16_u16(v_mask));
+ const int16x8_t v_tmp2 =
+ vqdmulhq_s16(vshlq_s16(v_tmp, v_log_scale_minus_1), v_quant);
+ const uint16x8_t v_nz_mask = vcgtq_s16(v_tmp2, v_zero);
+ const int16x8_t v_qcoeff =
+ vsubq_s16(veorq_s16(v_tmp2, v_coeff_sign), v_coeff_sign);
+ // Multiplying by dequant here will use all 16 bits. Cast to unsigned before
+ // shifting right. (vshlq_s16 will shift right if shift value is negative)
+ const uint16x8_t v_abs_dqcoeff =
+ vshlq_u16(vreinterpretq_u16_s16(vmulq_s16(v_tmp2, v_dequant)),
+ vdupq_n_s16(-log_scale));
+ const int16x8_t v_dqcoeff =
+ vsubq_s16(veorq_s16(vreinterpretq_s16_u16(v_abs_dqcoeff), v_coeff_sign),
+ v_coeff_sign);
+ store_s16q_to_tran_low(qcoeff_ptr, v_qcoeff);
+ store_s16q_to_tran_low(dqcoeff_ptr, v_dqcoeff);
+ return v_nz_mask;
+}
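+
+// Illustrative scalar view: pre-shifting tmp by (log_scale - 1) lets the
+// doubling-multiply-high produce the required shift directly,
+//
+//   abs_qcoeff = (int)((2 * (int64_t)(tmp << (log_scale - 1)) * quant) >> 16)
+//              = (int)(((int64_t)tmp * quant) >> (16 - log_scale));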
+
+static AOM_FORCE_INLINE uint16x8_t quantize_fp_logscale2_8(
+ const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, int16x8_t v_quant, int16x8_t v_dequant,
+ int16x8_t v_round, int16x8_t v_zero) {
+ const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr);
+ const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
+ const int16x8_t v_abs_coeff = vabsq_s16(v_coeff);
+ const uint16x8_t v_mask =
+ vcgeq_u16(vshlq_n_u16(vreinterpretq_u16_s16(v_abs_coeff), 1),
+ vshrq_n_u16(vreinterpretq_u16_s16(v_dequant), 2));
+ // abs_coeff = vmask ? (int64_t)abs_coeff + log_scaled_round : 0
+ const int16x8_t v_tmp = vandq_s16(vqaddq_s16(v_abs_coeff, v_round),
+ vreinterpretq_s16_u16(v_mask));
+ // tmp32 = (int)((abs_coeff * quant_ptr[rc != 0]) >> (16 - log_scale));
+ const int16x8_t v_tmp2 =
+ vorrq_s16(vshlq_n_s16(vqdmulhq_s16(v_tmp, v_quant), 1),
+ vreinterpretq_s16_u16(vshrq_n_u16(
+ vreinterpretq_u16_s16(vmulq_s16(v_tmp, v_quant)), 14)));
+ const uint16x8_t v_nz_mask = vcgtq_s16(v_tmp2, v_zero);
+ const int16x8_t v_qcoeff =
+ vsubq_s16(veorq_s16(v_tmp2, v_coeff_sign), v_coeff_sign);
+ // const tran_low_t abs_dqcoeff = (tmp32 * dequant_ptr[rc != 0]) >> log_scale;
+ const int16x8_t v_abs_dqcoeff =
+ vorrq_s16(vshlq_n_s16(vqdmulhq_s16(v_tmp2, v_dequant), 13),
+ vreinterpretq_s16_u16(vshrq_n_u16(
+ vreinterpretq_u16_s16(vmulq_s16(v_tmp2, v_dequant)), 2)));
+ const int16x8_t v_dqcoeff =
+ vsubq_s16(veorq_s16(v_abs_dqcoeff, v_coeff_sign), v_coeff_sign);
+ store_s16q_to_tran_low(qcoeff_ptr, v_qcoeff);
+ store_s16q_to_tran_low(dqcoeff_ptr, v_dqcoeff);
+ return v_nz_mask;
+}
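+
+// Illustrative scalar view of the vorrq_s16 idioms above: vqdmulhq_s16
+// supplies the high bits of the 16x16-bit product and vmulq_s16 its low
+// 16 bits, so OR-ing suitably shifted copies of both reconstructs the exact
+// shifted products for log_scale == 2:
+//
+//   tmp32       = (int)(((int32_t)tmp * quant) >> 14);     // >> (16 - log_scale)
+//   abs_dqcoeff = (int)(((int32_t)tmp32 * dequant) >> 2);  // >> log_scale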
+
+static AOM_FORCE_INLINE void quantize_fp_no_qmatrix_neon(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr,
+ const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *iscan,
+ int log_scale) {
+ const int16x8_t v_zero = vdupq_n_s16(0);
+ int16x8_t v_quant = vld1q_s16(quant_ptr);
+ int16x8_t v_dequant = vld1q_s16(dequant_ptr);
+ const int16x8_t v_round_no_scale = vld1q_s16(round_ptr);
+ int16x8_t v_round =
+ vqrdmulhq_n_s16(v_round_no_scale, (int16_t)(1 << (15 - log_scale)));
+ int16x8_t v_eobmax_76543210 = vdupq_n_s16(-1);
+ intptr_t non_zero_count = n_coeffs;
+
+ assert(n_coeffs > 16);
+ // Pre-scan pass
+ const int16x8_t v_dequant_scaled =
+ vshlq_s16(v_dequant, vdupq_n_s16(-(1 + log_scale)));
+ const int16x8_t v_zbin_s16 =
+ vdupq_lane_s16(vget_low_s16(v_dequant_scaled), 1);
+ intptr_t i = n_coeffs;
+ do {
+ const int16x8_t v_coeff_a = load_tran_low_to_s16q(coeff_ptr + i - 8);
+ const int16x8_t v_coeff_b = load_tran_low_to_s16q(coeff_ptr + i - 16);
+ const int16x8_t v_abs_coeff_a = vabsq_s16(v_coeff_a);
+ const int16x8_t v_abs_coeff_b = vabsq_s16(v_coeff_b);
+ const uint16x8_t v_mask_a = vcgeq_s16(v_abs_coeff_a, v_zbin_s16);
+ const uint16x8_t v_mask_b = vcgeq_s16(v_abs_coeff_b, v_zbin_s16);
+ // If the coefficient is in the base ZBIN range, then discard.
+ if (horizontal_long_add_u16x8(v_mask_a, v_mask_b) == 0) {
+ non_zero_count -= 16;
+ } else {
+ break;
+ }
+ i -= 16;
+ } while (i > 0);
+
+ const intptr_t remaining_zcoeffs = n_coeffs - non_zero_count;
+ memset(qcoeff_ptr + non_zero_count, 0,
+ remaining_zcoeffs * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr + non_zero_count, 0,
+ remaining_zcoeffs * sizeof(*dqcoeff_ptr));
+
+ // process dc and the first seven ac coeffs
+ uint16x8_t v_nz_mask;
+ if (log_scale == 2) {
+ v_nz_mask = quantize_fp_logscale2_8(coeff_ptr, qcoeff_ptr, dqcoeff_ptr,
+ v_quant, v_dequant, v_round, v_zero);
+ } else {
+ v_nz_mask =
+ quantize_fp_logscale_8(coeff_ptr, qcoeff_ptr, dqcoeff_ptr, v_quant,
+ v_dequant, v_round, v_zero, log_scale);
}
-#ifdef __aarch64__
- *eob_ptr = vmaxvq_s16(v_eobmax_76543210);
-#else
- {
- const int16x4_t v_eobmax_3210 = vmax_s16(vget_low_s16(v_eobmax_76543210),
- vget_high_s16(v_eobmax_76543210));
- const int64x1_t v_eobmax_xx32 =
- vshr_n_s64(vreinterpret_s64_s16(v_eobmax_3210), 32);
- const int16x4_t v_eobmax_tmp =
- vmax_s16(v_eobmax_3210, vreinterpret_s16_s64(v_eobmax_xx32));
- const int64x1_t v_eobmax_xxx3 =
- vshr_n_s64(vreinterpret_s64_s16(v_eobmax_tmp), 16);
- const int16x4_t v_eobmax_final =
- vmax_s16(v_eobmax_tmp, vreinterpret_s16_s64(v_eobmax_xxx3));
-
- *eob_ptr = (uint16_t)vget_lane_s16(v_eobmax_final, 0);
+ v_eobmax_76543210 = get_max_lane_eob(iscan, v_eobmax_76543210, v_nz_mask);
+ // overwrite the dc constants with ac constants
+ v_quant = vdupq_lane_s16(vget_low_s16(v_quant), 1);
+ v_dequant = vdupq_lane_s16(vget_low_s16(v_dequant), 1);
+ v_round = vdupq_lane_s16(vget_low_s16(v_round), 1);
+
+ for (intptr_t count = non_zero_count - 8; count > 0; count -= 8) {
+ coeff_ptr += 8;
+ qcoeff_ptr += 8;
+ dqcoeff_ptr += 8;
+ iscan += 8;
+ if (log_scale == 2) {
+ v_nz_mask = quantize_fp_logscale2_8(coeff_ptr, qcoeff_ptr, dqcoeff_ptr,
+ v_quant, v_dequant, v_round, v_zero);
+ } else {
+ v_nz_mask =
+ quantize_fp_logscale_8(coeff_ptr, qcoeff_ptr, dqcoeff_ptr, v_quant,
+ v_dequant, v_round, v_zero, log_scale);
+ }
+ v_eobmax_76543210 = get_max_lane_eob(iscan, v_eobmax_76543210, v_nz_mask);
}
-#endif // __aarch64__
+ *eob_ptr = get_max_eob(v_eobmax_76543210);
}
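+
+// Illustrative scalar sketch of the pre-scan above: trailing groups of 16
+// coefficients whose magnitudes all sit below the AC zbin proxy
+// (dequant >> (1 + log_scale)) cannot quantize to a nonzero value, so they
+// are zeroed in bulk and skipped by the main loop. In scalar terms:
+//
+//   intptr_t nzc = n_coeffs;
+//   while (nzc > 0) {
+//     int any = 0;
+//     for (int k = 1; k <= 16; k++) any |= (abs(coeff[nzc - k]) >= zbin);
+//     if (any) break;
+//     nzc -= 16;
+//   }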
void av1_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -223,110 +325,12 @@ void av1_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
- const int log_scale = 1;
- const int rounding[2] = { ROUND_POWER_OF_TWO(round_ptr[0], log_scale),
- ROUND_POWER_OF_TWO(round_ptr[1], log_scale) };
-
(void)zbin_ptr;
(void)quant_shift_ptr;
(void)scan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- const int16x8_t zero = vdupq_n_s16(0);
- int16x8_t v_eobmax_76543210 = vreinterpretq_s16_u16(vceqq_s16(zero, zero));
- int16x8_t round = vdupq_n_s16(rounding[1]);
- int16x8_t quant = vdupq_n_s16(quant_ptr[1]);
- int16x8_t dequant = vdupq_n_s16(dequant_ptr[1]);
- dequant = vsetq_lane_s16(dequant_ptr[0], dequant, 0);
-
- int16x8_t coeff = load_tran_low_to_s16q(&coeff_ptr[0]);
-
- int16x8_t abs = vabsq_s16(coeff);
- uint16x8_t check = vcgeq_s16(abs, vshrq_n_s16(dequant, 2));
- uint64_t nz_check = vget_lane_u64(vreinterpret_u64_u8(vmovn_u16(check)), 0);
- if (nz_check) {
- const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15);
- const int16x8_t v_iscan = vld1q_s16(&iscan[0]);
- round = vsetq_lane_s16(rounding[0], round, 0);
- quant = vsetq_lane_s16(quant_ptr[0], quant, 0);
-
- abs = vqaddq_s16(abs, round);
- int16x8_t temp = vqdmulhq_s16(abs, quant);
- int16x8_t qcoeff_temp = vsubq_s16(veorq_s16(temp, coeff_sign), coeff_sign);
- abs = vreinterpretq_s16_u16(
- vshrq_n_u16(vreinterpretq_u16_s16(vmulq_s16(temp, dequant)), 1));
- int16x8_t dqcoeff_temp = vsubq_s16(veorq_s16(abs, coeff_sign), coeff_sign);
-
- int16x8_t coeff_nz_mask =
- vbslq_s16(check, qcoeff_temp, load_tran_low_to_s16q(&qcoeff_ptr[0]));
- store_s16q_to_tran_low(&qcoeff_ptr[0], coeff_nz_mask);
- coeff_nz_mask =
- vbslq_s16(check, dqcoeff_temp, load_tran_low_to_s16q(&dqcoeff_ptr[0]));
- store_s16q_to_tran_low(&dqcoeff_ptr[0], coeff_nz_mask);
-
- round = vsetq_lane_s16(rounding[1], round, 0);
- quant = vsetq_lane_s16(quant_ptr[1], quant, 0);
-
- uint16x8_t vtmp_mask = vcgtq_s16(abs, zero);
- const uint16x8_t v_nz_mask = vandq_u16(vtmp_mask, check);
- check = vandq_u16(v_nz_mask, vcgtq_s16(v_iscan, v_eobmax_76543210));
- v_eobmax_76543210 = vbslq_s16(check, v_iscan, v_eobmax_76543210);
- }
-
- dequant = vsetq_lane_s16(dequant_ptr[1], dequant, 0);
-
- for (int i = 8; i < n_coeffs; i += 8) {
- coeff = load_tran_low_to_s16q(&coeff_ptr[i]);
- abs = vabsq_s16(coeff);
- check = vcgeq_s16(abs, vshrq_n_s16(dequant, 2));
-
- nz_check = vget_lane_u64(vreinterpret_u64_u8(vmovn_u16(check)), 0);
- if (nz_check) {
- const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15);
- const int16x8_t v_iscan = vld1q_s16(&iscan[i]);
-
- abs = vqaddq_s16(abs, round);
- int16x8_t temp = vqdmulhq_s16(abs, quant);
- int16x8_t qcoeff_temp =
- vsubq_s16(veorq_s16(temp, coeff_sign), coeff_sign);
- abs = vreinterpretq_s16_u16(
- vshrq_n_u16(vreinterpretq_u16_s16(vmulq_s16(temp, dequant)), 1));
- int16x8_t dqcoeff_temp =
- vsubq_s16(veorq_s16(abs, coeff_sign), coeff_sign);
-
- int16x8_t coeff_nz_mask =
- vbslq_s16(check, qcoeff_temp, load_tran_low_to_s16q(&qcoeff_ptr[i]));
- store_s16q_to_tran_low(&qcoeff_ptr[i], coeff_nz_mask);
- coeff_nz_mask = vbslq_s16(check, dqcoeff_temp,
- load_tran_low_to_s16q(&dqcoeff_ptr[i]));
- store_s16q_to_tran_low(&dqcoeff_ptr[i], coeff_nz_mask);
-
- uint16x8_t vtmp_mask = vcgtq_s16(abs, zero);
- const uint16x8_t v_nz_mask = vandq_u16(vtmp_mask, check);
- check = vandq_u16(v_nz_mask, vcgtq_s16(v_iscan, v_eobmax_76543210));
- v_eobmax_76543210 = vbslq_s16(check, v_iscan, v_eobmax_76543210);
- }
- }
-#ifdef __aarch64__
- *eob_ptr = vmaxvq_s16(v_eobmax_76543210) + 1;
-#else
- {
- const int16x4_t v_eobmax_3210 = vmax_s16(vget_low_s16(v_eobmax_76543210),
- vget_high_s16(v_eobmax_76543210));
- const int64x1_t v_eobmax_xx32 =
- vshr_n_s64(vreinterpret_s64_s16(v_eobmax_3210), 32);
- const int16x4_t v_eobmax_tmp =
- vmax_s16(v_eobmax_3210, vreinterpret_s16_s64(v_eobmax_xx32));
- const int64x1_t v_eobmax_xxx3 =
- vshr_n_s64(vreinterpret_s64_s16(v_eobmax_tmp), 16);
- const int16x4_t v_eobmax_final =
- vmax_s16(v_eobmax_tmp, vreinterpret_s16_s64(v_eobmax_xxx3));
-
- *eob_ptr = (uint16_t)vget_lane_s16(v_eobmax_final, 0) + 1;
- }
-#endif // __aarch64__
+ quantize_fp_no_qmatrix_neon(coeff_ptr, n_coeffs, round_ptr, quant_ptr,
+ qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr,
+ iscan, 1);
}
void av1_quantize_fp_64x64_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -337,123 +341,12 @@ void av1_quantize_fp_64x64_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
- const int log_scale = 2;
- const int16x8_t v_log_scale =
- vreinterpretq_s16_s64(vdupq_n_s64(0xFFFEFFFEFFFEFFFE));
-
- const int rounding[2] = { ROUND_POWER_OF_TWO(round_ptr[0], log_scale),
- ROUND_POWER_OF_TWO(round_ptr[1], log_scale) };
-
(void)zbin_ptr;
(void)quant_shift_ptr;
(void)scan;
-
- memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
- memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
-
- const int16x8_t zero = vdupq_n_s16(0);
- int16x8_t v_eobmax_76543210 = vreinterpretq_s16_u16(vceqq_s16(zero, zero));
-
- int16x8_t round = vdupq_n_s16(rounding[1]);
- int16x8_t quant = vdupq_n_s16(quant_ptr[1]);
- int16x8_t dequant = vdupq_n_s16(dequant_ptr[1]);
- dequant = vsetq_lane_s16(dequant_ptr[0], dequant, 0);
-
- int16x8_t coeff = load_tran_low_to_s16q(&coeff_ptr[0]);
- int16x8_t abs = vabsq_s16(coeff);
- uint16x8_t check = vcgeq_u16(vshlq_n_u16(vreinterpretq_u16_s16(abs), 1),
- vshrq_n_u16(vreinterpretq_u16_s16(dequant), 2));
- uint64_t nz_check = vget_lane_u64(vreinterpret_u64_u8(vmovn_u16(check)), 0);
- if (nz_check) {
- const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15);
- const int16x8_t v_iscan = vld1q_s16(&iscan[0]);
- round = vsetq_lane_s16(rounding[0], round, 0);
- quant = vsetq_lane_s16(quant_ptr[0], quant, 0);
- abs = vqaddq_s16(abs, round);
- int16x8_t temp =
- vorrq_s16(vshlq_n_s16(vqdmulhq_s16(abs, quant), 1),
- vreinterpretq_s16_u16(vshrq_n_u16(
- vreinterpretq_u16_s16(vmulq_s16(abs, quant)), 14)));
- int16x8_t qcoeff_temp = vsubq_s16(veorq_s16(temp, coeff_sign), coeff_sign);
-
- abs = vreinterpretq_s16_u16(vshlq_u16(
- vreinterpretq_u16_s16(vmulq_s16(temp, dequant)), v_log_scale));
- abs = vorrq_s16(vshlq_n_s16(vqdmulhq_s16(temp, dequant), 13), abs);
- int16x8_t dqcoeff_temp = vsubq_s16(veorq_s16(abs, coeff_sign), coeff_sign);
- int16x8_t coeff_nz_mask =
- vbslq_s16(check, qcoeff_temp, load_tran_low_to_s16q(&qcoeff_ptr[0]));
- store_s16q_to_tran_low(&qcoeff_ptr[0], coeff_nz_mask);
- coeff_nz_mask =
- vbslq_s16(check, dqcoeff_temp, load_tran_low_to_s16q(&dqcoeff_ptr[0]));
- store_s16q_to_tran_low(&dqcoeff_ptr[0], coeff_nz_mask);
-
- round = vsetq_lane_s16(rounding[1], round, 0);
- quant = vsetq_lane_s16(quant_ptr[1], quant, 0);
-
- uint16x8_t vtmp_mask = vcgtq_s16(abs, zero);
- const uint16x8_t v_nz_mask = vandq_u16(vtmp_mask, check);
- check = vandq_u16(v_nz_mask, vcgtq_s16(v_iscan, v_eobmax_76543210));
- v_eobmax_76543210 = vbslq_s16(check, v_iscan, v_eobmax_76543210);
- }
-
- dequant = vsetq_lane_s16(dequant_ptr[1], dequant, 0);
-
- for (int i = 8; i < n_coeffs; i += 8) {
- coeff = load_tran_low_to_s16q(&coeff_ptr[i]);
- abs = vabsq_s16(coeff);
- check = vcgeq_u16(vshlq_n_u16(vreinterpretq_u16_s16(abs), 1),
- vshrq_n_u16(vreinterpretq_u16_s16(dequant), 2));
- nz_check = vget_lane_u64(vreinterpret_u64_u8(vmovn_u16(check)), 0);
- if (nz_check) {
- const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15);
- const int16x8_t v_iscan = vld1q_s16(&iscan[i]);
- abs = vqaddq_s16(abs, round);
- int16x8_t temp =
- vorrq_s16(vshlq_n_s16(vqdmulhq_s16(abs, quant), 1),
- vreinterpretq_s16_u16(vshrq_n_u16(
- vreinterpretq_u16_s16(vmulq_s16(abs, quant)), 14)));
-
- int16x8_t qcoeff_temp =
- vsubq_s16(veorq_s16(temp, coeff_sign), coeff_sign);
-
- abs = vreinterpretq_s16_u16(vshlq_u16(
- vreinterpretq_u16_s16(vmulq_s16(temp, dequant)), v_log_scale));
- abs = vorrq_s16(vshlq_n_s16(vqdmulhq_s16(temp, dequant), 13), abs);
-
- int16x8_t dqcoeff_temp =
- vsubq_s16(veorq_s16(abs, coeff_sign), coeff_sign);
- int16x8_t coeff_nz_mask =
- vbslq_s16(check, qcoeff_temp, load_tran_low_to_s16q(&qcoeff_ptr[i]));
- store_s16q_to_tran_low(&qcoeff_ptr[i], coeff_nz_mask);
- coeff_nz_mask = vbslq_s16(check, dqcoeff_temp,
- load_tran_low_to_s16q(&dqcoeff_ptr[i]));
- store_s16q_to_tran_low(&dqcoeff_ptr[i], coeff_nz_mask);
-
- uint16x8_t vtmp_mask = vcgtq_s16(abs, zero);
- const uint16x8_t v_nz_mask = vandq_u16(vtmp_mask, check);
-
- check = vandq_u16(v_nz_mask, vcgtq_s16(v_iscan, v_eobmax_76543210));
- v_eobmax_76543210 = vbslq_s16(check, v_iscan, v_eobmax_76543210);
- }
- }
-#ifdef __aarch64__
- *eob_ptr = vmaxvq_s16(v_eobmax_76543210) + 1;
-#else
- {
- const int16x4_t v_eobmax_3210 = vmax_s16(vget_low_s16(v_eobmax_76543210),
- vget_high_s16(v_eobmax_76543210));
- const int64x1_t v_eobmax_xx32 =
- vshr_n_s64(vreinterpret_s64_s16(v_eobmax_3210), 32);
- const int16x4_t v_eobmax_tmp =
- vmax_s16(v_eobmax_3210, vreinterpret_s16_s64(v_eobmax_xx32));
- const int64x1_t v_eobmax_xxx3 =
- vshr_n_s64(vreinterpret_s64_s16(v_eobmax_tmp), 16);
- const int16x4_t v_eobmax_final =
- vmax_s16(v_eobmax_tmp, vreinterpret_s16_s64(v_eobmax_xxx3));
-
- *eob_ptr = (uint16_t)vget_lane_s16(v_eobmax_final, 0) + 1;
- }
-#endif // __aarch64__
+ quantize_fp_no_qmatrix_neon(coeff_ptr, n_coeffs, round_ptr, quant_ptr,
+ qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr,
+ iscan, 2);
}
void aom_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -550,25 +443,7 @@ void aom_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
v_eobmax_76543210 = vbslq_s16(vcond, v_iscan, v_eobmax_76543210);
}
}
-
-#ifdef __aarch64__
- *eob_ptr = vmaxvq_s16(v_eobmax_76543210) + 1;
-#else
- {
- const int16x4_t v_eobmax_3210 = vmax_s16(vget_low_s16(v_eobmax_76543210),
- vget_high_s16(v_eobmax_76543210));
- const int64x1_t v_eobmax_xx32 =
- vshr_n_s64(vreinterpret_s64_s16(v_eobmax_3210), 32);
- const int16x4_t v_eobmax_tmp =
- vmax_s16(v_eobmax_3210, vreinterpret_s16_s64(v_eobmax_xx32));
- const int64x1_t v_eobmax_xxx3 =
- vshr_n_s64(vreinterpret_s64_s16(v_eobmax_tmp), 16);
- const int16x4_t v_eobmax_final =
- vmax_s16(v_eobmax_tmp, vreinterpret_s16_s64(v_eobmax_xxx3));
-
- *eob_ptr = (uint16_t)vget_lane_s16(v_eobmax_final, 0) + 1;
- }
-#endif // __aarch64__
+ *eob_ptr = get_max_eob(v_eobmax_76543210) + 1;
}
#define QM_MULL_SHIFT(x0, x1) \
@@ -703,25 +578,7 @@ static void aom_quantize_b_helper_16x16_neon(
v_eobmax_76543210 = vbslq_s16(vcond, v_iscan, v_eobmax_76543210);
}
}
-
-#ifdef __aarch64__
- *eob_ptr = vmaxvq_s16(v_eobmax_76543210) + 1;
-#else
- {
- const int16x4_t v_eobmax_3210 = vmax_s16(vget_low_s16(v_eobmax_76543210),
- vget_high_s16(v_eobmax_76543210));
- const int64x1_t v_eobmax_xx32 =
- vshr_n_s64(vreinterpret_s64_s16(v_eobmax_3210), 32);
- const int16x4_t v_eobmax_tmp =
- vmax_s16(v_eobmax_3210, vreinterpret_s16_s64(v_eobmax_xx32));
- const int64x1_t v_eobmax_xxx3 =
- vshr_n_s64(vreinterpret_s64_s16(v_eobmax_tmp), 16);
- const int16x4_t v_eobmax_final =
- vmax_s16(v_eobmax_tmp, vreinterpret_s16_s64(v_eobmax_xxx3));
-
- *eob_ptr = (uint16_t)vget_lane_s16(v_eobmax_final, 0) + 1;
- }
-#endif // __aarch64__
+ *eob_ptr = get_max_eob(v_eobmax_76543210) + 1;
}
static void aom_quantize_b_helper_32x32_neon(
@@ -859,25 +716,7 @@ static void aom_quantize_b_helper_32x32_neon(
v_eobmax_76543210 = vbslq_s16(vcond, v_iscan, v_eobmax_76543210);
}
}
-
-#ifdef __aarch64__
- *eob_ptr = vmaxvq_s16(v_eobmax_76543210) + 1;
-#else
- {
- const int16x4_t v_eobmax_3210 = vmax_s16(vget_low_s16(v_eobmax_76543210),
- vget_high_s16(v_eobmax_76543210));
- const int64x1_t v_eobmax_xx32 =
- vshr_n_s64(vreinterpret_s64_s16(v_eobmax_3210), 32);
- const int16x4_t v_eobmax_tmp =
- vmax_s16(v_eobmax_3210, vreinterpret_s16_s64(v_eobmax_xx32));
- const int64x1_t v_eobmax_xxx3 =
- vshr_n_s64(vreinterpret_s64_s16(v_eobmax_tmp), 16);
- const int16x4_t v_eobmax_final =
- vmax_s16(v_eobmax_tmp, vreinterpret_s16_s64(v_eobmax_xxx3));
-
- *eob_ptr = (uint16_t)vget_lane_s16(v_eobmax_final, 0) + 1;
- }
-#endif // __aarch64__
+ *eob_ptr = get_max_eob(v_eobmax_76543210) + 1;
}
static void aom_quantize_b_helper_64x64_neon(
@@ -1026,25 +865,7 @@ static void aom_quantize_b_helper_64x64_neon(
v_eobmax_76543210 = vbslq_s16(vcond, v_iscan, v_eobmax_76543210);
}
}
-
-#ifdef __aarch64__
- *eob_ptr = vmaxvq_s16(v_eobmax_76543210) + 1;
-#else
- {
- const int16x4_t v_eobmax_3210 = vmax_s16(vget_low_s16(v_eobmax_76543210),
- vget_high_s16(v_eobmax_76543210));
- const int64x1_t v_eobmax_xx32 =
- vshr_n_s64(vreinterpret_s64_s16(v_eobmax_3210), 32);
- const int16x4_t v_eobmax_tmp =
- vmax_s16(v_eobmax_3210, vreinterpret_s16_s64(v_eobmax_xx32));
- const int64x1_t v_eobmax_xxx3 =
- vshr_n_s64(vreinterpret_s64_s16(v_eobmax_tmp), 16);
- const int16x4_t v_eobmax_final =
- vmax_s16(v_eobmax_tmp, vreinterpret_s16_s64(v_eobmax_xxx3));
-
- *eob_ptr = (uint16_t)vget_lane_s16(v_eobmax_final, 0) + 1;
- }
-#endif // __aarch64__
+ *eob_ptr = get_max_eob(v_eobmax_76543210) + 1;
}
void aom_quantize_b_helper_neon(
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/temporal_filter_neon.c b/chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/temporal_filter_neon.c
new file mode 100644
index 00000000000..cae44f9a151
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/temporal_filter_neon.c
@@ -0,0 +1,433 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <arm_neon.h>
+
+#include "config/av1_rtcd.h"
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/temporal_filter.h"
+#include "aom_dsp/arm/mem_neon.h"
+#include "aom_dsp/arm/sum_neon.h"
+
+// For the squared error buffer, add padding for 4 samples.
+#define SSE_STRIDE (BW + 4)
+
+#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+
+// clang-format off
+
+DECLARE_ALIGNED(16, static const uint8_t, kSlidingWindowMask[]) = {
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00,
+ 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00,
+ 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00,
+ 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+
+// clang-format on
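+
+// Illustrative note: load_and_pad (below) duplicates an 8-sample row across
+// both halves of a 16-byte register, so each mask register above selects two
+// of the four overlapping 5-sample windows (offsets 0 and 1 in vmask.val[0],
+// 2 and 3 in vmask.val[1]). vdotq_u32 then accumulates squared sums per
+// 4-lane group and vpaddq_u32 folds them into one 5-tap sum per column,
+// where d[] denotes the row of absolute differences:
+//
+//   window_k = { d[k], d[k+1], d[k+2], d[k+3], d[k+4] }   for k = 0..3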
+
+static INLINE void get_abs_diff(const uint8_t *frame1, const uint32_t stride1,
+ const uint8_t *frame2, const uint32_t stride2,
+ const uint32_t block_width,
+ const uint32_t block_height,
+ uint8_t *frame_abs_diff,
+ const unsigned int dst_stride) {
+ uint8_t *dst = frame_abs_diff;
+
+ uint32_t i = 0;
+ do {
+ uint32_t j = 0;
+ do {
+ uint8x16_t s = vld1q_u8(frame1 + i * stride1 + j);
+ uint8x16_t r = vld1q_u8(frame2 + i * stride2 + j);
+ uint8x16_t abs_diff = vabdq_u8(s, r);
+ vst1q_u8(dst + j + 2, abs_diff);
+ j += 16;
+ } while (j < block_width);
+
+ dst += dst_stride;
+ i++;
+ } while (i < block_height);
+}
+
+static INLINE uint8x16_t load_and_pad(uint8_t *src, const uint32_t col,
+ const uint32_t block_width) {
+ uint8x8_t s = vld1_u8(src);
+
+ if (col == 0) {
+ s[0] = s[2];
+ s[1] = s[2];
+ } else if (col >= block_width - 4) {
+ s[6] = s[5];
+ s[7] = s[5];
+ }
+ return vcombine_u8(s, s);
+}
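+
+// Illustrative example: diffs are stored at offset +2 in frame_abs_diff, so
+// for col == 0 the two leading lanes are padding slots and the first 5-tap
+// window becomes { d[0], d[0], d[0], d[1], d[2] } - edges replicate the
+// nearest valid column instead of reading outside the block.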
+
+static void apply_temporal_filter(
+ const uint8_t *frame, const unsigned int stride, const uint32_t block_width,
+ const uint32_t block_height, const int *subblock_mses,
+ unsigned int *accumulator, uint16_t *count, uint8_t *frame_abs_diff,
+ uint32_t *luma_sse_sum, const double inv_num_ref_pixels,
+ const double decay_factor, const double inv_factor,
+ const double weight_factor, double *d_factor) {
+ assert(((block_width == 16) || (block_width == 32)) &&
+ ((block_height == 16) || (block_height == 32)));
+
+ uint32_t acc_5x5_neon[BH][BW];
+ const uint8x16x2_t vmask = vld1q_u8_x2(kSlidingWindowMask);
+
+ // Traverse 4 columns at a time - first and last two columns need padding.
+ for (uint32_t col = 0; col < block_width; col += 4) {
+ uint8x16_t vsrc[5][2];
+ uint8_t *src = frame_abs_diff + col;
+
+ // Load, pad (for first and last two columns) and mask 3 rows from the top.
+ for (int i = 2; i < 5; i++) {
+ uint8x16_t s = load_and_pad(src, col, block_width);
+ vsrc[i][0] = vandq_u8(s, vmask.val[0]);
+ vsrc[i][1] = vandq_u8(s, vmask.val[1]);
+ src += SSE_STRIDE;
+ }
+
+ // Pad the top 2 rows.
+ vsrc[0][0] = vsrc[2][0];
+ vsrc[0][1] = vsrc[2][1];
+ vsrc[1][0] = vsrc[2][0];
+ vsrc[1][1] = vsrc[2][1];
+
+ for (unsigned int row = 0; row < block_height; row++) {
+ uint32x4_t sum_01 = vdupq_n_u32(0);
+ uint32x4_t sum_23 = vdupq_n_u32(0);
+
+ sum_01 = vdotq_u32(sum_01, vsrc[0][0], vsrc[0][0]);
+ sum_01 = vdotq_u32(sum_01, vsrc[1][0], vsrc[1][0]);
+ sum_01 = vdotq_u32(sum_01, vsrc[2][0], vsrc[2][0]);
+ sum_01 = vdotq_u32(sum_01, vsrc[3][0], vsrc[3][0]);
+ sum_01 = vdotq_u32(sum_01, vsrc[4][0], vsrc[4][0]);
+
+ sum_23 = vdotq_u32(sum_23, vsrc[0][1], vsrc[0][1]);
+ sum_23 = vdotq_u32(sum_23, vsrc[1][1], vsrc[1][1]);
+ sum_23 = vdotq_u32(sum_23, vsrc[2][1], vsrc[2][1]);
+ sum_23 = vdotq_u32(sum_23, vsrc[3][1], vsrc[3][1]);
+ sum_23 = vdotq_u32(sum_23, vsrc[4][1], vsrc[4][1]);
+
+ vst1q_u32(&acc_5x5_neon[row][col], vpaddq_u32(sum_01, sum_23));
+
+ // Push all rows in the sliding window up one.
+ for (int i = 0; i < 4; i++) {
+ vsrc[i][0] = vsrc[i + 1][0];
+ vsrc[i][1] = vsrc[i + 1][1];
+ }
+
+ if (row <= block_height - 4) {
+ // Load next row into the bottom of the sliding window.
+ uint8x16_t s = load_and_pad(src, col, block_width);
+ vsrc[4][0] = vandq_u8(s, vmask.val[0]);
+ vsrc[4][1] = vandq_u8(s, vmask.val[1]);
+ src += SSE_STRIDE;
+ } else {
+ // Pad the bottom 2 rows.
+ vsrc[4][0] = vsrc[3][0];
+ vsrc[4][1] = vsrc[3][1];
+ }
+ }
+ }
+
+ // Perform filtering.
+ for (unsigned int i = 0, k = 0; i < block_height; i++) {
+ for (unsigned int j = 0; j < block_width; j++, k++) {
+ const int pixel_value = frame[i * stride + j];
+ uint32_t diff_sse = acc_5x5_neon[i][j] + luma_sse_sum[i * BW + j];
+
+ const double window_error = diff_sse * inv_num_ref_pixels;
+ const int subblock_idx =
+ (i >= block_height / 2) * 2 + (j >= block_width / 2);
+ const double block_error = (double)subblock_mses[subblock_idx];
+ const double combined_error =
+ weight_factor * window_error + block_error * inv_factor;
+ // Compute filter weight.
+ double scaled_error =
+ combined_error * d_factor[subblock_idx] * decay_factor;
+ scaled_error = AOMMIN(scaled_error, 7);
+ const int weight = (int)(exp(-scaled_error) * TF_WEIGHT_SCALE);
+ accumulator[k] += weight * pixel_value;
+ count[k] += weight;
+ }
+ }
+}
+
+#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+
+// When using vld1q_u16_x4, compilers may insert an alignment hint of 256 bits.
+DECLARE_ALIGNED(32, static const uint16_t, kSlidingWindowMask[]) = {
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000,
+ 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000,
+ 0x0000, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000,
+ 0x0000, 0x0000, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
+};
+
+static INLINE void get_squared_error(
+ const uint8_t *frame1, const uint32_t stride1, const uint8_t *frame2,
+ const uint32_t stride2, const uint32_t block_width,
+ const uint32_t block_height, uint16_t *frame_sse,
+ const unsigned int dst_stride) {
+ uint16_t *dst = frame_sse;
+
+ uint32_t i = 0;
+ do {
+ uint32_t j = 0;
+ do {
+ uint8x16_t s = vld1q_u8(frame1 + i * stride1 + j);
+ uint8x16_t r = vld1q_u8(frame2 + i * stride2 + j);
+
+ uint8x16_t abs_diff = vabdq_u8(s, r);
+ uint16x8_t sse_lo =
+ vmull_u8(vget_low_u8(abs_diff), vget_low_u8(abs_diff));
+ uint16x8_t sse_hi =
+ vmull_u8(vget_high_u8(abs_diff), vget_high_u8(abs_diff));
+
+ vst1q_u16(dst + j + 2, sse_lo);
+ vst1q_u16(dst + j + 10, sse_hi);
+
+ j += 16;
+ } while (j < block_width);
+
+ dst += dst_stride;
+ i++;
+ } while (i < block_height);
+}
+
+static INLINE uint16x8_t load_and_pad(uint16_t *src, const uint32_t col,
+ const uint32_t block_width) {
+ uint16x8_t s = vld1q_u16(src);
+
+ if (col == 0) {
+ s[0] = s[2];
+ s[1] = s[2];
+ } else if (col >= block_width - 4) {
+ s[6] = s[5];
+ s[7] = s[5];
+ }
+ return s;
+}
+
+static void apply_temporal_filter(
+ const uint8_t *frame, const unsigned int stride, const uint32_t block_width,
+ const uint32_t block_height, const int *subblock_mses,
+ unsigned int *accumulator, uint16_t *count, uint16_t *frame_sse,
+ uint32_t *luma_sse_sum, const double inv_num_ref_pixels,
+ const double decay_factor, const double inv_factor,
+ const double weight_factor, double *d_factor) {
+ assert(((block_width == 16) || (block_width == 32)) &&
+ ((block_height == 16) || (block_height == 32)));
+
+ uint32_t acc_5x5_neon[BH][BW];
+ const uint16x8x4_t vmask = vld1q_u16_x4(kSlidingWindowMask);
+
+ // Traverse 4 columns at a time - first and last two columns need padding.
+ for (uint32_t col = 0; col < block_width; col += 4) {
+ uint16x8_t vsrc[5];
+ uint16_t *src = frame_sse + col;
+
+ // Load and pad (for first and last two columns) 3 rows from the top.
+ for (int i = 2; i < 5; i++) {
+ vsrc[i] = load_and_pad(src, col, block_width);
+ src += SSE_STRIDE;
+ }
+
+ // Pad the top 2 rows.
+ vsrc[0] = vsrc[2];
+ vsrc[1] = vsrc[2];
+
+ for (unsigned int row = 0; row < block_height; row++) {
+ for (int i = 0; i < 4; i++) {
+ uint32x4_t vsum = vdupq_n_u32(0);
+ for (int j = 0; j < 5; j++) {
+ vsum = vpadalq_u16(vsum, vandq_u16(vsrc[j], vmask.val[i]));
+ }
+ acc_5x5_neon[row][col + i] = horizontal_add_u32x4(vsum);
+ }
+
+ // Push all rows in the sliding window up one.
+ for (int i = 0; i < 4; i++) {
+ vsrc[i] = vsrc[i + 1];
+ }
+
+ if (row <= block_height - 4) {
+ // Load next row into the bottom of the sliding window.
+ vsrc[4] = load_and_pad(src, col, block_width);
+ src += SSE_STRIDE;
+ } else {
+ // Pad the bottom 2 rows.
+ vsrc[4] = vsrc[3];
+ }
+ }
+ }
+
+ // Perform filtering.
+ for (unsigned int i = 0, k = 0; i < block_height; i++) {
+ for (unsigned int j = 0; j < block_width; j++, k++) {
+ const int pixel_value = frame[i * stride + j];
+ uint32_t diff_sse = acc_5x5_neon[i][j] + luma_sse_sum[i * BW + j];
+
+ const double window_error = diff_sse * inv_num_ref_pixels;
+ const int subblock_idx =
+ (i >= block_height / 2) * 2 + (j >= block_width / 2);
+ const double block_error = (double)subblock_mses[subblock_idx];
+ const double combined_error =
+ weight_factor * window_error + block_error * inv_factor;
+ // Compute filter weight.
+ double scaled_error =
+ combined_error * d_factor[subblock_idx] * decay_factor;
+ scaled_error = AOMMIN(scaled_error, 7);
+ const int weight = (int)(exp(-scaled_error) * TF_WEIGHT_SCALE);
+ accumulator[k] += weight * pixel_value;
+ count[k] += weight;
+ }
+ }
+}
+
+#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+
+void av1_apply_temporal_filter_neon(
+ const YV12_BUFFER_CONFIG *frame_to_filter, const MACROBLOCKD *mbd,
+ const BLOCK_SIZE block_size, const int mb_row, const int mb_col,
+ const int num_planes, const double *noise_levels, const MV *subblock_mvs,
+ const int *subblock_mses, const int q_factor, const int filter_strength,
+ const uint8_t *pred, uint32_t *accum, uint16_t *count) {
+ const int is_high_bitdepth = frame_to_filter->flags & YV12_FLAG_HIGHBITDEPTH;
+ assert(block_size == BLOCK_32X32 && "Only support 32x32 block with Neon!");
+ assert(TF_WINDOW_LENGTH == 5 && "Only support window length 5 with Neon!");
+ assert(!is_high_bitdepth && "Only support low bit-depth with Neon!");
+ assert(num_planes >= 1 && num_planes <= MAX_MB_PLANE);
+ (void)is_high_bitdepth;
+
+ // Block information.
+ const int mb_height = block_size_high[block_size];
+ const int mb_width = block_size_wide[block_size];
+ // Frame information.
+ const int frame_height = frame_to_filter->y_crop_height;
+ const int frame_width = frame_to_filter->y_crop_width;
+ const int min_frame_size = AOMMIN(frame_height, frame_width);
+ // Variables to simplify combined error calculation.
+ const double inv_factor = 1.0 / ((TF_WINDOW_BLOCK_BALANCE_WEIGHT + 1) *
+ TF_SEARCH_ERROR_NORM_WEIGHT);
+ const double weight_factor =
+ (double)TF_WINDOW_BLOCK_BALANCE_WEIGHT * inv_factor;
+ // Adjust filtering based on q.
+ // Larger q -> stronger filtering -> larger weight.
+ // Smaller q -> weaker filtering -> smaller weight.
+ double q_decay = pow((double)q_factor / TF_Q_DECAY_THRESHOLD, 2);
+ q_decay = CLIP(q_decay, 1e-5, 1);
+ if (q_factor >= TF_QINDEX_CUTOFF) {
+ // Max q_factor is 255, therefore the upper bound of q_decay is 8.
+ // We do not need a clip here.
+ q_decay = 0.5 * pow((double)q_factor / 64, 2);
+ }
+ // Smaller strength -> smaller filtering weight.
+ double s_decay = pow((double)filter_strength / TF_STRENGTH_THRESHOLD, 2);
+ s_decay = CLIP(s_decay, 1e-5, 1);
+ double d_factor[4] = { 0 };
+#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+ uint8_t frame_abs_diff[SSE_STRIDE * BH] = { 0 };
+#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+ uint16_t frame_sse[SSE_STRIDE * BH] = { 0 };
+#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+ uint32_t luma_sse_sum[BW * BH] = { 0 };
+
+ for (int subblock_idx = 0; subblock_idx < 4; subblock_idx++) {
+ // Larger motion vector -> smaller filtering weight.
+ const MV mv = subblock_mvs[subblock_idx];
+ const double distance = sqrt(pow(mv.row, 2) + pow(mv.col, 2));
+ double distance_threshold = min_frame_size * TF_SEARCH_DISTANCE_THRESHOLD;
+ distance_threshold = AOMMAX(distance_threshold, 1);
+ d_factor[subblock_idx] = distance / distance_threshold;
+ d_factor[subblock_idx] = AOMMAX(d_factor[subblock_idx], 1);
+ }
+
+ // Handle planes in sequence.
+ int plane_offset = 0;
+ for (int plane = 0; plane < num_planes; ++plane) {
+ const uint32_t plane_h = mb_height >> mbd->plane[plane].subsampling_y;
+ const uint32_t plane_w = mb_width >> mbd->plane[plane].subsampling_x;
+ const uint32_t frame_stride =
+ frame_to_filter->strides[plane == AOM_PLANE_Y ? 0 : 1];
+ const int frame_offset = mb_row * plane_h * frame_stride + mb_col * plane_w;
+
+ const uint8_t *ref = frame_to_filter->buffers[plane] + frame_offset;
+ const int ss_x_shift =
+ mbd->plane[plane].subsampling_x - mbd->plane[AOM_PLANE_Y].subsampling_x;
+ const int ss_y_shift =
+ mbd->plane[plane].subsampling_y - mbd->plane[AOM_PLANE_Y].subsampling_y;
+ const int num_ref_pixels = TF_WINDOW_LENGTH * TF_WINDOW_LENGTH +
+ ((plane) ? (1 << (ss_x_shift + ss_y_shift)) : 0);
+ const double inv_num_ref_pixels = 1.0 / num_ref_pixels;
+ // Larger noise -> larger filtering weight.
+ const double n_decay = 0.5 + log(2 * noise_levels[plane] + 5.0);
+ // Decay factors for non-local mean approach.
+ const double decay_factor = 1 / (n_decay * q_decay * s_decay);
+
+ // Filter U-plane and V-plane using Y-plane. This is because motion
+ // search is only done on Y-plane, so the information from Y-plane
+ // will be more accurate. The luma sse sum is reused in both chroma
+ // planes.
+#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+ if (plane == AOM_PLANE_U) {
+ for (unsigned int i = 0; i < plane_h; i++) {
+ for (unsigned int j = 0; j < plane_w; j++) {
+ for (int ii = 0; ii < (1 << ss_y_shift); ++ii) {
+ for (int jj = 0; jj < (1 << ss_x_shift); ++jj) {
+ const int yy = (i << ss_y_shift) + ii; // Y-coord on Y-plane.
+ const int xx = (j << ss_x_shift) + jj; // X-coord on Y-plane.
+ luma_sse_sum[i * BW + j] +=
+ (frame_abs_diff[yy * SSE_STRIDE + xx + 2] *
+ frame_abs_diff[yy * SSE_STRIDE + xx + 2]);
+ }
+ }
+ }
+ }
+ }
+
+ get_abs_diff(ref, frame_stride, pred + plane_offset, plane_w, plane_w,
+ plane_h, frame_abs_diff, SSE_STRIDE);
+
+ apply_temporal_filter(pred + plane_offset, plane_w, plane_w, plane_h,
+ subblock_mses, accum + plane_offset,
+ count + plane_offset, frame_abs_diff, luma_sse_sum,
+ inv_num_ref_pixels, decay_factor, inv_factor,
+ weight_factor, d_factor);
+#else // !(defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD))
+ if (plane == AOM_PLANE_U) {
+ for (unsigned int i = 0; i < plane_h; i++) {
+ for (unsigned int j = 0; j < plane_w; j++) {
+ for (int ii = 0; ii < (1 << ss_y_shift); ++ii) {
+ for (int jj = 0; jj < (1 << ss_x_shift); ++jj) {
+ const int yy = (i << ss_y_shift) + ii; // Y-coord on Y-plane.
+ const int xx = (j << ss_x_shift) + jj; // X-coord on Y-plane.
+ luma_sse_sum[i * BW + j] += frame_sse[yy * SSE_STRIDE + xx + 2];
+ }
+ }
+ }
+ }
+ }
+
+ get_squared_error(ref, frame_stride, pred + plane_offset, plane_w, plane_w,
+ plane_h, frame_sse, SSE_STRIDE);
+
+ apply_temporal_filter(
+ pred + plane_offset, plane_w, plane_w, plane_h, subblock_mses,
+ accum + plane_offset, count + plane_offset, frame_sse, luma_sse_sum,
+ inv_num_ref_pixels, decay_factor, inv_factor, weight_factor, d_factor);
+#endif // defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD)
+
+ plane_offset += plane_h * plane_w;
+ }
+}
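[Editor's note] The lines above show the NEON path accumulating 8-bit absolute differences (usable with UDOT) where the generic path keeps 16-bit squared errors, and reusing the luma error sum for both chroma planes. For orientation, below is a scalar sketch of the per-pixel weight that apply_temporal_filter() derives from these sums. It follows the shape of libaom's C reference filter; the exact blend is an assumption here, not quoted from this patch.

    #include <math.h>

    /* Assumed shape of the non-local-mean weight, per libaom's C reference:
     * larger window error, larger motion (d_factor) or smaller decay all
     * shrink the weight. */
    static double tf_weight_sketch(double window_sse_sum, double luma_sse_sum,
                                   double block_mse, double inv_num_ref_pixels,
                                   double decay_factor, double inv_factor,
                                   double weight_factor, double d_factor) {
      /* Average error over the filter window, plus the co-located luma
       * error when filtering a chroma plane. */
      const double window_error =
          (window_sse_sum + luma_sse_sum) * inv_num_ref_pixels;
      /* Blend with the subblock MSE, then apply the q/strength/noise decay
       * computed in the caller above. */
      const double combined =
          weight_factor * window_error + block_mse * inv_factor;
      double scaled = combined * d_factor * decay_factor;
      if (scaled > 7.0) scaled = 7.0; /* exp(-x) is negligible beyond this */
      return exp(-scaled);            /* fractional weight, later quantized */
    }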
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/wedge_utils_neon.c b/chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/wedge_utils_neon.c
new file mode 100644
index 00000000000..54d8d191138
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/arm/neon/wedge_utils_neon.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <arm_neon.h>
+#include <assert.h>
+
+#include "aom_dsp/arm/sum_neon.h"
+#include "av1/common/reconinter.h"
+
+#define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS)
+
+/**
+ * See av1_wedge_sse_from_residuals_c for details of the parameters and
+ * computation.
+ */
+uint64_t av1_wedge_sse_from_residuals_neon(const int16_t *r1, const int16_t *d,
+ const uint8_t *m, int N) {
+ assert(N % 64 == 0);
+
+ uint64x2_t v_csse[2] = { vdupq_n_u64(0), vdupq_n_u64(0) };
+
+ int i = 0;
+ do {
+ int32x4_t sum[4];
+ int32x4_t sse[2];
+ int16x4_t sum_s16[4];
+
+ const int16x8_t r1_l = vld1q_s16(r1 + i);
+ const int16x8_t r1_h = vld1q_s16(r1 + i + 8);
+ const int16x8_t d_l = vld1q_s16(d + i);
+ const int16x8_t d_h = vld1q_s16(d + i + 8);
+ // The following three lines are a bit inelegant compared to using a pair
+ // of vmovl_u8()... but it forces the compiler to emit a ZIP1, ZIP2 pair -
+ // which can be executed in parallel with the subsequent SSHL instructions.
+ // (SSHL can only be executed on half of the Neon pipes in modern Arm
+ // cores, whereas ZIP1/2 can be executed on all of them.)
+ const uint8x16x2_t m_u16 = vzipq_u8(vld1q_u8(m + i), vdupq_n_u8(0));
+ const int16x8_t m_l = vreinterpretq_s16_u8(m_u16.val[0]);
+ const int16x8_t m_h = vreinterpretq_s16_u8(m_u16.val[1]);
+
+ sum[0] = vshll_n_s16(vget_low_s16(r1_l), WEDGE_WEIGHT_BITS);
+ sum[1] = vshll_n_s16(vget_high_s16(r1_l), WEDGE_WEIGHT_BITS);
+ sum[2] = vshll_n_s16(vget_low_s16(r1_h), WEDGE_WEIGHT_BITS);
+ sum[3] = vshll_n_s16(vget_high_s16(r1_h), WEDGE_WEIGHT_BITS);
+
+ sum[0] = vmlal_s16(sum[0], vget_low_s16(m_l), vget_low_s16(d_l));
+ sum[1] = vmlal_s16(sum[1], vget_high_s16(m_l), vget_high_s16(d_l));
+ sum[2] = vmlal_s16(sum[2], vget_low_s16(m_h), vget_low_s16(d_h));
+ sum[3] = vmlal_s16(sum[3], vget_high_s16(m_h), vget_high_s16(d_h));
+
+ sum_s16[0] = vqmovn_s32(sum[0]);
+ sum_s16[1] = vqmovn_s32(sum[1]);
+ sum_s16[2] = vqmovn_s32(sum[2]);
+ sum_s16[3] = vqmovn_s32(sum[3]);
+
+ sse[0] = vmull_s16(sum_s16[0], sum_s16[0]);
+ sse[1] = vmull_s16(sum_s16[2], sum_s16[2]);
+ sse[0] = vmlal_s16(sse[0], sum_s16[1], sum_s16[1]);
+ sse[1] = vmlal_s16(sse[1], sum_s16[3], sum_s16[3]);
+
+ v_csse[0] = vpadalq_u32(v_csse[0], vreinterpretq_u32_s32(sse[0]));
+ v_csse[1] = vpadalq_u32(v_csse[1], vreinterpretq_u32_s32(sse[1]));
+
+ i += 16;
+ } while (i < N);
+
+ uint64_t csse = horizontal_add_u64x2(vaddq_u64(v_csse[0], v_csse[1]));
+ return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS);
+}
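[Editor's note] The arithmetic the NEON loop above vectorizes can be restated scalar-for-scalar: shift r1 up by WEDGE_WEIGHT_BITS, accumulate m*d, saturate to 16 bits (the vqmovn_s32 step), square, accumulate, then round. The sketch below is derived from the NEON code itself, assuming WEDGE_WEIGHT_BITS == 6 (consistent with MAX_MASK_VALUE above).

    #include <stdint.h>

    #define WEDGE_WEIGHT_BITS 6 /* assumed; matches MAX_MASK_VALUE == 64 */

    static uint64_t wedge_sse_scalar_sketch(const int16_t *r1, const int16_t *d,
                                            const uint8_t *m, int N) {
      uint64_t csse = 0;
      for (int i = 0; i < N; ++i) {
        int32_t t = ((int32_t)r1[i] << WEDGE_WEIGHT_BITS) + m[i] * d[i];
        /* vqmovn_s32 saturates to int16 before squaring. */
        if (t > INT16_MAX) t = INT16_MAX;
        if (t < INT16_MIN) t = INT16_MIN;
        csse += (uint64_t)(t * t);
      }
      /* ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS) */
      return (csse + (1u << (2 * WEDGE_WEIGHT_BITS - 1))) >>
             (2 * WEDGE_WEIGHT_BITS);
    }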
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/av1_quantize.c b/chromium/third_party/libaom/source/libaom/av1/encoder/av1_quantize.c
index 256558e8187..db61dbc3373 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/av1_quantize.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/av1_quantize.c
@@ -305,6 +305,7 @@ void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const SCAN_ORDER *sc, const QUANT_PARAM *qparam) {
const qm_val_t *qm_ptr = qparam->qmatrix;
const qm_val_t *iqm_ptr = qparam->iqmatrix;
+#if !CONFIG_REALTIME_ONLY
if (qparam->use_quant_b_adapt) {
// TODO(sarahparker) These quantize_b optimizations need SIMD
// implementations
@@ -336,34 +337,36 @@ void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
default: assert(0);
}
}
+ return;
+ }
+#endif // !CONFIG_REALTIME_ONLY
+
+ if (qm_ptr != NULL && iqm_ptr != NULL) {
+ aom_quantize_b_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
+ p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
+ dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
+ sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
} else {
- if (qm_ptr != NULL && iqm_ptr != NULL) {
- aom_quantize_b_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
- p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
- dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
- sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
- } else {
- switch (qparam->log_scale) {
- case 0:
- aom_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
- p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
- dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
- sc->iscan);
- break;
- case 1:
- aom_quantize_b_32x32(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
- p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
- dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
- sc->iscan);
- break;
- case 2:
- aom_quantize_b_64x64(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
- p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
- dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
- sc->iscan);
- break;
- default: assert(0);
- }
+ switch (qparam->log_scale) {
+ case 0:
+ aom_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
+ p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
+ dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
+ sc->iscan);
+ break;
+ case 1:
+ aom_quantize_b_32x32(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
+ p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
+ dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
+ sc->iscan);
+ break;
+ case 2:
+ aom_quantize_b_64x64(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
+ p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
+ dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
+ sc->iscan);
+ break;
+ default: assert(0);
}
}
}
@@ -446,6 +449,7 @@ void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
const QUANT_PARAM *qparam) {
const qm_val_t *qm_ptr = qparam->qmatrix;
const qm_val_t *iqm_ptr = qparam->iqmatrix;
+#if !CONFIG_REALTIME_ONLY
if (qparam->use_quant_b_adapt) {
if (qm_ptr != NULL && iqm_ptr != NULL) {
aom_highbd_quantize_b_adaptive_helper_c(
@@ -475,34 +479,36 @@ void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
default: assert(0);
}
}
+ return;
+ }
+#endif // !CONFIG_REALTIME_ONLY
+
+ if (qm_ptr != NULL && iqm_ptr != NULL) {
+ aom_highbd_quantize_b_helper_c(
+ coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
+ p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
+ sc->scan, sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
} else {
- if (qm_ptr != NULL && iqm_ptr != NULL) {
- aom_highbd_quantize_b_helper_c(
- coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
- p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr,
- sc->scan, sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale);
- } else {
- switch (qparam->log_scale) {
- case 0:
- aom_highbd_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
- p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
- dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
- sc->iscan);
- break;
- case 1:
- aom_highbd_quantize_b_32x32(
- coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
- p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
- eob_ptr, sc->scan, sc->iscan);
- break;
- case 2:
- aom_highbd_quantize_b_64x64(
- coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
- p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
- eob_ptr, sc->scan, sc->iscan);
- break;
- default: assert(0);
- }
+ switch (qparam->log_scale) {
+ case 0:
+ aom_highbd_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX,
+ p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr,
+ dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan,
+ sc->iscan);
+ break;
+ case 1:
+ aom_highbd_quantize_b_32x32(
+ coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
+ p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
+ eob_ptr, sc->scan, sc->iscan);
+ break;
+ case 2:
+ aom_highbd_quantize_b_64x64(
+ coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX,
+ p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX,
+ eob_ptr, sc->scan, sc->iscan);
+ break;
+ default: assert(0);
}
}
}
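[Editor's note] Both quantizer hunks are the same mechanical refactor: the adaptive-quantization branch becomes a guarded early return that is compiled out under CONFIG_REALTIME_ONLY, which un-nests the common path by one level. A minimal standalone model of the control-flow change (all names illustrative):

    #include <stdio.h>

    #define CONFIG_REALTIME_ONLY 0

    static void quantize_adaptive(void) { puts("adaptive"); }
    static void quantize_with_qm(void) { puts("qmatrix"); }
    static void quantize_plain(void) { puts("plain"); }

    static void quantize_facade(int use_adapt, int has_qm) {
    #if !CONFIG_REALTIME_ONLY
      if (use_adapt) { /* early return replaces the old if/else nesting */
        quantize_adaptive();
        return;
      }
    #endif
      if (has_qm)
        quantize_with_qm();
      else
        quantize_plain();
    }

    int main(void) {
      quantize_facade(1, 0);
      quantize_facade(0, 1);
      quantize_facade(0, 0);
      return 0;
    }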
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/av1_temporal_denoiser.c b/chromium/third_party/libaom/source/libaom/av1/encoder/av1_temporal_denoiser.c
index 26e0eda7726..881d252d024 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/av1_temporal_denoiser.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/av1_temporal_denoiser.c
@@ -233,7 +233,7 @@ static AV1_DENOISER_DECISION perform_motion_compensation(
frame == ALTREF_FRAME ||
(frame == GOLDEN_FRAME && use_gf_temporal_ref) ||
(frame != LAST_FRAME &&
- ((ctx->zeromv_lastref_sse<(5 * ctx->zeromv_sse)>> 2) ||
+ ((ctx->zeromv_lastref_sse < (5 * ctx->zeromv_sse) >> 2) ||
denoiser->denoising_level >= kDenHigh))) {
frame = LAST_FRAME;
ctx->newmv_sse = ctx->zeromv_lastref_sse;
@@ -348,7 +348,7 @@ void av1_denoiser_denoise(AV1_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
decision = perform_motion_compensation(
&cpi->common, denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx,
motion_magnitude, &zeromv_filter, cpi->svc.number_spatial_layers,
- cpi->source->y_width, cpi->svc.ref_idx[0], cpi->svc.ref_idx[3],
+ cpi->source->y_width, cpi->rtc_ref.ref_idx[0], cpi->rtc_ref.ref_idx[3],
cpi->ppi->use_svc, cpi->svc.spatial_layer_id, use_gf_temporal_ref);
if (decision == FILTER_BLOCK) {
@@ -395,10 +395,11 @@ static void swap_frame_buffer(YV12_BUFFER_CONFIG *const dest,
}
void av1_denoiser_update_frame_info(
- AV1_DENOISER *denoiser, YV12_BUFFER_CONFIG src, struct SVC *svc,
- FRAME_TYPE frame_type, int refresh_alt_ref_frame, int refresh_golden_frame,
- int refresh_last_frame, int alt_fb_idx, int gld_fb_idx, int lst_fb_idx,
- int resized, int svc_refresh_denoiser_buffers, int second_spatial_layer) {
+ AV1_DENOISER *denoiser, YV12_BUFFER_CONFIG src, struct RTC_REF *rtc_ref,
+ struct SVC *svc, FRAME_TYPE frame_type, int refresh_alt_ref_frame,
+ int refresh_golden_frame, int refresh_last_frame, int alt_fb_idx,
+ int gld_fb_idx, int lst_fb_idx, int resized,
+ int svc_refresh_denoiser_buffers, int second_spatial_layer) {
const int shift = second_spatial_layer ? denoiser->num_ref_frames : 0;
// Copy source into denoised reference buffers on KEY_FRAME or
// if the just encoded frame was resized. For SVC, copy source if the base
@@ -415,10 +416,10 @@ void av1_denoiser_update_frame_info(
return;
}
- if (svc->set_ref_frame_config) {
+ if (rtc_ref->set_ref_frame_config) {
int i;
for (i = 0; i < REF_FRAMES; i++) {
- if (svc->refresh[svc->spatial_layer_id] & (1 << i))
+ if (rtc_ref->refresh[svc->spatial_layer_id] & (1 << i))
copy_frame(&denoiser->running_avg_y[i + 1 + shift],
&denoiser->running_avg_y[INTRA_FRAME + shift]);
}
@@ -487,7 +488,7 @@ static int av1_denoiser_realloc_svc_helper(AV1_COMMON *cm,
&denoiser->running_avg_y[fb_idx], cm->width, cm->height,
cm->seq_params->subsampling_x, cm->seq_params->subsampling_y,
cm->seq_params->use_highbitdepth, AOM_BORDER_IN_PIXELS,
- cm->features.byte_alignment);
+ cm->features.byte_alignment, 0);
if (fail) {
av1_denoiser_free(denoiser);
return 1;
@@ -497,15 +498,16 @@ static int av1_denoiser_realloc_svc_helper(AV1_COMMON *cm,
}
int av1_denoiser_realloc_svc(AV1_COMMON *cm, AV1_DENOISER *denoiser,
- struct SVC *svc, int svc_buf_shift,
- int refresh_alt, int refresh_gld, int refresh_lst,
- int alt_fb_idx, int gld_fb_idx, int lst_fb_idx) {
+ struct RTC_REF *rtc_ref, struct SVC *svc,
+ int svc_buf_shift, int refresh_alt,
+ int refresh_gld, int refresh_lst, int alt_fb_idx,
+ int gld_fb_idx, int lst_fb_idx) {
int fail = 0;
- if (svc->set_ref_frame_config) {
+ if (rtc_ref->set_ref_frame_config) {
int i;
for (i = 0; i < REF_FRAMES; i++) {
if (cm->current_frame.frame_type == KEY_FRAME ||
- svc->refresh[svc->spatial_layer_id] & (1 << i)) {
+ rtc_ref->refresh[svc->spatial_layer_id] & (1 << i)) {
fail = av1_denoiser_realloc_svc_helper(cm, denoiser,
i + 1 + svc_buf_shift);
}
@@ -574,7 +576,7 @@ int av1_denoiser_alloc(AV1_COMMON *cm, struct SVC *svc, AV1_DENOISER *denoiser,
fail = aom_alloc_frame_buffer(
&denoiser->running_avg_y[i + denoiser->num_ref_frames * layer],
denoise_width, denoise_height, ssx, ssy, use_highbitdepth, border,
- legacy_byte_alignment);
+ legacy_byte_alignment, 0);
if (fail) {
av1_denoiser_free(denoiser);
return 1;
@@ -586,7 +588,7 @@ int av1_denoiser_alloc(AV1_COMMON *cm, struct SVC *svc, AV1_DENOISER *denoiser,
fail = aom_alloc_frame_buffer(
&denoiser->mc_running_avg_y[layer], denoise_width, denoise_height, ssx,
- ssy, use_highbitdepth, border, legacy_byte_alignment);
+ ssy, use_highbitdepth, border, legacy_byte_alignment, 0);
if (fail) {
av1_denoiser_free(denoiser);
return 1;
@@ -595,9 +597,9 @@ int av1_denoiser_alloc(AV1_COMMON *cm, struct SVC *svc, AV1_DENOISER *denoiser,
// denoiser->last_source only used for noise_estimation, so only for top
// layer.
- fail =
- aom_alloc_frame_buffer(&denoiser->last_source, width, height, ssx, ssy,
- use_highbitdepth, border, legacy_byte_alignment);
+ fail = aom_alloc_frame_buffer(&denoiser->last_source, width, height, ssx, ssy,
+ use_highbitdepth, border, legacy_byte_alignment,
+ 0);
if (fail) {
av1_denoiser_free(denoiser);
return 1;
@@ -671,8 +673,10 @@ void av1_denoiser_set_noise_level(AV1_COMP *const cpi, int noise_level) {
int64_t av1_scale_part_thresh(int64_t threshold, AV1_DENOISER_LEVEL noise_level,
CONTENT_STATE_SB content_state,
int temporal_layer_id) {
- if ((content_state.source_sad == kLowSad && content_state.low_sumdiff) ||
- (content_state.source_sad == kHighSad && content_state.low_sumdiff) ||
+ if ((content_state.source_sad_nonrd <= kLowSad &&
+ content_state.low_sumdiff) ||
+ (content_state.source_sad_nonrd == kHighSad &&
+ content_state.low_sumdiff) ||
(content_state.lighting_change && !content_state.low_sumdiff) ||
(noise_level == kDenHigh) || (temporal_layer_id != 0)) {
int64_t scaled_thr =
@@ -689,10 +693,10 @@ int64_t av1_scale_acskip_thresh(int64_t threshold,
AV1_DENOISER_LEVEL noise_level, int abs_sumdiff,
int temporal_layer_id) {
if (noise_level >= kDenLow && abs_sumdiff < 5)
- return threshold *=
- (noise_level == kDenLow) ? 2 : (temporal_layer_id == 2) ? 10 : 6;
- else
- return threshold;
+ threshold *= (noise_level == kDenLow) ? 2
+ : (temporal_layer_id == 2) ? 10
+ : 6;
+ return threshold;
}
void av1_denoiser_reset_on_first_frame(AV1_COMP *const cpi) {
@@ -708,6 +712,7 @@ void av1_denoiser_reset_on_first_frame(AV1_COMP *const cpi) {
void av1_denoiser_update_ref_frame(AV1_COMP *const cpi) {
AV1_COMMON *const cm = &cpi->common;
+ RTC_REF *const rtc_ref = &cpi->rtc_ref;
SVC *const svc = &cpi->svc;
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
@@ -737,7 +742,8 @@ void av1_denoiser_update_ref_frame(AV1_COMP *const cpi) {
svc->number_spatial_layers - svc->spatial_layer_id == 2 ? 1 : 0;
// Check if we need to allocate extra buffers in the denoiser
// for refreshed frames.
- if (av1_denoiser_realloc_svc(cm, &cpi->denoiser, svc, svc_buf_shift,
+ if (av1_denoiser_realloc_svc(cm, &cpi->denoiser, rtc_ref,
+ svc, svc_buf_shift,
cpi->refresh_alt_ref_frame,
cpi->refresh_golden_frame,
cpi->refresh_last_frame, cpi->alt_fb_idx,
@@ -747,10 +753,10 @@ void av1_denoiser_update_ref_frame(AV1_COMP *const cpi) {
#endif
}
av1_denoiser_update_frame_info(
- &cpi->denoiser, *cpi->source, svc, frame_type,
+ &cpi->denoiser, *cpi->source, rtc_ref, svc, frame_type,
cpi->refresh_frame.alt_ref_frame, cpi->refresh_frame.golden_frame, 1,
- svc->ref_idx[6], svc->ref_idx[3], svc->ref_idx[0], resize_pending,
- svc_refresh_denoiser_buffers, denoise_svc_second_layer);
+ rtc_ref->ref_idx[6], rtc_ref->ref_idx[3], rtc_ref->ref_idx[0],
+ resize_pending, svc_refresh_denoiser_buffers, denoise_svc_second_layer);
}
}
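[Editor's note] Besides the SVC-to-RTC_REF migration, the av1_scale_acskip_thresh() hunk above replaces a `return threshold *= ...` expression with a plain in-place multiply. Its behavior, restated standalone from the visible diff (the denoiser-level enum is stubbed for illustration):

    #include <stdint.h>

    enum { kDenLowLow = 0, kDenLow, kDenMedium, kDenHigh }; /* stub values */

    static int64_t scale_acskip_thresh_sketch(int64_t threshold, int noise_level,
                                              int abs_sumdiff,
                                              int temporal_layer_id) {
      if (noise_level >= kDenLow && abs_sumdiff < 5)
        threshold *= (noise_level == kDenLow)   ? 2
                     : (temporal_layer_id == 2) ? 10
                                                : 6;
      return threshold;
    }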
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/av1_temporal_denoiser.h b/chromium/third_party/libaom/source/libaom/av1/encoder/av1_temporal_denoiser.h
index 71c8c1c0e9c..14dcccce697 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/av1_temporal_denoiser.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/av1_temporal_denoiser.h
@@ -69,12 +69,14 @@ typedef struct {
struct AV1_COMP;
struct SVC;
+struct RTC_REF;
void av1_denoiser_update_frame_info(
- AV1_DENOISER *denoiser, YV12_BUFFER_CONFIG src, struct SVC *svc,
- FRAME_TYPE frame_type, int refresh_alt_ref_frame, int refresh_golden_frame,
- int refresh_last_frame, int alt_fb_idx, int gld_fb_idx, int lst_fb_idx,
- int resized, int svc_refresh_denoiser_buffers, int second_spatial_layer);
+ AV1_DENOISER *denoiser, YV12_BUFFER_CONFIG src, struct RTC_REF *rtc_ref,
+ struct SVC *svc, FRAME_TYPE frame_type, int refresh_alt_ref_frame,
+ int refresh_golden_frame, int refresh_last_frame, int alt_fb_idx,
+ int gld_fb_idx, int lst_fb_idx, int resized,
+ int svc_refresh_denoiser_buffers, int second_spatial_layer);
void av1_denoiser_denoise(struct AV1_COMP *cpi, MACROBLOCK *mb, int mi_row,
int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx,
@@ -88,9 +90,10 @@ void av1_denoiser_update_frame_stats(MB_MODE_INFO *mi, int64_t sse,
PICK_MODE_CONTEXT *ctx);
int av1_denoiser_realloc_svc(AV1_COMMON *cm, AV1_DENOISER *denoiser,
- struct SVC *svc, int svc_buf_shift,
- int refresh_alt, int refresh_gld, int refresh_lst,
- int alt_fb_idx, int gld_fb_idx, int lst_fb_idx);
+ struct RTC_REF *rtc, struct SVC *svc,
+ int svc_buf_shift, int refresh_alt,
+ int refresh_gld, int refresh_lst, int alt_fb_idx,
+ int gld_fb_idx, int lst_fb_idx);
int av1_denoiser_alloc(AV1_COMMON *cm, struct SVC *svc, AV1_DENOISER *denoiser,
int use_svc, int noise_sen, int width, int height,
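[Editor's note] The forward declaration `struct RTC_REF;` is all this header needs, since the new parameters are pointers; only the .c file has to see the full definition. The updated call shape, as already exercised in av1_denoiser_update_ref_frame() in the .c diff above:

    av1_denoiser_update_frame_info(
        &cpi->denoiser, *cpi->source, rtc_ref, svc, frame_type,
        cpi->refresh_frame.alt_ref_frame, cpi->refresh_frame.golden_frame, 1,
        rtc_ref->ref_idx[6], rtc_ref->ref_idx[3], rtc_ref->ref_idx[0],
        resize_pending, svc_refresh_denoiser_buffers, denoise_svc_second_layer);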
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/bitstream.c b/chromium/third_party/libaom/source/libaom/av1/encoder/bitstream.c
index f13c9d7a96b..a9e9fdc7fcf 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/bitstream.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/bitstream.c
@@ -462,7 +462,8 @@ static AOM_INLINE void write_segment_id(AV1_COMP *cpi, MACROBLOCKD *const xd,
AV1_COMMON *const cm = &cpi->common;
int cdf_num;
- const int pred = av1_get_spatial_seg_pred(cm, xd, &cdf_num);
+ const int pred = av1_get_spatial_seg_pred(cm, xd, &cdf_num,
+ cpi->cyclic_refresh->skip_over4x4);
const int mi_row = xd->mi_row;
const int mi_col = xd->mi_col;
@@ -2163,12 +2164,10 @@ static AOM_INLINE void wb_write_uniform(struct aom_write_bit_buffer *wb, int n,
static AOM_INLINE void write_tile_info_max_tile(
const AV1_COMMON *const cm, struct aom_write_bit_buffer *wb) {
- int width_mi =
- ALIGN_POWER_OF_TWO(cm->mi_params.mi_cols, cm->seq_params->mib_size_log2);
- int height_mi =
- ALIGN_POWER_OF_TWO(cm->mi_params.mi_rows, cm->seq_params->mib_size_log2);
- int width_sb = width_mi >> cm->seq_params->mib_size_log2;
- int height_sb = height_mi >> cm->seq_params->mib_size_log2;
+ int width_sb =
+ CEIL_POWER_OF_TWO(cm->mi_params.mi_cols, cm->seq_params->mib_size_log2);
+ int height_sb =
+ CEIL_POWER_OF_TWO(cm->mi_params.mi_rows, cm->seq_params->mib_size_log2);
int size_sb, i;
const CommonTileParams *const tiles = &cm->tiles;
@@ -2682,6 +2681,12 @@ static AOM_INLINE void write_global_motion_params(
struct aom_write_bit_buffer *wb, int allow_hp) {
const TransformationType type = params->wmtype;
+ // As a workaround for an AV1 spec bug, we avoid choosing TRANSLATION
+ // type models. Check here that we don't accidentally pick one somehow.
+ // See comments in gm_get_motion_vector() for details on the bug we're
+  // working around here.
+ assert(type != TRANSLATION);
+
aom_wb_write_bit(wb, type != IDENTITY);
if (type != IDENTITY) {
aom_wb_write_bit(wb, type == ROTZOOM);
@@ -2765,7 +2770,31 @@ static AOM_INLINE void write_global_motion(AV1_COMP *cpi,
}
}
-static int check_frame_refs_short_signaling(AV1_COMMON *const cm) {
+static int check_frame_refs_short_signaling(AV1_COMMON *const cm,
+ bool enable_ref_short_signaling) {
+ // In rtc case when res < 360p and speed >= 9, we turn on
+ // frame_refs_short_signaling if it won't break the decoder.
+ if (enable_ref_short_signaling) {
+ const int gld_map_idx = get_ref_frame_map_idx(cm, GOLDEN_FRAME);
+ const int base =
+ 1 << (cm->seq_params->order_hint_info.order_hint_bits_minus_1 + 1);
+
+ const int order_hint_group_cur =
+ cm->current_frame.display_order_hint / base;
+ const int order_hint_group_gld =
+ cm->ref_frame_map[gld_map_idx]->display_order_hint / base;
+ const int relative_dist = cm->current_frame.order_hint -
+ cm->ref_frame_map[gld_map_idx]->order_hint;
+
+ // If current frame and GOLDEN frame are in the same order_hint group, and
+    // they are no more than 64 frames apart, then return 1.
+ if (order_hint_group_cur == order_hint_group_gld && relative_dist >= 0 &&
+ relative_dist <= 64) {
+ return 1;
+ }
+ return 0;
+ }
+
// Check whether all references are distinct frames.
const RefCntBuffer *seen_bufs[FRAME_BUFFERS] = { NULL };
int num_refs = 0;
@@ -2843,7 +2872,13 @@ static AOM_INLINE void write_uncompressed_header_obu(
CurrentFrame *const current_frame = &cm->current_frame;
FeatureFlags *const features = &cm->features;
- current_frame->frame_refs_short_signaling = 0;
+ if (!cpi->sf.rt_sf.enable_ref_short_signaling ||
+ !seq_params->order_hint_info.enable_order_hint ||
+ seq_params->order_hint_info.enable_ref_frame_mvs) {
+ current_frame->frame_refs_short_signaling = 0;
+ } else {
+ current_frame->frame_refs_short_signaling = 1;
+ }
if (seq_params->still_picture) {
assert(cm->show_existing_frame == 0);
@@ -3009,12 +3044,20 @@ static AOM_INLINE void write_uncompressed_header_obu(
#endif // FRAME_REFS_SHORT_SIGNALING
if (current_frame->frame_refs_short_signaling) {
- // NOTE(zoeliu@google.com):
- // An example solution for encoder-side implementation on frame refs
- // short signaling, which is only turned on when the encoder side
- // decision on ref frames is identical to that at the decoder side.
+ // In rtc case when cpi->sf.rt_sf.enable_ref_short_signaling is true,
+ // we turn on frame_refs_short_signaling when the current frame and
+ // golden frame are in the same order_hint group, and their relative
+ // distance is <= 64 (in order to be decodable).
+
+ // For other cases, an example solution for encoder-side
+ // implementation on frame_refs_short_signaling is also provided in
+ // this function, where frame_refs_short_signaling is only turned on
+ // when the encoder side decision on ref frames is identical to that
+ // at the decoder side.
+
current_frame->frame_refs_short_signaling =
- check_frame_refs_short_signaling(cm);
+ check_frame_refs_short_signaling(
+ cm, cpi->sf.rt_sf.enable_ref_short_signaling);
}
if (seq_params->order_hint_info.enable_order_hint)
@@ -3400,6 +3443,7 @@ uint32_t av1_write_sequence_header_obu(const SequenceHeader *seq_params,
aom_wb_write_bit(
&wb, seq_params->op_params[i].display_model_param_present_flag);
if (seq_params->op_params[i].display_model_param_present_flag) {
+ assert(seq_params->op_params[i].initial_display_delay >= 1);
assert(seq_params->op_params[i].initial_display_delay <= 10);
aom_wb_write_literal(
&wb, seq_params->op_params[i].initial_display_delay - 1, 4);
@@ -3603,7 +3647,7 @@ static void write_large_scale_tile_obu(
}
}
- mem_put_le32(buf->data, tile_header);
+ mem_put_le32(buf->data, (MEM_VALUE_T)tile_header);
}
*total_size += tile_size;
@@ -3778,10 +3822,8 @@ void av1_accumulate_pack_bs_thread_data(AV1_COMP *const cpi,
int do_max_mv_magnitude_update = 1;
cpi->rc.coefficient_size += td->coefficient_size;
-#if CONFIG_FRAME_PARALLEL_ENCODE
// Disable max_mv_magnitude update for parallel frames based on update flag.
if (!cpi->do_frame_data_update) do_max_mv_magnitude_update = 0;
-#endif
if (cpi->sf.mv_sf.auto_mv_step_size && do_max_mv_magnitude_update)
cpi->mv_search_params.max_mv_magnitude =
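[Editor's note] The write_tile_info_max_tile() hunk swaps an align-then-shift pair for a single ceil-divide macro. Assuming the conventional aom_dsp definitions of both macros (copied below as an assumption), the two forms are identical, which this standalone check verifies exhaustively for small inputs:

    #include <assert.h>

    #define ALIGN_POWER_OF_TWO(v, n) \
      (((v) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
    #define CEIL_POWER_OF_TWO(v, n) (((v) + (1 << (n)) - 1) >> (n))

    int main(void) {
      for (int mi = 0; mi < 4096; ++mi)
        for (int n = 0; n < 8; ++n)
          assert((ALIGN_POWER_OF_TWO(mi, n) >> n) == CEIL_POWER_OF_TWO(mi, n));
      return 0;
    }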
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/block.h b/chromium/third_party/libaom/source/libaom/av1/encoder/block.h
index cca23e39fba..be2ba02a442 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/block.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/block.h
@@ -22,6 +22,7 @@
#include "av1/common/mvref_common.h"
#include "av1/encoder/enc_enums.h"
+#include "av1/encoder/mcomp_structs.h"
#if !CONFIG_REALTIME_ONLY
#include "av1/encoder/partition_cnn_weights.h"
#endif
@@ -382,6 +383,23 @@ typedef struct {
uint8_t variance_low[105];
} PartitionSearchInfo;
+/*!\cond */
+enum {
+ /**
+ * Do not prune transform depths.
+ */
+ TX_PRUNE_NONE = 0,
+ /**
+ * Prune largest transform (depth 0) based on NN model.
+ */
+ TX_PRUNE_LARGEST = 1,
+ /**
+ * Prune split transforms (depth>=1) based on NN model.
+ */
+ TX_PRUNE_SPLIT = 2,
+} UENUM1BYTE(TX_PRUNE_TYPE);
+/*!\endcond */
+
/*! \brief Defines the parameters used to perform txfm search.
*
* For the most part, this determines how various speed features are used.
@@ -430,7 +448,12 @@ typedef struct {
TX_MODE tx_mode_search_type;
/*!
- * Flag to enable/disable DC block prediction.
+ * Determines whether a block can be predicted as transform skip or DC only
+ * based on residual mean and variance.
+ * Type 0 : No skip block or DC only block prediction
+ * Type 1 : Prediction of skip block based on residual mean and variance
+ * Type 2 : Prediction of skip block or DC only block based on residual mean
+ * and variance
*/
unsigned int predict_dc_level;
@@ -439,6 +462,24 @@ typedef struct {
* during RD search.
*/
int use_qm_dist_metric;
+
+ /*!
+ * Keep track of previous mode evaluation stage type. This will be used to
+ * reset mb rd hash record when mode evaluation type changes.
+ */
+ int mode_eval_type;
+
+#if !CONFIG_REALTIME_ONLY
+ //! Indicates the transform depths for which RD evaluation is skipped.
+ TX_PRUNE_TYPE nn_prune_depths_for_intra_tx;
+
+ /*! \brief Indicates if NN model should be invoked to prune transform depths.
+ *
+ * Used to signal whether NN model should be evaluated to prune the R-D
+ * evaluation of specific transform depths.
+ */
+ bool enable_nn_prune_intra_tx_depths;
+#endif
} TxfmSearchParams;
/*!\cond */
@@ -455,7 +496,7 @@ typedef struct {
*/
typedef struct {
//! Whether to skip transform and quantization on a partition block level.
- int skip_txfm;
+ uint8_t skip_txfm;
/*! \brief Whether to skip transform and quantization on a txfm block level.
*
@@ -760,13 +801,17 @@ typedef struct {
/*!\cond */
typedef enum {
kZeroSad = 0,
- kLowSad = 1,
- kMedSad = 2,
- kHighSad = 3
+ kVeryLowSad = 1,
+ kLowSad = 2,
+ kMedSad = 3,
+ kHighSad = 4
} SOURCE_SAD;
typedef struct {
- SOURCE_SAD source_sad;
+ //! SAD levels in non-rd path
+ SOURCE_SAD source_sad_nonrd;
+ //! SAD levels in rd-path for var-based part qindex thresholds
+ SOURCE_SAD source_sad_rd;
int lighting_change;
int low_sumdiff;
} CONTENT_STATE_SB;
@@ -784,6 +829,14 @@ typedef struct {
int var;
} Block4x4VarInfo;
+#ifndef NDEBUG
+typedef struct SetOffsetsLoc {
+ int mi_row;
+ int mi_col;
+ BLOCK_SIZE bsize;
+} SetOffsetsLoc;
+#endif // NDEBUG
+
/*!\endcond */
/*! \brief Encoder's parameters related to the current coding block.
@@ -964,9 +1017,16 @@ typedef struct macroblock {
*/
int cnt_zeromv;
- /*!\brief Flag to force zeromv-skip block, for nonrd path.
+ /*!\brief Flag to force zeromv-skip at superblock level, for nonrd path.
+ *
+ * 0/1 imply zeromv-skip is disabled/enabled. 2 implies that the blocks
+ * in the superblock may be marked as zeromv-skip at block level.
*/
- int force_zeromv_skip;
+ int force_zeromv_skip_for_sb;
+
+ /*!\brief Flag to force zeromv-skip at block level, for nonrd path.
+ */
+ int force_zeromv_skip_for_blk;
/*! \brief Previous segment id for which qmatrices were updated.
* This is used to bypass setting of qmatrices if no change in qindex.
@@ -999,8 +1059,12 @@ typedef struct macroblock {
* This is used to measure how viable a reference frame is.
*/
int pred_mv_sad[REF_FRAMES];
- //! The minimum of \ref pred_mv_sad.
- int best_pred_mv_sad;
+ /*! \brief The minimum of \ref pred_mv_sad.
+ *
+ * Index 0 stores the minimum \ref pred_mv_sad across past reference frames.
+ * Index 1 stores the minimum \ref pred_mv_sad across future reference frames.
+ */
+ int best_pred_mv_sad[2];
//! The sad of the 1st mv ref (nearest).
int pred_mv0_sad[REF_FRAMES];
//! The sad of the 2nd mv ref (near).
@@ -1151,6 +1215,9 @@ typedef struct macroblock {
PixelLevelGradientInfo *pixel_gradient_info;
/*! \brief Flags indicating the availability of cached gradient info. */
bool is_sb_gradient_cached[PLANE_TYPES];
+
+ /*! \brief Flag to reuse predicted samples of inter block. */
+ bool reuse_inter_pred;
/**@}*/
/*****************************************************************************
@@ -1170,6 +1237,15 @@ typedef struct macroblock {
* extending outside the UMV borders
*/
FullMvLimits mv_limits;
+
+ /*! \brief Buffer for storing the search site config.
+ *
+ * When resize mode or super resolution mode is on, the stride of the
+ * reference frame does not always match what's specified in \ref
+   * MotionVectorSearchParams::search_site_cfg. When this happens, we update
+   * the search_site_cfg_buf here and use it for motion search.
+ */
+ search_site_config search_site_cfg_buf[NUM_DISTINCT_SEARCH_METHODS];
/**@}*/
/*****************************************************************************
@@ -1196,6 +1272,8 @@ typedef struct macroblock {
* of moving color objects.
*/
uint8_t color_sensitivity_sb[2];
+ //! Color sensitivity flag for the superblock for golden reference.
+ uint8_t color_sensitivity_sb_g[2];
//! Color sensitivity flag for the coding block.
uint8_t color_sensitivity[2];
/**@}*/
@@ -1229,6 +1307,10 @@ typedef struct macroblock {
* store source variance and log of source variance of each 4x4 sub-block.
*/
Block4x4VarInfo *src_var_info_of_4x4_sub_blocks;
+#ifndef NDEBUG
+  /*! \brief A record of the last av1_set_offsets() call, used to verify it ran */
+ SetOffsetsLoc last_set_offsets_loc;
+#endif // NDEBUG
} MACROBLOCK;
#undef SINGLE_REF_MODES
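[Editor's note] Several block.h changes split one field into a pair (source_sad into source_sad_nonrd/source_sad_rd, best_pred_mv_sad into a past/future array, force_zeromv_skip into _for_sb/_for_blk). A small sketch of how the documented tri-state superblock flag could be consumed on the nonrd path; the values follow the new doc comment, but the dispatch itself is illustrative, not encoder code:

    /* 0: zeromv-skip disabled for the SB; 1: forced on for the whole SB;
     * 2: decide per block via the block-level flag. */
    static int block_is_zeromv_skip(int force_zeromv_skip_for_sb,
                                    int force_zeromv_skip_for_blk) {
      switch (force_zeromv_skip_for_sb) {
        case 0: return 0;
        case 1: return 1;
        default: return force_zeromv_skip_for_blk; /* case 2 */
      }
    }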
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/cnn.c b/chromium/third_party/libaom/source/libaom/av1/encoder/cnn.c
index 599812f2199..639922ff4be 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/cnn.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/cnn.c
@@ -11,6 +11,7 @@
#include <assert.h>
#include <math.h>
+#include <stdbool.h>
#include "aom_dsp/aom_dsp_common.h"
#include "av1/common/av1_common_int.h"
@@ -55,13 +56,14 @@ static void free_tensor(TENSOR *tensor) {
}
}
-static void realloc_tensor(TENSOR *tensor, int channels, int width,
+static bool realloc_tensor(TENSOR *tensor, int channels, int width,
int height) {
const int newallocsize = channels * width * height;
if (tensor->allocsize < newallocsize) {
free_tensor(tensor);
tensor->buf[0] =
(float *)aom_malloc(sizeof(*tensor->buf[0]) * newallocsize);
+ if (!tensor->buf[0]) return false;
tensor->allocsize = newallocsize;
}
tensor->width = width;
@@ -70,6 +72,7 @@ static void realloc_tensor(TENSOR *tensor, int channels, int width,
tensor->channels = channels;
for (int c = 1; c < channels; ++c)
tensor->buf[c] = &tensor->buf[0][c * width * height];
+ return true;
}
static void copy_tensor(const TENSOR *src, int copy_channels, int dst_offset,
@@ -115,7 +118,7 @@ static void swap_tensor(TENSOR *t1, TENSOR *t2) {
// The concatenated tensor goes into dst with first the channels in
// original dst followed by the channels in the src
-static void concat_tensor(const TENSOR *src, TENSOR *dst) {
+static bool concat_tensor(const TENSOR *src, TENSOR *dst) {
assert(src->width == dst->width);
assert(src->height == dst->height);
@@ -126,7 +129,7 @@ static void concat_tensor(const TENSOR *src, TENSOR *dst) {
TENSOR t;
init_tensor(&t);
// allocate new buffers and copy first the dst channels
- realloc_tensor(&t, channels, dst->width, dst->height);
+ if (!realloc_tensor(&t, channels, dst->width, dst->height)) return false;
copy_tensor(dst, dst->channels, 0, &t);
// Swap the tensors and free the old buffers
swap_tensor(dst, &t);
@@ -136,6 +139,7 @@ static void concat_tensor(const TENSOR *src, TENSOR *dst) {
dst->buf[c] = &dst->buf[0][c * dst->width * dst->height];
// Copy the channels in src after the first dst_channels channels.
copy_tensor(src, src->channels, dst_channels, dst);
+ return true;
}
int check_tensor_equal_dims(TENSOR *t1, TENSOR *t2) {
@@ -150,9 +154,9 @@ int check_tensor_equal_size(TENSOR *t1, TENSOR *t2) {
void av1_find_cnn_layer_output_size(int in_width, int in_height,
const CNN_LAYER_CONFIG *layer_config,
int *out_width, int *out_height) {
+ assert(layer_config->skip_width > 0);
+ assert(layer_config->skip_height > 0);
if (!layer_config->deconvolve) {
- assert(layer_config->skip_width > 0);
- assert(layer_config->skip_height > 0);
switch (layer_config->pad) {
case PADDING_SAME_ZERO:
case PADDING_SAME_REPLICATE:
@@ -326,7 +330,7 @@ void av1_cnn_activate_c(float **output, int channels, int width, int height,
}
}
-static void copy_active_tensor_to_branches(const TENSOR *layer_active_tensor,
+static bool copy_active_tensor_to_branches(const TENSOR *layer_active_tensor,
const CNN_LAYER_CONFIG *layer_config,
int branch, TENSOR branch_output[]) {
const CNN_BRANCH_CONFIG *branch_config = &layer_config->branch_config;
@@ -338,11 +342,15 @@ static void copy_active_tensor_to_branches(const TENSOR *layer_active_tensor,
int copy_channels = branch_config->channels_to_copy > 0
? branch_config->channels_to_copy
: layer_active_tensor->channels;
- realloc_tensor(&branch_output[b], copy_channels,
- layer_active_tensor->width, layer_active_tensor->height);
+ if (!realloc_tensor(&branch_output[b], copy_channels,
+ layer_active_tensor->width,
+ layer_active_tensor->height)) {
+ return false;
+ }
copy_tensor(layer_active_tensor, copy_channels, 0, &branch_output[b]);
}
}
+ return true;
}
// CNNConvolve specific to maxpool set as 1, either skip_width or skip_height
@@ -892,10 +900,11 @@ void av1_cnn_deconvolve_c(const float **input, int in_width, int in_height,
}
}
-void av1_cnn_predict_c(const float **input, int in_width, int in_height,
+bool av1_cnn_predict_c(const float **input, int in_width, int in_height,
int in_stride, const CNN_CONFIG *cnn_config,
const CNN_THREAD_DATA *thread_data,
CNN_MULTI_OUT *output_struct) {
+ bool success = false;
TENSOR tensor1[CNN_MAX_BRANCHES] = { { 0 } };
TENSOR tensor2[CNN_MAX_BRANCHES] = { { 0 } };
@@ -938,8 +947,10 @@ void av1_cnn_predict_c(const float **input, int in_width, int in_height,
&o_height);
const int output_num = layer_config->output_num;
if (output_num == -1) { // Non-output layer
- realloc_tensor(&tensor2[branch], layer_config->out_channels, o_width,
- o_height);
+ if (!realloc_tensor(&tensor2[branch], layer_config->out_channels, o_width,
+ o_height)) {
+ goto Error;
+ }
} else { // Output layer
free_tensor(&tensor2[branch]);
assign_tensor(&tensor2[branch], output[output_num],
@@ -953,8 +964,10 @@ void av1_cnn_predict_c(const float **input, int in_width, int in_height,
!(branch_config->branches_to_combine & (1 << branch))));
if (layer_config->branch_copy_type == BRANCH_INPUT) {
- copy_active_tensor_to_branches(&tensor1[branch], layer_config, branch,
- tensor2);
+ if (!copy_active_tensor_to_branches(&tensor1[branch], layer_config,
+ branch, tensor2)) {
+ goto Error;
+ }
}
// Check consistency of input and output channels
assert(tensor1[branch].channels == layer_config->in_channels);
@@ -981,8 +994,10 @@ void av1_cnn_predict_c(const float **input, int in_width, int in_height,
}
if (layer_config->branch_copy_type == BRANCH_OUTPUT) {
- copy_active_tensor_to_branches(&tensor2[branch], layer_config, branch,
- tensor2);
+ if (!copy_active_tensor_to_branches(&tensor2[branch], layer_config,
+ branch, tensor2)) {
+ goto Error;
+ }
}
// Add tensors from other branches if needed
@@ -1018,7 +1033,7 @@ void av1_cnn_predict_c(const float **input, int in_width, int in_height,
if ((branch_config->branches_to_combine & (1 << b)) && b != branch) {
assert(check_tensor_equal_dims(&tensor2[b], &tensor2[branch]));
assert(tensor2[b].channels > 0);
- concat_tensor(&tensor2[b], &tensor2[branch]);
+ if (!concat_tensor(&tensor2[b], &tensor2[branch])) goto Error;
}
}
} else { // Output layer
@@ -1048,20 +1063,25 @@ void av1_cnn_predict_c(const float **input, int in_width, int in_height,
}
if (layer_config->branch_copy_type == BRANCH_COMBINED) {
- copy_active_tensor_to_branches(&tensor2[branch], layer_config, branch,
- tensor2);
+ if (!copy_active_tensor_to_branches(&tensor2[branch], layer_config,
+ branch, tensor2)) {
+ goto Error;
+ }
}
}
+ success = true;
+Error:
for (int b = 0; b < CNN_MAX_BRANCHES; ++b) {
free_tensor(&tensor1[b]);
free_tensor(&tensor2[b]);
}
+ return success;
}
// Assume output already has proper allocation
// Assume input image buffers all have same resolution and strides
-void av1_cnn_predict_img_multi_out(uint8_t **dgd, int width, int height,
+bool av1_cnn_predict_img_multi_out(uint8_t **dgd, int width, int height,
int stride, const CNN_CONFIG *cnn_config,
const CNN_THREAD_DATA *thread_data,
CNN_MULTI_OUT *output) {
@@ -1073,6 +1093,7 @@ void av1_cnn_predict_img_multi_out(uint8_t **dgd, int width, int height,
float *inputs[CNN_MAX_CHANNELS];
float *input_ =
(float *)aom_malloc(in_width * in_height * in_channels * sizeof(*input_));
+ if (!input_) return false;
const int in_stride = in_width;
for (int c = 0; c < in_channels; ++c) {
@@ -1107,15 +1128,16 @@ void av1_cnn_predict_img_multi_out(uint8_t **dgd, int width, int height,
input[i * in_stride + j] = (float)dgd[c][i * stride + j] / max_val;
}
}
- av1_cnn_predict((const float **)inputs, in_width, in_height, in_stride,
- cnn_config, thread_data, output);
+ bool success = av1_cnn_predict((const float **)inputs, in_width, in_height,
+ in_stride, cnn_config, thread_data, output);
aom_free(input_);
+ return success;
}
// Assume output already has proper allocation
// Assume input image buffers all have same resolution and strides
-void av1_cnn_predict_img_multi_out_highbd(uint16_t **dgd, int width, int height,
+bool av1_cnn_predict_img_multi_out_highbd(uint16_t **dgd, int width, int height,
int stride,
const CNN_CONFIG *cnn_config,
const CNN_THREAD_DATA *thread_data,
@@ -1129,6 +1151,7 @@ void av1_cnn_predict_img_multi_out_highbd(uint16_t **dgd, int width, int height,
float *inputs[CNN_MAX_CHANNELS];
float *input_ =
(float *)aom_malloc(in_width * in_height * in_channels * sizeof(*input_));
+ if (!input_) return false;
const int in_stride = in_width;
for (int c = 0; c < in_channels; ++c) {
@@ -1164,15 +1187,16 @@ void av1_cnn_predict_img_multi_out_highbd(uint16_t **dgd, int width, int height,
}
}
- av1_cnn_predict((const float **)inputs, in_width, in_height, in_stride,
- cnn_config, thread_data, output);
+ bool success = av1_cnn_predict((const float **)inputs, in_width, in_height,
+ in_stride, cnn_config, thread_data, output);
aom_free(input_);
+ return success;
}
// Assume output already has proper allocation
// Assume input image buffers all have same resolution and strides
-void av1_cnn_predict_img(uint8_t **dgd, int width, int height, int stride,
+bool av1_cnn_predict_img(uint8_t **dgd, int width, int height, int stride,
const CNN_CONFIG *cnn_config,
const CNN_THREAD_DATA *thread_data, float **output,
int out_stride) {
@@ -1184,13 +1208,13 @@ void av1_cnn_predict_img(uint8_t **dgd, int width, int height, int stride,
CNN_MULTI_OUT output_struct = { .output_channels = output_chs,
.output_strides = output_strides,
.output_buffer = output };
- av1_cnn_predict_img_multi_out(dgd, width, height, stride, cnn_config,
- thread_data, &output_struct);
+ return av1_cnn_predict_img_multi_out(dgd, width, height, stride, cnn_config,
+ thread_data, &output_struct);
}
// Assume output already has proper allocation
// Assume input image buffers all have same resolution and strides
-void av1_cnn_predict_img_highbd(uint16_t **dgd, int width, int height,
+bool av1_cnn_predict_img_highbd(uint16_t **dgd, int width, int height,
int stride, const CNN_CONFIG *cnn_config,
const CNN_THREAD_DATA *thread_data,
int bit_depth, float **output, int out_stride) {
@@ -1202,6 +1226,7 @@ void av1_cnn_predict_img_highbd(uint16_t **dgd, int width, int height,
CNN_MULTI_OUT output_struct = { .output_channels = output_chs,
.output_strides = output_strides,
.output_buffer = output };
- av1_cnn_predict_img_multi_out_highbd(dgd, width, height, stride, cnn_config,
- thread_data, bit_depth, &output_struct);
+ return av1_cnn_predict_img_multi_out_highbd(dgd, width, height, stride,
+ cnn_config, thread_data,
+ bit_depth, &output_struct);
}
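[Editor's note] The cnn.c changes apply one pattern throughout: allocation helpers now return bool, and av1_cnn_predict_c() funnels every failure to a single cleanup label so all tensors are freed on the error path. A minimal standalone model of that idiom (names illustrative):

    #include <stdbool.h>
    #include <stdlib.h>

    static bool grow(float **buf, size_t n) {
      float *p = (float *)realloc(*buf, n * sizeof(**buf));
      if (!p) return false; /* caller still owns the old *buf */
      *buf = p;
      return true;
    }

    static bool predict_model(size_t n) {
      bool success = false;
      float *t1 = NULL, *t2 = NULL;
      if (!grow(&t1, n)) goto Error;
      if (!grow(&t2, n)) goto Error;
      /* ... layer computations would go here ... */
      success = true;
    Error:
      free(t1); /* free(NULL) is a no-op, so partial failures are safe */
      free(t2);
      return success;
    }

    int main(void) { return predict_model(1024) ? 0 : 1; }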
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/cnn.h b/chromium/third_party/libaom/source/libaom/av1/encoder/cnn.h
index 3b55aa0b370..1a6c03a4c93 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/cnn.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/cnn.h
@@ -17,6 +17,7 @@ extern "C" {
#endif
#include <math.h>
+#include <stdbool.h>
#include "aom_util/aom_thread.h"
#include "config/av1_rtcd.h"
@@ -174,11 +175,11 @@ void av1_find_cnn_layer_output_size(int in_width, int in_height,
// Prediction functions from set of input image buffers. This function supports
// CNN with multiple outputs.
-void av1_cnn_predict_img_multi_out(uint8_t **dgd, int width, int height,
+bool av1_cnn_predict_img_multi_out(uint8_t **dgd, int width, int height,
int stride, const CNN_CONFIG *cnn_config,
const CNN_THREAD_DATA *thread_data,
struct CNN_MULTI_OUT *output);
-void av1_cnn_predict_img_multi_out_highbd(uint16_t **dgd, int width, int height,
+bool av1_cnn_predict_img_multi_out_highbd(uint16_t **dgd, int width, int height,
int stride,
const CNN_CONFIG *cnn_config,
const CNN_THREAD_DATA *thread_data,
@@ -186,11 +187,11 @@ void av1_cnn_predict_img_multi_out_highbd(uint16_t **dgd, int width, int height,
// Prediction functions from set of input image buffers. This function only
// supports a single output.
-void av1_cnn_predict_img(uint8_t **dgd, int width, int height, int stride,
+bool av1_cnn_predict_img(uint8_t **dgd, int width, int height, int stride,
const CNN_CONFIG *cnn_config,
const CNN_THREAD_DATA *thread_data, float **output,
int out_stride);
-void av1_cnn_predict_img_highbd(uint16_t **dgd, int width, int height,
+bool av1_cnn_predict_img_highbd(uint16_t **dgd, int width, int height,
int stride, const CNN_CONFIG *cnn_config,
const CNN_THREAD_DATA *thread_data,
int bit_depth, float **output, int out_stride);
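[Editor's note] Since the prediction entry points now report allocation failure, callers must check the result instead of assuming success. A hypothetical call site (the surrounding variables are assumed, not from this patch):

    /* Hypothetical caller; dgd, cnn_config, thread_data, output, etc. are
     * set up elsewhere. */
    if (!av1_cnn_predict_img(dgd, width, height, stride, cnn_config,
                             thread_data, output, out_stride)) {
      /* Out of memory inside the CNN: output was not fully written. */
      return AOM_CODEC_MEM_ERROR;
    }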
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/compound_type.c b/chromium/third_party/libaom/source/libaom/av1/encoder/compound_type.c
index 4f762b93ed7..39c505d43cc 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/compound_type.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/compound_type.c
@@ -465,7 +465,8 @@ static INLINE void compute_best_interintra_mode(
INTERINTRA_MODE *best_interintra_mode, int64_t *best_interintra_rd,
INTERINTRA_MODE interintra_mode, BLOCK_SIZE bsize) {
const AV1_COMMON *const cm = &cpi->common;
- int rate, skip_txfm_sb;
+ int rate;
+ uint8_t skip_txfm_sb;
int64_t dist, skip_sse_sb;
const int bw = block_size_wide[bsize];
mbmi->interintra_mode = interintra_mode;
@@ -688,7 +689,8 @@ static int handle_wedge_inter_intra_mode(
const int_mv mv0 = mbmi->mv[0];
// Refine motion vector for NEWMV case.
if (have_newmv_in_inter_mode(mbmi->mode)) {
- int rate_sum, skip_txfm_sb;
+ int rate_sum;
+ uint8_t skip_txfm_sb;
int64_t dist_sum, skip_sse_sb;
// get negative of mask
const uint8_t *mask =
@@ -1048,7 +1050,8 @@ static int64_t masked_compound_type_rd(
const COMPOUND_TYPE compound_type = mbmi->interinter_comp.type;
// This function will be called only for COMPOUND_WEDGE and COMPOUND_DIFFWTD
assert(compound_type == COMPOUND_WEDGE || compound_type == COMPOUND_DIFFWTD);
- int rate_sum, tmp_skip_txfm_sb;
+ int rate_sum;
+ uint8_t tmp_skip_txfm_sb;
int64_t dist_sum, tmp_skip_sse_sb;
pick_interinter_mask_type pick_interinter_mask[2] = { pick_interinter_wedge,
pick_interinter_seg };
@@ -1300,7 +1303,8 @@ int av1_compound_type_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
// Case COMPOUND_AVERAGE and COMPOUND_DISTWTD
if (cur_type < COMPOUND_WEDGE) {
if (cpi->sf.inter_sf.enable_fast_compound_mode_search == 2) {
- int rate_sum, tmp_skip_txfm_sb;
+ int rate_sum;
+ uint8_t tmp_skip_txfm_sb;
int64_t dist_sum, tmp_skip_sse_sb;
// Reuse data if matching record is found
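[Editor's note] These hunks follow from the block.h change above that narrowed skip_txfm to uint8_t: the RD-model helpers now write through a uint8_t pointer, so locals whose addresses are passed must match. A minimal illustration of why the declarations change (helper name and signature are illustrative):

    #include <stdint.h>

    static void model_rd_sketch(int *rate, int64_t *dist,
                                uint8_t *skip_txfm_sb, int64_t *skip_sse_sb) {
      *rate = 0;
      *dist = 0;
      *skip_txfm_sb = 1; /* writes exactly one byte */
      *skip_sse_sb = 0;
    }

    int main(void) {
      int rate;
      uint8_t skip_txfm_sb; /* was int; &skip_txfm_sb would then not convert */
      int64_t dist, skip_sse_sb;
      model_rd_sketch(&rate, &dist, &skip_txfm_sb, &skip_sse_sb);
      return skip_txfm_sb;
    }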
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/context_tree.c b/chromium/third_party/libaom/source/libaom/av1/encoder/context_tree.c
index d8735695482..f3287452f72 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/context_tree.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/context_tree.c
@@ -66,10 +66,16 @@ void av1_free_shared_coeff_buffer(PC_TREE_SHARED_BUFFERS *shared_bufs) {
PICK_MODE_CONTEXT *av1_alloc_pmc(const struct AV1_COMP *const cpi,
BLOCK_SIZE bsize,
PC_TREE_SHARED_BUFFERS *shared_bufs) {
- PICK_MODE_CONTEXT *ctx = NULL;
+ PICK_MODE_CONTEXT *volatile ctx = NULL;
const AV1_COMMON *const cm = &cpi->common;
struct aom_internal_error_info error;
+ if (setjmp(error.jmp)) {
+ av1_free_pmc(ctx, av1_num_planes(cm));
+ return NULL;
+ }
+ error.setjmp = 1;
+
AOM_CHECK_MEM_ERROR(&error, ctx, aom_calloc(1, sizeof(*ctx)));
ctx->rd_mode_is_ready = 0;
@@ -111,6 +117,12 @@ PICK_MODE_CONTEXT *av1_alloc_pmc(const struct AV1_COMP *const cpi,
return ctx;
}
+void av1_reset_pmc(PICK_MODE_CONTEXT *ctx) {
+ av1_zero_array(ctx->blk_skip, ctx->num_4x4_blk);
+ av1_zero_array(ctx->tx_type_map, ctx->num_4x4_blk);
+ av1_invalid_rd_stats(&ctx->rd_stats);
+}
+
void av1_free_pmc(PICK_MODE_CONTEXT *ctx, int num_planes) {
if (ctx == NULL) return;
@@ -152,6 +164,8 @@ PC_TREE *av1_alloc_pc_tree_node(BLOCK_SIZE bsize) {
pc_tree->horizontal[i] = NULL;
pc_tree->vertical[i] = NULL;
}
+
+#if !CONFIG_REALTIME_ONLY
for (int i = 0; i < 3; ++i) {
pc_tree->horizontala[i] = NULL;
pc_tree->horizontalb[i] = NULL;
@@ -161,6 +175,9 @@ PC_TREE *av1_alloc_pc_tree_node(BLOCK_SIZE bsize) {
for (int i = 0; i < 4; ++i) {
pc_tree->horizontal4[i] = NULL;
pc_tree->vertical4[i] = NULL;
+ }
+#endif
+ for (int i = 0; i < 4; ++i) {
pc_tree->split[i] = NULL;
}
@@ -188,6 +205,7 @@ void av1_free_pc_tree_recursive(PC_TREE *pc_tree, int num_planes, int keep_best,
if (!keep_best || (partition != PARTITION_VERT))
FREE_PMC_NODE(pc_tree->vertical[i]);
}
+#if !CONFIG_REALTIME_ONLY
for (int i = 0; i < 3; ++i) {
if (!keep_best || (partition != PARTITION_HORZ_A))
FREE_PMC_NODE(pc_tree->horizontala[i]);
@@ -204,7 +222,7 @@ void av1_free_pc_tree_recursive(PC_TREE *pc_tree, int num_planes, int keep_best,
if (!keep_best || (partition != PARTITION_VERT_4))
FREE_PMC_NODE(pc_tree->vertical4[i]);
}
-
+#endif
if (!keep_best || (partition != PARTITION_SPLIT)) {
for (int i = 0; i < 4; ++i) {
if (pc_tree->split[i] != NULL) {
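[Editor's note] The notable detail in av1_alloc_pmc() above is the volatile qualifier on ctx: AOM_CHECK_MEM_ERROR reports failure via longjmp(), and a non-volatile local modified after setjmp() has an indeterminate value when control returns through the jump (C11 7.13.2.1). A standalone model of the same pattern:

    #include <setjmp.h>
    #include <stdlib.h>

    static jmp_buf oom_jmp;

    static void *checked_calloc(size_t n, size_t size) {
      void *p = calloc(n, size);
      if (!p) longjmp(oom_jmp, 1);
      return p;
    }

    static void *alloc_ctx_model(void) {
      void *volatile ctx = NULL; /* volatile: survives the longjmp correctly */
      if (setjmp(oom_jmp)) {     /* re-entered only on allocation failure */
        free((void *)ctx);       /* ctx may be NULL or partially built */
        return NULL;
      }
      ctx = checked_calloc(1, 128);
      /* further checked_calloc() calls would longjmp back on failure ... */
      return (void *)ctx;
    }

    int main(void) { return alloc_ctx_model() ? 0 : 1; }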
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/context_tree.h b/chromium/third_party/libaom/source/libaom/av1/encoder/context_tree.h
index 65395cf3413..413535d621d 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/context_tree.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/context_tree.h
@@ -75,12 +75,14 @@ typedef struct PC_TREE {
PICK_MODE_CONTEXT *none;
PICK_MODE_CONTEXT *horizontal[2];
PICK_MODE_CONTEXT *vertical[2];
+#if !CONFIG_REALTIME_ONLY
PICK_MODE_CONTEXT *horizontala[3];
PICK_MODE_CONTEXT *horizontalb[3];
PICK_MODE_CONTEXT *verticala[3];
PICK_MODE_CONTEXT *verticalb[3];
PICK_MODE_CONTEXT *horizontal4[4];
PICK_MODE_CONTEXT *vertical4[4];
+#endif
struct PC_TREE *split[4];
int index;
} PC_TREE;
@@ -110,6 +112,7 @@ void av1_free_pc_tree_recursive(PC_TREE *tree, int num_planes, int keep_best,
PICK_MODE_CONTEXT *av1_alloc_pmc(const struct AV1_COMP *const cpi,
BLOCK_SIZE bsize,
PC_TREE_SHARED_BUFFERS *shared_bufs);
+void av1_reset_pmc(PICK_MODE_CONTEXT *ctx);
void av1_free_pmc(PICK_MODE_CONTEXT *ctx, int num_planes);
void av1_copy_tree_context(PICK_MODE_CONTEXT *dst_ctx,
PICK_MODE_CONTEXT *src_ctx);
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/encode_strategy.c b/chromium/third_party/libaom/source/libaom/av1/encoder/encode_strategy.c
index caf7aa58619..f4b21ad335d 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/encode_strategy.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/encode_strategy.c
@@ -176,6 +176,18 @@ static int choose_primary_ref_frame(
return PRIMARY_REF_NONE;
}
+#if !CONFIG_REALTIME_ONLY
+ if (cpi->use_ducky_encode) {
+ int wanted_fb = cpi->ppi->gf_group.primary_ref_idx[cpi->gf_frame_index];
+ for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
+ if (get_ref_frame_map_idx(cm, ref_frame) == wanted_fb)
+ return ref_frame - LAST_FRAME;
+ }
+
+ return PRIMARY_REF_NONE;
+ }
+#endif // !CONFIG_REALTIME_ONLY
+
// In large scale case, always use Last frame's frame contexts.
// Note(yunqing): In other cases, primary_ref_frame is chosen based on
// cpi->ppi->gf_group.layer_depth[cpi->gf_frame_index], which also controls
@@ -188,8 +200,7 @@ static int choose_primary_ref_frame(
// current frame
const int current_ref_type = get_current_frame_ref_type(cpi);
int wanted_fb = cpi->ppi->fb_of_context_type[current_ref_type];
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FRAME_PARALLEL_ENCODE_2 && \
- CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
GF_GROUP *const gf_group = &cpi->ppi->gf_group;
if (gf_group->update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE) {
@@ -207,8 +218,7 @@ static int choose_primary_ref_frame(
}
}
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FRAME_PARALLEL_ENCODE_2 &&
- // CONFIG_FPMT_TEST
+#endif // CONFIG_FPMT_TEST
int primary_ref_frame = PRIMARY_REF_NONE;
for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
if (get_ref_frame_map_idx(cm, ref_frame) == wanted_fb) {
@@ -248,12 +258,9 @@ static void adjust_frame_rate(AV1_COMP *cpi, int64_t ts_start, int64_t ts_end) {
if (this_duration) {
if (step) {
-#if CONFIG_FRAME_PARALLEL_ENCODE
cpi->new_framerate = 10000000.0 / this_duration;
-#endif
- av1_new_framerate(cpi, 10000000.0 / this_duration);
+ av1_new_framerate(cpi, cpi->new_framerate);
} else {
- double framerate;
// Average this frame's rate into the last second's average
// frame rate. If we haven't seen 1 second yet, then average
// over the whole interval seen.
@@ -262,17 +269,13 @@ static void adjust_frame_rate(AV1_COMP *cpi, int64_t ts_start, int64_t ts_end) {
double avg_duration = 10000000.0 / cpi->framerate;
avg_duration *= (interval - avg_duration + this_duration);
avg_duration /= interval;
-#if CONFIG_FRAME_PARALLEL_ENCODE
cpi->new_framerate = (10000000.0 / avg_duration);
// For parallel frames update cpi->framerate with new_framerate
// during av1_post_encode_updates()
- framerate =
+ double framerate =
(cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0)
? cpi->framerate
: cpi->new_framerate;
-#else
- framerate = (10000000.0 / avg_duration);
-#endif
av1_new_framerate(cpi, framerate);
}
}
@@ -307,8 +310,7 @@ int is_forced_keyframe_pending(struct lookahead_ctx *lookahead,
// Return the frame source, or NULL if we couldn't find one
static struct lookahead_entry *choose_frame_source(
AV1_COMP *const cpi, int *const flush, int *pop_lookahead,
- struct lookahead_entry **last_source,
- EncodeFrameParams *const frame_params) {
+ struct lookahead_entry **last_source, int *const show_frame) {
AV1_COMMON *const cm = &cpi->common;
const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
struct lookahead_entry *source = NULL;
@@ -350,9 +352,8 @@ static struct lookahead_entry *choose_frame_source(
src_index = 0;
}
- frame_params->show_frame = *pop_lookahead;
+ *show_frame = *pop_lookahead;
-#if CONFIG_FRAME_PARALLEL_ENCODE
#if CONFIG_FPMT_TEST
if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_ENCODE) {
#else
@@ -363,8 +364,7 @@ static struct lookahead_entry *choose_frame_source(
!is_stat_generation_stage(cpi))
src_index = gf_group->src_offset[cpi->gf_frame_index];
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
- if (frame_params->show_frame) {
+ if (*show_frame) {
// show frame, pop from buffer
// Get last frame source.
if (cm->current_frame.frame_number > 0) {
@@ -412,35 +412,35 @@ static void update_frame_flags(const AV1_COMMON *const cm,
const RefreshFrameInfo *const refresh_frame,
unsigned int *frame_flags) {
if (encode_show_existing_frame(cm)) {
- *frame_flags &= ~FRAMEFLAGS_GOLDEN;
- *frame_flags &= ~FRAMEFLAGS_BWDREF;
- *frame_flags &= ~FRAMEFLAGS_ALTREF;
- *frame_flags &= ~FRAMEFLAGS_KEY;
+ *frame_flags &= ~(uint32_t)FRAMEFLAGS_GOLDEN;
+ *frame_flags &= ~(uint32_t)FRAMEFLAGS_BWDREF;
+ *frame_flags &= ~(uint32_t)FRAMEFLAGS_ALTREF;
+ *frame_flags &= ~(uint32_t)FRAMEFLAGS_KEY;
return;
}
if (refresh_frame->golden_frame) {
*frame_flags |= FRAMEFLAGS_GOLDEN;
} else {
- *frame_flags &= ~FRAMEFLAGS_GOLDEN;
+ *frame_flags &= ~(uint32_t)FRAMEFLAGS_GOLDEN;
}
if (refresh_frame->alt_ref_frame) {
*frame_flags |= FRAMEFLAGS_ALTREF;
} else {
- *frame_flags &= ~FRAMEFLAGS_ALTREF;
+ *frame_flags &= ~(uint32_t)FRAMEFLAGS_ALTREF;
}
if (refresh_frame->bwd_ref_frame) {
*frame_flags |= FRAMEFLAGS_BWDREF;
} else {
- *frame_flags &= ~FRAMEFLAGS_BWDREF;
+ *frame_flags &= ~(uint32_t)FRAMEFLAGS_BWDREF;
}
if (cm->current_frame.frame_type == KEY_FRAME) {
*frame_flags |= FRAMEFLAGS_KEY;
} else {
- *frame_flags &= ~FRAMEFLAGS_KEY;
+ *frame_flags &= ~(uint32_t)FRAMEFLAGS_KEY;
}
}
@@ -513,12 +513,8 @@ static int get_free_ref_map_index(RefFrameMapPair ref_map_pairs[REF_FRAMES]) {
}
static int get_refresh_idx(RefFrameMapPair ref_frame_map_pairs[REF_FRAMES],
- int update_arf,
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
- GF_GROUP *gf_group, int gf_index,
- int enable_refresh_skip,
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
- int cur_frame_disp) {
+ int update_arf, GF_GROUP *gf_group, int gf_index,
+ int enable_refresh_skip, int cur_frame_disp) {
int arf_count = 0;
int oldest_arf_order = INT32_MAX;
int oldest_arf_idx = -1;
@@ -534,7 +530,6 @@ static int get_refresh_idx(RefFrameMapPair ref_frame_map_pairs[REF_FRAMES],
// Keep future frames and three closest previous frames in output order.
if (frame_order > cur_frame_disp - 3) continue;
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
if (enable_refresh_skip) {
int skip_frame = 0;
// Prevent refreshing a frame in gf_group->skip_frame_refresh.
@@ -548,7 +543,6 @@ static int get_refresh_idx(RefFrameMapPair ref_frame_map_pairs[REF_FRAMES],
}
if (skip_frame) continue;
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
// Keep track of the oldest level 1 frame if the current frame is also level
// 1.
@@ -572,17 +566,14 @@ static int get_refresh_idx(RefFrameMapPair ref_frame_map_pairs[REF_FRAMES],
if (update_arf && arf_count > 2) return oldest_arf_idx;
if (oldest_idx >= 0) return oldest_idx;
if (oldest_arf_idx >= 0) return oldest_arf_idx;
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
if (oldest_idx == -1) {
assert(arf_count > 2 && enable_refresh_skip);
return oldest_arf_idx;
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
assert(0 && "No valid refresh index found");
return -1;
}
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FRAME_PARALLEL_ENCODE_2
// Computes the reference refresh index for INTNL_ARF_UPDATE frame.
int av1_calc_refresh_idx_for_intnl_arf(
AV1_COMP *cpi, RefFrameMapPair ref_frame_map_pairs[REF_FRAMES],
@@ -603,7 +594,6 @@ int av1_calc_refresh_idx_for_intnl_arf(
return refresh_idx;
}
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FRAME_PARALLEL_ENCODE_2
int av1_get_refresh_frame_flags(
const AV1_COMP *const cpi, const EncodeFrameParams *const frame_params,
@@ -625,16 +615,26 @@ int av1_get_refresh_frame_flags(
// flags to 0 to keep things consistent.
if (frame_params->show_existing_frame) return 0;
- const SVC *const svc = &cpi->svc;
- if (is_frame_droppable(svc, ext_refresh_frame_flags)) return 0;
+ const RTC_REF *const rtc_ref = &cpi->rtc_ref;
+ if (is_frame_droppable(rtc_ref, ext_refresh_frame_flags)) return 0;
- int refresh_mask = 0;
+#if !CONFIG_REALTIME_ONLY
+ if (cpi->use_ducky_encode &&
+ cpi->ducky_encode_info.frame_info.gop_mode == DUCKY_ENCODE_GOP_MODE_RCL) {
+ int new_fb_map_idx = cpi->ppi->gf_group.update_ref_idx[gf_index];
+ if (new_fb_map_idx == INVALID_IDX) return 0;
+ return 1 << new_fb_map_idx;
+ }
+#endif // !CONFIG_REALTIME_ONLY
+ int refresh_mask = 0;
if (ext_refresh_frame_flags->update_pending) {
- if (svc->set_ref_frame_config) {
+ if (rtc_ref->set_ref_frame_config ||
+ use_rtc_reference_structure_one_layer(cpi)) {
for (unsigned int i = 0; i < INTER_REFS_PER_FRAME; i++) {
- int ref_frame_map_idx = svc->ref_idx[i];
- refresh_mask |= svc->refresh[ref_frame_map_idx] << ref_frame_map_idx;
+ int ref_frame_map_idx = rtc_ref->ref_idx[i];
+ refresh_mask |= rtc_ref->refresh[ref_frame_map_idx]
+ << ref_frame_map_idx;
}
return refresh_mask;
}
@@ -688,16 +688,11 @@ int av1_get_refresh_frame_flags(
refresh_mask = 1 << free_fb_index;
return refresh_mask;
}
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
const int enable_refresh_skip = !is_one_pass_rt_params(cpi);
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
const int update_arf = frame_update_type == ARF_UPDATE;
const int refresh_idx =
- get_refresh_idx(ref_frame_map_pairs, update_arf,
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
- &cpi->ppi->gf_group, gf_index, enable_refresh_skip,
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
- cur_disp_order);
+ get_refresh_idx(ref_frame_map_pairs, update_arf, &cpi->ppi->gf_group,
+ gf_index, enable_refresh_skip, cur_disp_order);
return 1 << refresh_idx;
}
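
For context, the refresh flags computed above form a plain bitmask over the reference slots: bit i set means slot i is overwritten by the current frame. A small sketch of the convention (the slot count is hard-coded to AV1's eight reference frames):

    #include <assert.h>
    #include <stdio.h>

    /* One bit per reference slot; REF_FRAMES is 8 in AV1. */
    static int refresh_mask_for_slot(int slot) {
      assert(slot >= 0 && slot < 8);
      return 1 << slot;
    }

    int main(void) {
      /* Refreshing slots 0 and 3 yields mask 0b1001. */
      const int mask = refresh_mask_for_slot(0) | refresh_mask_for_slot(3);
      printf("0x%X\n", mask); /* prints 0x9 */
      return 0;
    }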
@@ -763,6 +758,7 @@ static int denoise_and_encode(AV1_COMP *const cpi, uint8_t *const dest,
apply_filtering = cpi->sf.hl_sf.second_alt_ref_filtering;
}
}
+
#if CONFIG_COLLECT_COMPONENT_TIMING
if (cpi->oxcf.pass == 2) start_timing(cpi, apply_filtering_time);
#endif
@@ -797,6 +793,18 @@ static int denoise_and_encode(AV1_COMP *const cpi, uint8_t *const dest,
}
if (is_second_arf) {
+    // Allocate memory for the tf_buf_second_arf buffer only when it is
+    // required.
+ int ret = aom_realloc_frame_buffer(
+ &cpi->ppi->tf_info.tf_buf_second_arf, oxcf->frm_dim_cfg.width,
+ oxcf->frm_dim_cfg.height, cm->seq_params->subsampling_x,
+ cm->seq_params->subsampling_y, cm->seq_params->use_highbitdepth,
+ cpi->oxcf.border_in_pixels, cm->features.byte_alignment, NULL, NULL,
+ NULL, cpi->oxcf.tool_cfg.enable_global_motion, 0);
+ if (ret)
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
+ "Failed to allocate tf_buf_second_arf");
+
YV12_BUFFER_CONFIG *tf_buf_second_arf =
&cpi->ppi->tf_info.tf_buf_second_arf;
// We didn't apply temporal filtering for second arf ahead in
@@ -812,12 +820,19 @@ static int denoise_and_encode(AV1_COMP *const cpi, uint8_t *const dest,
if (show_existing_alt_ref) {
aom_extend_frame_borders(tf_buf_second_arf, av1_num_planes(cm));
frame_input->source = tf_buf_second_arf;
- aom_copy_metadata_to_frame_buffer(frame_input->source,
- source_buffer->metadata);
}
// Currently INTNL_ARF_UPDATE only does show_existing.
cpi->common.showable_frame |= 1;
}
+
+ // Copy source metadata to the temporal filtered frame
+ if (source_buffer->metadata &&
+ aom_copy_metadata_to_frame_buffer(frame_input->source,
+ source_buffer->metadata)) {
+ aom_internal_error(
+ cm->error, AOM_CODEC_MEM_ERROR,
+ "Failed to copy source metadata to the temporal filtered frame");
+ }
}
#if CONFIG_COLLECT_COMPONENT_TIMING
if (cpi->oxcf.pass == 2) end_timing(cpi, apply_filtering_time);
@@ -828,8 +843,7 @@ static int denoise_and_encode(AV1_COMP *const cpi, uint8_t *const dest,
cm->show_frame = frame_params->show_frame;
cm->current_frame.frame_type = frame_params->frame_type;
// TODO(bohanli): Why is this? what part of it is necessary?
- av1_set_frame_size(cpi, cm->superres_upscaled_width,
- cm->superres_upscaled_height);
+ av1_set_frame_size(cpi, cm->width, cm->height);
if (set_mv_params) av1_set_mv_search_params(cpi);
#if CONFIG_RD_COMMAND
@@ -852,7 +866,10 @@ static int denoise_and_encode(AV1_COMP *const cpi, uint8_t *const dest,
// In rare cases, it's possible to have a non-ARF/GF update_type here.
// We should set allow_tpl to zero in this situation.
allow_tpl =
- allow_tpl && (update_type == ARF_UPDATE || update_type == GF_UPDATE);
+ allow_tpl && (update_type == ARF_UPDATE || update_type == GF_UPDATE ||
+ (cpi->use_ducky_encode &&
+ cpi->ducky_encode_info.frame_info.gop_mode ==
+ DUCKY_ENCODE_GOP_MODE_RCL));
}
if (allow_tpl) {
@@ -967,27 +984,43 @@ static void set_unmapped_ref(RefBufMapData *buffer_map, int n_bufs,
}
void av1_get_ref_frames(RefFrameMapPair ref_frame_map_pairs[REF_FRAMES],
- int cur_frame_disp,
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
- const AV1_COMP *cpi, int gf_index,
+ int cur_frame_disp, const AV1_COMP *cpi, int gf_index,
int is_parallel_encode,
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
int remapped_ref_idx[REF_FRAMES]) {
int buf_map_idx = 0;
// Initialize reference frame mappings.
for (int i = 0; i < REF_FRAMES; ++i) remapped_ref_idx[i] = INVALID_IDX;
+#if !CONFIG_REALTIME_ONLY
+ if (cpi->use_ducky_encode &&
+ cpi->ducky_encode_info.frame_info.gop_mode == DUCKY_ENCODE_GOP_MODE_RCL) {
+ int valid_rf_idx = 0;
+ for (int rf = LAST_FRAME; rf < REF_FRAMES; ++rf) {
+ if (cpi->ppi->gf_group.ref_frame_list[gf_index][rf] != INVALID_IDX) {
+ remapped_ref_idx[rf - LAST_FRAME] =
+ cpi->ppi->gf_group.ref_frame_list[gf_index][rf];
+ valid_rf_idx = remapped_ref_idx[rf - LAST_FRAME];
+ }
+ }
+
+ for (int i = 0; i < REF_FRAMES; ++i) {
+ if (remapped_ref_idx[i] == INVALID_IDX)
+ remapped_ref_idx[i] = valid_rf_idx;
+ }
+
+ return;
+ }
+#endif // !CONFIG_REALTIME_ONLY
+
RefBufMapData buffer_map[REF_FRAMES];
int n_bufs = 0;
memset(buffer_map, 0, REF_FRAMES * sizeof(buffer_map[0]));
int min_level = MAX_ARF_LAYERS;
int max_level = 0;
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
GF_GROUP *gf_group = &cpi->ppi->gf_group;
int skip_ref_unmapping = 0;
int is_one_pass_rt = is_one_pass_rt_params(cpi);
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
// Go through current reference buffers and store display order, pyr level,
// and map index.
@@ -1041,7 +1074,6 @@ void av1_get_ref_frames(RefFrameMapPair ref_frame_map_pairs[REF_FRAMES],
add_ref_to_slot(&buffer_map[i], remapped_ref_idx, BWDREF_FRAME);
}
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
// During parallel encodes of lower layer frames, exclude the first frame
// (frame_parallel_level 1) from being used for the reference assignment of
// the second frame (frame_parallel_level 2).
@@ -1067,7 +1099,6 @@ void av1_get_ref_frames(RefFrameMapPair ref_frame_map_pairs[REF_FRAMES],
// skip the call to set_unmapped_ref(). Applicable in steady state.
if (buffer_map[i].used) skip_ref_unmapping = 1;
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
// Keep track of where the frames change from being past frames to future
// frames.
@@ -1087,9 +1118,7 @@ void av1_get_ref_frames(RefFrameMapPair ref_frame_map_pairs[REF_FRAMES],
}
// Find the buffer to be excluded from the mapping.
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
if (!skip_ref_unmapping)
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
set_unmapped_ref(buffer_map, n_bufs, n_min_level_refs, min_level,
cur_frame_disp);
@@ -1270,6 +1299,14 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
cpi->skip_tpl_setup_stats = 0;
#if !CONFIG_REALTIME_ONLY
+ if (oxcf->pass != AOM_RC_FIRST_PASS) {
+ TplParams *const tpl_data = &cpi->ppi->tpl_data;
+ if (tpl_data->tpl_stats_pool[0] == NULL) {
+ av1_setup_tpl_buffers(cpi->ppi, &cm->mi_params, oxcf->frm_dim_cfg.width,
+ oxcf->frm_dim_cfg.height, 0,
+ oxcf->gf_cfg.lag_in_frames);
+ }
+ }
cpi->twopass_frame.this_frame = NULL;
const int use_one_pass_rt_params = is_one_pass_rt_params(cpi);
if (!use_one_pass_rt_params && !is_stat_generation_stage(cpi)) {
@@ -1277,7 +1314,6 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
start_timing(cpi, av1_get_second_pass_params_time);
#endif
-#if CONFIG_FRAME_PARALLEL_ENCODE
// Initialise frame_level_rate_correction_factors with value previous
// to the parallel frames.
if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
@@ -1291,9 +1327,9 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
cpi->ppi->p_rc.rate_correction_factors[i];
}
}
+
// copy mv_stats from ppi to frame_level cpi.
cpi->mv_stats = cpi->ppi->mv_stats;
-#endif
av1_get_second_pass_params(cpi, &frame_params, *frame_flags);
#if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, av1_get_second_pass_params_time);
@@ -1330,7 +1366,7 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
frame_params.show_frame = 1;
} else {
source = choose_frame_source(cpi, &flush, pop_lookahead, &last_source,
- &frame_params);
+ &frame_params.show_frame);
}
if (source == NULL) { // If no source was found, we can't encode a frame.
@@ -1344,11 +1380,9 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
return -1;
}
-#if CONFIG_FRAME_PARALLEL_ENCODE
// reset src_offset to allow actual encode call for this frame to get its
// source.
gf_group->src_offset[cpi->gf_frame_index] = 0;
-#endif
// Source may be changed if temporal filtered later.
frame_input.source = &source->img;
@@ -1367,13 +1401,13 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
av1_apply_encoding_flags(cpi, source->flags);
*frame_flags = (source->flags & AOM_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
cpi->framerate = cpi->temp_framerate;
}
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#endif // CONFIG_FPMT_TEST
// Shown frames and arf-overlay frames need frame-rate consideration
if (frame_params.show_frame)
@@ -1391,6 +1425,7 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
// only one operating point supported now
const int64_t pts64 = ticks_to_timebase_units(timestamp_ratio, *time_stamp);
if (pts64 < 0 || pts64 > UINT32_MAX) return AOM_CODEC_ERROR;
+
cm->frame_presentation_time = (uint32_t)pts64;
}
@@ -1398,16 +1433,16 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
start_timing(cpi, av1_get_one_pass_rt_params_time);
#endif
#if CONFIG_REALTIME_ONLY
- av1_get_one_pass_rt_params(cpi, &frame_params, *frame_flags);
- if (cpi->oxcf.speed >= 5 && cpi->ppi->number_spatial_layers == 1 &&
- cpi->ppi->number_temporal_layers == 1)
- av1_set_reference_structure_one_pass_rt(cpi, cpi->gf_frame_index == 0);
+ av1_get_one_pass_rt_params(cpi, &frame_params.frame_type, &frame_input,
+ *frame_flags);
+ if (use_rtc_reference_structure_one_layer(cpi))
+ av1_set_rtc_reference_structure_one_layer(cpi, cpi->gf_frame_index == 0);
#else
if (use_one_pass_rt_params) {
- av1_get_one_pass_rt_params(cpi, &frame_params, *frame_flags);
- if (cpi->oxcf.speed >= 5 && cpi->ppi->number_spatial_layers == 1 &&
- cpi->ppi->number_temporal_layers == 1)
- av1_set_reference_structure_one_pass_rt(cpi, cpi->gf_frame_index == 0);
+ av1_get_one_pass_rt_params(cpi, &frame_params.frame_type, &frame_input,
+ *frame_flags);
+ if (use_rtc_reference_structure_one_layer(cpi))
+ av1_set_rtc_reference_structure_one_layer(cpi, cpi->gf_frame_index == 0);
}
#endif
#if CONFIG_COLLECT_COMPONENT_TIMING
@@ -1423,22 +1458,18 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
frame_params.frame_type = INTER_FRAME;
}
- // TODO(david.turner@argondesign.com): Move all the encode strategy
- // (largely near av1_get_compressed_data) in here
-
- // TODO(david.turner@argondesign.com): Change all the encode strategy to
- // modify frame_params instead of cm or cpi.
-
// Per-frame encode speed. In theory this can vary, but things may have
// been written assuming speed-level will not change within a sequence, so
// this parameter should be used with caution.
frame_params.speed = oxcf->speed;
- // Work out some encoding parameters specific to the pass:
- if (has_no_stats_stage(cpi) && oxcf->q_cfg.aq_mode == CYCLIC_REFRESH_AQ) {
- av1_cyclic_refresh_update_parameters(cpi);
- } else if (is_stat_generation_stage(cpi)) {
- cpi->td.mb.e_mbd.lossless[0] = is_lossless_requested(&oxcf->rc_cfg);
+#if !CONFIG_REALTIME_ONLY
+ // Set forced key frames when necessary. For two-pass encoding / lap mode,
+  // this is already handled by av1_get_second_pass_params. However, when no
+ // stats are available, we still need to check if the new frame is a keyframe.
+ // For one pass rt, this is already checked in av1_get_one_pass_rt_params.
+ if (!use_one_pass_rt_params &&
+ (is_stat_generation_stage(cpi) || has_no_stats_stage(cpi))) {
// Current frame is coded as a key-frame for any of the following cases:
// 1) First frame of a video
// 2) For all-intra frame encoding
@@ -1449,9 +1480,18 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
if (kf_requested && frame_update_type != OVERLAY_UPDATE &&
frame_update_type != INTNL_OVERLAY_UPDATE) {
frame_params.frame_type = KEY_FRAME;
- } else {
+ } else if (is_stat_generation_stage(cpi)) {
+ // For stats generation, set the frame type to inter here.
frame_params.frame_type = INTER_FRAME;
}
+ }
+#endif
+
+ // Work out some encoding parameters specific to the pass:
+ if (has_no_stats_stage(cpi) && oxcf->q_cfg.aq_mode == CYCLIC_REFRESH_AQ) {
+ av1_cyclic_refresh_update_parameters(cpi);
+ } else if (is_stat_generation_stage(cpi)) {
+ cpi->td.mb.e_mbd.lossless[0] = is_lossless_requested(&oxcf->rc_cfg);
} else if (is_stat_consumption_stage(cpi)) {
#if CONFIG_MISMATCH_DEBUG
mismatch_move_frame_idx_w();
@@ -1476,7 +1516,6 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
gf_group->refbuf_state[cpi->gf_frame_index], force_refresh_all);
if (!is_stat_generation_stage(cpi)) {
- const RefCntBuffer *ref_frames[INTER_REFS_PER_FRAME];
const YV12_BUFFER_CONFIG *ref_frame_buf[INTER_REFS_PER_FRAME];
RefFrameMapPair ref_frame_map_pairs[REF_FRAMES];
@@ -1486,28 +1525,33 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
cpi->common.current_frame.frame_number + order_offset;
int get_ref_frames = 0;
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
get_ref_frames =
(cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 1 : 0;
-#endif // CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#endif // CONFIG_FPMT_TEST
if (get_ref_frames ||
gf_group->frame_parallel_level[cpi->gf_frame_index] == 0) {
if (!ext_flags->refresh_frame.update_pending) {
- av1_get_ref_frames(ref_frame_map_pairs, cur_frame_disp,
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
- cpi, cpi->gf_frame_index, 1,
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
- cm->remapped_ref_idx);
- } else if (cpi->svc.set_ref_frame_config) {
+ av1_get_ref_frames(ref_frame_map_pairs, cur_frame_disp, cpi,
+ cpi->gf_frame_index, 1, cm->remapped_ref_idx);
+ } else if (cpi->rtc_ref.set_ref_frame_config ||
+ use_rtc_reference_structure_one_layer(cpi)) {
for (unsigned int i = 0; i < INTER_REFS_PER_FRAME; i++)
- cm->remapped_ref_idx[i] = cpi->svc.ref_idx[i];
+ cm->remapped_ref_idx[i] = cpi->rtc_ref.ref_idx[i];
}
}
// Get the reference frames
+ bool has_ref_frames = false;
for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) {
- ref_frames[i] = get_ref_frame_buf(cm, ref_frame_priority_order[i]);
- ref_frame_buf[i] = ref_frames[i] != NULL ? &ref_frames[i]->buf : NULL;
+ const RefCntBuffer *ref_frame =
+ get_ref_frame_buf(cm, ref_frame_priority_order[i]);
+ ref_frame_buf[i] = ref_frame != NULL ? &ref_frame->buf : NULL;
+ if (ref_frame != NULL) has_ref_frames = true;
+ }
+ if (!has_ref_frames && (frame_params.frame_type == INTER_FRAME ||
+ frame_params.frame_type == S_FRAME)) {
+ return AOM_CODEC_ERROR;
}
// Work out which reference frame slots may be used.
@@ -1525,23 +1569,15 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
frame_params.order_offset = gf_group->arf_src_offset[cpi->gf_frame_index];
-#if CONFIG_FRAME_PARALLEL_ENCODE
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
// Call av1_get_refresh_frame_flags() if refresh index not available.
if (!cpi->refresh_idx_available) {
-#endif
-#endif
frame_params.refresh_frame_flags = av1_get_refresh_frame_flags(
cpi, &frame_params, frame_update_type, cpi->gf_frame_index,
cur_frame_disp, ref_frame_map_pairs);
-#if CONFIG_FRAME_PARALLEL_ENCODE
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
} else {
assert(cpi->ref_refresh_index != INVALID_IDX);
frame_params.refresh_frame_flags = (1 << cpi->ref_refresh_index);
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
// Make the frames marked as is_frame_non_ref to non-reference frames.
if (gf_group->is_frame_non_ref[cpi->gf_frame_index])
@@ -1636,7 +1672,8 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
// Leave a signal for a higher level caller about if this frame is droppable
if (*size > 0) {
- cpi->droppable = is_frame_droppable(&cpi->svc, &ext_flags->refresh_frame);
+ cpi->droppable =
+ is_frame_droppable(&cpi->rtc_ref, &ext_flags->refresh_frame);
}
return AOM_CODEC_OK;
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/encode_strategy.h b/chromium/third_party/libaom/source/libaom/av1/encoder/encode_strategy.h
index a04c483f50d..c1d14d134cf 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/encode_strategy.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/encode_strategy.h
@@ -86,11 +86,8 @@ int av1_get_refresh_ref_frame_map(int refresh_frame_flags);
* in AV1Common.
*/
void av1_get_ref_frames(RefFrameMapPair ref_frame_map_pairs[REF_FRAMES],
- int cur_frame_disp,
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
- const AV1_COMP *cpi, int gf_index,
+ int cur_frame_disp, const AV1_COMP *cpi, int gf_index,
int is_parallel_encode,
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
int remapped_ref_idx[REF_FRAMES]);
int is_forced_keyframe_pending(struct lookahead_ctx *lookahead,
@@ -98,12 +95,12 @@ int is_forced_keyframe_pending(struct lookahead_ctx *lookahead,
const COMPRESSOR_STAGE compressor_stage);
static AOM_INLINE int is_frame_droppable(
- const SVC *const svc,
+ const RTC_REF *const rtc_ref,
const ExtRefreshFrameFlagsInfo *const ext_refresh_frame_flags) {
// Droppable frames are only used with external refresh flags. VoD settings
// won't trigger this use case.
- if (svc->set_ref_frame_config)
- return svc->non_reference_frame;
+ if (rtc_ref->set_ref_frame_config)
+ return rtc_ref->non_reference_frame;
else if (ext_refresh_frame_flags->update_pending)
return !(ext_refresh_frame_flags->alt_ref_frame ||
ext_refresh_frame_flags->alt2_ref_frame ||
@@ -130,13 +127,9 @@ static AOM_INLINE int get_current_frame_ref_type(const AV1_COMP *const cpi) {
}
}
-#if CONFIG_FRAME_PARALLEL_ENCODE
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
int av1_calc_refresh_idx_for_intnl_arf(
AV1_COMP *cpi, RefFrameMapPair ref_frame_map_pairs[REF_FRAMES],
int gf_index);
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
/*!\endcond */
#ifdef __cplusplus
} // extern "C"
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/encodeframe.c b/chromium/third_party/libaom/source/libaom/av1/encoder/encodeframe.c
index 418c907c3c8..8feeb73aa1d 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/encodeframe.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/encodeframe.c
@@ -147,28 +147,50 @@ static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
};
/*!\endcond */
-unsigned int av1_get_sby_perpixel_variance(const AV1_COMP *cpi,
- const struct buf_2d *ref,
- BLOCK_SIZE bs) {
- unsigned int sse;
- const unsigned int var =
- cpi->ppi->fn_ptr[bs].vf(ref->buf, ref->stride, AV1_VAR_OFFS, 0, &sse);
- return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
+void av1_init_rtc_counters(MACROBLOCK *const x) {
+ av1_init_cyclic_refresh_counters(x);
+ x->cnt_zeromv = 0;
}
-unsigned int av1_high_get_sby_perpixel_variance(const AV1_COMP *cpi,
- const struct buf_2d *ref,
- BLOCK_SIZE bs, int bd) {
+void av1_accumulate_rtc_counters(AV1_COMP *cpi, const MACROBLOCK *const x) {
+ if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ)
+ av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh, x);
+ cpi->rc.cnt_zeromv += x->cnt_zeromv;
+}
+
+unsigned int av1_get_perpixel_variance(const AV1_COMP *cpi,
+ const MACROBLOCKD *xd,
+ const struct buf_2d *ref,
+ BLOCK_SIZE bsize, int plane,
+ int use_hbd) {
+ const int subsampling_x = xd->plane[plane].subsampling_x;
+ const int subsampling_y = xd->plane[plane].subsampling_y;
+ const BLOCK_SIZE plane_bsize =
+ get_plane_block_size(bsize, subsampling_x, subsampling_y);
unsigned int var, sse;
- assert(bd == 8 || bd == 10 || bd == 12);
- const int off_index = (bd - 8) >> 1;
- const uint16_t *high_var_offs[3] = { AV1_HIGH_VAR_OFFS_8,
- AV1_HIGH_VAR_OFFS_10,
- AV1_HIGH_VAR_OFFS_12 };
- var = cpi->ppi->fn_ptr[bs].vf(ref->buf, ref->stride,
- CONVERT_TO_BYTEPTR(high_var_offs[off_index]), 0,
- &sse);
- return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
+ if (use_hbd) {
+ const int bd = xd->bd;
+ assert(bd == 8 || bd == 10 || bd == 12);
+ const int off_index = (bd - 8) >> 1;
+ static const uint16_t *high_var_offs[3] = { AV1_HIGH_VAR_OFFS_8,
+ AV1_HIGH_VAR_OFFS_10,
+ AV1_HIGH_VAR_OFFS_12 };
+ var = cpi->ppi->fn_ptr[plane_bsize].vf(
+ ref->buf, ref->stride, CONVERT_TO_BYTEPTR(high_var_offs[off_index]), 0,
+ &sse);
+ } else {
+ var = cpi->ppi->fn_ptr[plane_bsize].vf(ref->buf, ref->stride, AV1_VAR_OFFS,
+ 0, &sse);
+ }
+ return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[plane_bsize]);
+}
+
+unsigned int av1_get_perpixel_variance_facade(const AV1_COMP *cpi,
+ const MACROBLOCKD *xd,
+ const struct buf_2d *ref,
+ BLOCK_SIZE bsize, int plane) {
+ const int use_hbd = is_cur_buf_hbd(xd);
+ return av1_get_perpixel_variance(cpi, xd, ref, bsize, plane, use_hbd);
}
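
Both variance helpers above end with ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[plane_bsize]), i.e. the block variance divided by the pixel count with rounding. A standalone sketch of that normalization, assuming num_pels_log2 >= 1 (true for every AV1 block size):

    #include <stdio.h>

    /* Rounded right shift, equivalent to libaom's ROUND_POWER_OF_TWO(value, n):
     * (value + 2^(n-1)) >> n divides by 2^n and rounds to nearest. */
    static unsigned int per_pixel_variance(unsigned int var, int num_pels_log2) {
      return (var + (1u << (num_pels_log2 - 1))) >> num_pels_log2;
    }

    int main(void) {
      /* A 64x64 block has 4096 pixels, so num_pels_log2 is 12. */
      printf("%u\n", per_pixel_variance(40960, 12)); /* prints 10 */
      return 0;
    }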
void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
@@ -202,7 +224,7 @@ void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
* \param[in] mi_col Block column (in "MI_SIZE" units) index
* \param[out] num_planes Number of image planes (e.g. Y,U,V)
*
- * \return No return value but updates macroblock and thread data
+ * \remark No return value but updates macroblock and thread data
* related to the q / q delta to be used.
*/
static AOM_INLINE void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
@@ -473,25 +495,14 @@ static AOM_INLINE void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
+ PC_TREE *const pc_root = td->rt_pc_root;
- // Grade the temporal variation of the sb, the grade will be used to decide
- // fast mode search strategy for coding blocks
- if (sf->rt_sf.source_metrics_sb_nonrd &&
- cpi->svc.number_spatial_layers <= 1 &&
- cm->current_frame.frame_type != KEY_FRAME) {
- if (!cpi->sf.rt_sf.check_scene_detection || cpi->rc.frame_source_sad > 0)
- av1_source_content_sb(cpi, x, mi_row, mi_col);
- else
- x->content_state_sb.source_sad = kZeroSad;
- }
#if CONFIG_RT_ML_PARTITIONING
if (sf->part_sf.partition_search_type == ML_BASED_PARTITION) {
- PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size);
RD_STATS dummy_rdc;
get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
av1_nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, pc_root);
- av1_free_pc_tree_recursive(pc_root, av1_num_planes(cm), 0, 0);
return;
}
#endif
@@ -511,19 +522,16 @@ static AOM_INLINE void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
sf->part_sf.partition_search_type == VAR_BASED_PARTITION);
set_cb_offsets(td->mb.cb_offset, 0, 0);
- // Adjust and encode the superblock
- PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size);
-
// Initialize the flag to skip cdef to 1.
+ const int block64_in_sb = (sb_size == BLOCK_128X128) ? 2 : 1;
if (sf->rt_sf.skip_cdef_sb) {
// If 128x128 block is used, we need to set the flag for all 4 64x64 sub
// "blocks".
- const int block64_in_sb = (sb_size == BLOCK_128X128) ? 2 : 1;
for (int r = 0; r < block64_in_sb; ++r) {
for (int c = 0; c < block64_in_sb; ++c) {
const int idx_in_sb =
r * MI_SIZE_64X64 * cm->mi_params.mi_stride + c * MI_SIZE_64X64;
- if (mi[idx_in_sb]) mi[idx_in_sb]->skip_cdef_curr_sb = 1;
+ if (mi[idx_in_sb]) mi[idx_in_sb]->cdef_strength = 1;
}
}
}
@@ -537,20 +545,18 @@ static AOM_INLINE void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
end_timing(cpi, nonrd_use_partition_time);
#endif
- if (sf->rt_sf.skip_cdef_sb) {
+ if (sf->rt_sf.skip_cdef_sb && block64_in_sb == 2) {
// If 128x128 block is used, we need to set the flag for all 4 64x64 sub
// "blocks".
- const int block64_in_sb = (sb_size == BLOCK_128X128) ? 2 : 1;
- const int skip = mi[0]->skip_cdef_curr_sb;
+ const int skip = mi[0]->cdef_strength;
for (int r = 0; r < block64_in_sb; ++r) {
for (int c = 0; c < block64_in_sb; ++c) {
const int idx_in_sb =
r * MI_SIZE_64X64 * cm->mi_params.mi_stride + c * MI_SIZE_64X64;
- if (mi[idx_in_sb]) mi[idx_in_sb]->skip_cdef_curr_sb = skip;
+ if (mi[idx_in_sb]) mi[idx_in_sb]->cdef_strength = skip;
}
}
}
- av1_free_pc_tree_recursive(pc_root, av1_num_planes(cm), 0, 0);
}
// This function initializes the stats for encode_rd_sb.
@@ -602,6 +608,8 @@ static INLINE void init_encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
(void)gather_tpl_data;
#endif
+ x->reuse_inter_pred = false;
+ x->txfm_search_params.mode_eval_type = DEFAULT_EVAL;
reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
av1_zero(x->picked_ref_frames_mask);
av1_invalid_rd_stats(rd_cost);
@@ -773,6 +781,124 @@ static AOM_INLINE int delay_wait_for_top_right_sb(const AV1_COMP *const cpi) {
return 0;
}
+/*!\brief Calculate source SAD at superblock level using 64x64 block source SAD
+ *
+ * \ingroup partition_search
+ * \callgraph
+ * \callergraph
+ */
+static AOM_INLINE uint64_t get_sb_source_sad(const AV1_COMP *cpi, int mi_row,
+ int mi_col) {
+ if (cpi->src_sad_blk_64x64 == NULL) return UINT64_MAX;
+
+ const AV1_COMMON *const cm = &cpi->common;
+ const int blk_64x64_in_mis = (cm->seq_params->sb_size == BLOCK_128X128)
+ ? (cm->seq_params->mib_size >> 1)
+ : cm->seq_params->mib_size;
+ const int num_blk_64x64_cols =
+ (cm->mi_params.mi_cols + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
+ const int num_blk_64x64_rows =
+ (cm->mi_params.mi_rows + blk_64x64_in_mis - 1) / blk_64x64_in_mis;
+ const int blk_64x64_col_index = mi_col / blk_64x64_in_mis;
+ const int blk_64x64_row_index = mi_row / blk_64x64_in_mis;
+ uint64_t curr_sb_sad = UINT64_MAX;
+ const uint64_t *const src_sad_blk_64x64_data =
+ &cpi->src_sad_blk_64x64[blk_64x64_col_index +
+ blk_64x64_row_index * num_blk_64x64_cols];
+ if (cm->seq_params->sb_size == BLOCK_128X128 &&
+ blk_64x64_col_index + 1 < num_blk_64x64_cols &&
+ blk_64x64_row_index + 1 < num_blk_64x64_rows) {
+ // Calculate SB source SAD by accumulating source SAD of 64x64 blocks in the
+ // superblock
+ curr_sb_sad = src_sad_blk_64x64_data[0] + src_sad_blk_64x64_data[1] +
+ src_sad_blk_64x64_data[num_blk_64x64_cols] +
+ src_sad_blk_64x64_data[num_blk_64x64_cols + 1];
+ } else if (cm->seq_params->sb_size == BLOCK_64X64) {
+ curr_sb_sad = src_sad_blk_64x64_data[0];
+ }
+ return curr_sb_sad;
+}
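
The indexing above walks a row-major grid of per-64x64 source SADs; for a 128x128 superblock, the SB SAD is the sum of the four 64x64 entries it covers. A minimal sketch of the same lookup (names are illustrative, not libaom API):

    #include <stdint.h>

    /* sad64: row-major grid of per-64x64 source SADs with cols64 columns.
     * Returns the SAD of the 128x128 superblock whose top-left 64x64 block is
     * at (row, col); the caller ensures row + 1 and col + 1 are in range. */
    static uint64_t sb128_source_sad(const uint64_t *sad64, int cols64,
                                     int row, int col) {
      const uint64_t *p = &sad64[row * cols64 + col];
      return p[0] + p[1] + p[cols64] + p[cols64 + 1];
    }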
+
+/*!\brief Determine whether grading the content can be skipped based on SAD stats
+ *
+ * \ingroup partition_search
+ * \callgraph
+ * \callergraph
+ */
+static AOM_INLINE bool is_calc_src_content_needed(AV1_COMP *cpi,
+ MACROBLOCK *const x,
+ int mi_row, int mi_col) {
+ const uint64_t curr_sb_sad = get_sb_source_sad(cpi, mi_row, mi_col);
+ if (curr_sb_sad == UINT64_MAX) return true;
+ if (curr_sb_sad == 0) {
+ x->content_state_sb.source_sad_nonrd = kZeroSad;
+ return false;
+ }
+ AV1_COMMON *const cm = &cpi->common;
+ bool do_calc_src_content = true;
+
+ if (cpi->oxcf.speed < 9) return do_calc_src_content;
+
+ // TODO(yunqing): Tune/validate the thresholds for 128x128 SB size.
+ if (AOMMIN(cm->width, cm->height) < 360) {
+ // Derive Average 64x64 block source SAD from SB source SAD
+ const uint64_t avg_64x64_blk_sad =
+ (cm->seq_params->sb_size == BLOCK_128X128) ? ((curr_sb_sad + 2) >> 2)
+ : curr_sb_sad;
+
+    // The threshold is determined based on the kLowSad and kHighSad thresholds
+    // and test results.
+ const uint64_t thresh_low = 15000;
+ const uint64_t thresh_high = 40000;
+
+ if (avg_64x64_blk_sad > thresh_low && avg_64x64_blk_sad < thresh_high) {
+ do_calc_src_content = false;
+ // Note: set x->content_state_sb.source_sad_rd as well if this is extended
+ // to RTC rd path.
+ x->content_state_sb.source_sad_nonrd = kMedSad;
+ }
+ }
+
+ return do_calc_src_content;
+}
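
The average 64x64 SAD above is derived as (curr_sb_sad + 2) >> 2, a divide-by-four with rounding, and grading is skipped only when that average lands in the mid-range band. A compact sketch of the decision, with the thresholds copied from the code above:

    #include <stdbool.h>
    #include <stdint.h>

    /* True when the per-64x64 SAD falls in the band where full grading can be
     * skipped and the superblock is simply labelled medium-SAD. */
    static bool sad_in_skip_band(uint64_t sb_sad, bool is_128x128_sb) {
      /* (x + 2) >> 2 averages four 64x64 SADs with rounding. */
      const uint64_t avg64 = is_128x128_sb ? (sb_sad + 2) >> 2 : sb_sad;
      return avg64 > 15000 && avg64 < 40000;
    }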
+
+/*!\brief Determine whether grading the content is needed based on speed features and frame stats
+ *
+ * \ingroup partition_search
+ * \callgraph
+ * \callergraph
+ */
+// TODO(any): consolidate sfs to make interface cleaner
+static AOM_INLINE void grade_source_content_sb(AV1_COMP *cpi,
+ MACROBLOCK *const x,
+ TileDataEnc *tile_data,
+ int mi_row, int mi_col) {
+ AV1_COMMON *const cm = &cpi->common;
+ if (cm->current_frame.frame_type == KEY_FRAME) {
+ assert(x->content_state_sb.source_sad_nonrd == kMedSad);
+ assert(x->content_state_sb.source_sad_rd == kMedSad);
+ return;
+ }
+ bool calc_src_content = false;
+
+ if (cpi->sf.rt_sf.source_metrics_sb_nonrd &&
+ cpi->svc.number_spatial_layers <= 1) {
+ if (!cpi->sf.rt_sf.check_scene_detection || cpi->rc.frame_source_sad > 0) {
+ calc_src_content = is_calc_src_content_needed(cpi, x, mi_row, mi_col);
+ } else {
+ x->content_state_sb.source_sad_nonrd = kZeroSad;
+ }
+ } else if ((cpi->sf.rt_sf.var_part_based_on_qidx >= 1) &&
+ (cm->width * cm->height <= 352 * 288)) {
+ if (cpi->rc.frame_source_sad > 0)
+ calc_src_content = true;
+ else
+ x->content_state_sb.source_sad_rd = kZeroSad;
+ }
+ if (calc_src_content)
+ av1_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
+}
+
/*!\brief Encode a superblock row by breaking it into superblocks
*
* \ingroup partition_search
@@ -849,12 +975,15 @@ static AOM_INLINE void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
// Reset color coding related parameters
x->color_sensitivity_sb[0] = 0;
x->color_sensitivity_sb[1] = 0;
+ x->color_sensitivity_sb_g[0] = 0;
+ x->color_sensitivity_sb_g[1] = 0;
x->color_sensitivity[0] = 0;
x->color_sensitivity[1] = 0;
- x->content_state_sb.source_sad = kMedSad;
+ x->content_state_sb.source_sad_nonrd = kMedSad;
+ x->content_state_sb.source_sad_rd = kMedSad;
x->content_state_sb.lighting_change = 0;
x->content_state_sb.low_sumdiff = 0;
- x->force_zeromv_skip = 0;
+ x->force_zeromv_skip_for_sb = 0;
if (cpi->oxcf.mode == ALLINTRA) {
x->intra_sb_rdmult_modifier = 128;
@@ -881,6 +1010,10 @@ static AOM_INLINE void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
init_src_var_info_of_4x4_sub_blocks(cpi, x->src_var_info_of_4x4_sub_blocks,
sb_size);
+  // Grade the temporal variation of the sb; the grade will be used to decide
+  // the fast mode search strategy for coding blocks.
+ grade_source_content_sb(cpi, x, tile_data, mi_row, mi_col);
+
// encode the superblock
if (use_nonrd_mode) {
encode_nonrd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
@@ -899,6 +1032,7 @@ static AOM_INLINE void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
enc_row_mt->sync_write_ptr(row_mt_sync, sb_row, sb_col_in_tile,
sb_cols_in_tile);
}
+
#if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, encode_sb_row_time);
#endif
@@ -1123,15 +1257,10 @@ static AOM_INLINE void encode_tiles(AV1_COMP *cpi) {
cpi->td.rd_counts.seg_tmp_pred_cost[1] = 0;
cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
- // Reset cyclic refresh counters.
- av1_init_cyclic_refresh_counters(&cpi->td.mb);
-
+ av1_init_rtc_counters(&cpi->td.mb);
av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
- // Accumulate cyclic refresh params.
- if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ &&
- !frame_is_intra_only(&cpi->common))
- av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh,
- &cpi->td.mb);
+ if (!frame_is_intra_only(&cpi->common))
+ av1_accumulate_rtc_counters(cpi, &cpi->td.mb);
cpi->intrabc_used |= cpi->td.intrabc_used;
cpi->deltaq_used |= cpi->td.deltaq_used;
}
@@ -1329,6 +1458,43 @@ static int allow_deltaq_mode(AV1_COMP *cpi) {
#endif // !CONFIG_REALTIME_ONLY
}
+#define FORCE_ZMV_SKIP_128X128_BLK_DIFF 10000
+#define FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF 4
+
+// Populates block level thresholds for force zeromv-skip decision
+static void populate_thresh_to_force_zeromv_skip(AV1_COMP *cpi) {
+ if (cpi->sf.rt_sf.part_early_exit_zeromv == 0) return;
+
+ // Threshold for forcing zeromv-skip decision is as below:
+ // For 128x128 blocks, threshold is 10000 and per pixel threshold is 0.6103.
+ // For 64x64 blocks, threshold is 5000 and per pixel threshold is 1.221
+ // allowing slightly higher error for smaller blocks.
+  //   per-pixel thresh(64x64) / per-pixel thresh(128x128)
+  //     = sqrt(area(64x64) / area(128x128)) = sqrt(1/4) = 1/2
+ // Thus, per pixel thresholds for blocks of size 32x32, 16x16,... can be
+ // chosen as 2.442, 4.884,.... As the per pixel error tends to be higher for
+ // small blocks, the same is clipped to 4.
+ const unsigned int thresh_exit_128x128_part = FORCE_ZMV_SKIP_128X128_BLK_DIFF;
+ const int num_128x128_pix =
+ block_size_wide[BLOCK_128X128] * block_size_high[BLOCK_128X128];
+
+ for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; bsize++) {
+ const int num_block_pix = block_size_wide[bsize] * block_size_high[bsize];
+
+ // Calculate the threshold for zeromv-skip decision based on area of the
+ // partition
+ unsigned int thresh_exit_part_blk =
+ (unsigned int)(thresh_exit_128x128_part *
+ sqrt((double)num_block_pix / num_128x128_pix) +
+ 0.5);
+ thresh_exit_part_blk = AOMMIN(
+ thresh_exit_part_blk,
+ (unsigned int)(FORCE_ZMV_SKIP_MAX_PER_PIXEL_DIFF * num_block_pix));
+ cpi->zeromv_skip_thresh_exit_part[bsize] = thresh_exit_part_blk;
+ }
+}
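
A standalone sketch of the derivation above: the 128x128 threshold is scaled by the square root of the area ratio, and the per-pixel error is capped at 4 (constants match the defines above):

    #include <math.h>
    #include <stdio.h>

    static unsigned int zeromv_skip_thresh(int blk_w, int blk_h) {
      const double num_128x128_pix = 128.0 * 128.0;
      const int num_block_pix = blk_w * blk_h;
      /* Scale the 128x128 threshold by sqrt(area ratio), rounding to nearest. */
      unsigned int thresh =
          (unsigned int)(10000 * sqrt(num_block_pix / num_128x128_pix) + 0.5);
      /* Clip to at most 4 units of error per pixel. */
      const unsigned int cap = (unsigned int)(4 * num_block_pix);
      return thresh < cap ? thresh : cap;
    }

    int main(void) {
      printf("%u\n", zeromv_skip_thresh(64, 64)); /* prints 5000 */
      printf("%u\n", zeromv_skip_thresh(4, 4));   /* prints 64 (capped) */
      return 0;
    }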
+
/*!\brief Encoder setup (only for the current frame), encoding, and reconstruction
* for a single frame
*
@@ -1342,7 +1508,7 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
FeatureFlags *const features = &cm->features;
MACROBLOCKD *const xd = &x->e_mbd;
RD_COUNTS *const rdc = &cpi->td.rd_counts;
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
FrameProbInfo *const temp_frame_probs = &cpi->ppi->temp_frame_probs;
FrameProbInfo *const temp_frame_probs_simulation =
&cpi->ppi->temp_frame_probs_simulation;
@@ -1381,11 +1547,11 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
const FRAME_UPDATE_TYPE update_type =
get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
int warped_probability =
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE
? temp_frame_probs->warped_probs[update_type]
:
-#endif // CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#endif // CONFIG_FPMT_TEST
frame_probs->warped_probs[update_type];
if (warped_probability < cpi->sf.inter_sf.prune_warped_prob_thresh)
features->allow_warped_motion = 0;
@@ -1416,7 +1582,10 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
}
av1_hash_table_init(intrabc_hash_info);
- av1_hash_table_create(&intrabc_hash_info->intrabc_hash_table);
+ if (!av1_hash_table_create(&intrabc_hash_info->intrabc_hash_table)) {
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
+ "Error allocating intrabc_hash_table");
+ }
hash_table_created = 1;
av1_generate_block_2x2_hash_value(intrabc_hash_info, cpi->source,
block_hash_values[0], is_block_same[0]);
@@ -1425,6 +1594,7 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
const int max_sb_size =
(1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2));
int src_idx = 0;
+ bool error = false;
for (int size = 4; size <= max_sb_size; size *= 2, src_idx = !src_idx) {
const int dst_idx = !src_idx;
av1_generate_block_hash_value(
@@ -1432,9 +1602,13 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
block_hash_values[dst_idx], is_block_same[src_idx],
is_block_same[dst_idx]);
if (size >= min_alloc_size) {
- av1_add_to_hash_map_by_row_with_precal_data(
- &intrabc_hash_info->intrabc_hash_table, block_hash_values[dst_idx],
- is_block_same[dst_idx][2], pic_width, pic_height, size);
+ if (!av1_add_to_hash_map_by_row_with_precal_data(
+ &intrabc_hash_info->intrabc_hash_table,
+ block_hash_values[dst_idx], is_block_same[dst_idx][2],
+ pic_width, pic_height, size)) {
+ error = true;
+ break;
+ }
}
}
@@ -1447,6 +1621,11 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
aom_free(is_block_same[k][j]);
}
}
+
+ if (error) {
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
+ "Error adding data to intrabc_hash_table");
+ }
}
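
The hash-table changes above follow a record-the-failure, free-everything, then-report shape, so the error path cannot leak the temporary buffers. A generic, self-contained sketch of the same pattern (not libaom API):

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    static bool build_tables(int *bufs[], int n) {
      bool error = false;
      for (int i = 0; i < n; ++i) bufs[i] = NULL;
      for (int i = 0; i < n; ++i) {
        bufs[i] = malloc(64 * sizeof(int));
        if (bufs[i] == NULL) {
          error = true; /* remember the failure, keep one cleanup path */
          break;
        }
      }
      /* Free everything before reporting; free(NULL) is a no-op. */
      for (int i = 0; i < n; ++i) free(bufs[i]);
      if (error) fprintf(stderr, "allocation failed\n");
      return !error;
    }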
const CommonQuantParams *quant_params = &cm->quant_params;
@@ -1517,13 +1696,13 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
} else {
cpi->cyclic_refresh->actual_num_seg1_blocks = 0;
cpi->cyclic_refresh->actual_num_seg2_blocks = 0;
- cpi->cyclic_refresh->cnt_zeromv = 0;
+ cpi->rc.cnt_zeromv = 0;
}
av1_frame_init_quantizer(cpi);
-
init_encode_frame_mb_context(cpi);
set_default_interp_skip_flags(cm, &cpi->interp_search_flags);
+
if (cm->prev_frame && cm->prev_frame->seg.enabled)
cm->last_frame_seg_map = cm->prev_frame->seg_map;
else
@@ -1579,6 +1758,7 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
// has to be called after 'skip_mode_flag' is initialized.
av1_initialize_rd_consts(cpi);
av1_set_sad_per_bit(cpi, &x->sadperbit, quant_params->base_qindex);
+ populate_thresh_to_force_zeromv_skip(cpi);
enc_row_mt->sync_read_ptr = av1_row_mt_sync_read_dummy;
enc_row_mt->sync_write_ptr = av1_row_mt_sync_write_dummy;
@@ -1592,10 +1772,18 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
enc_row_mt->sync_write_ptr = av1_row_mt_sync_write;
av1_encode_tiles_row_mt(cpi);
} else {
- if (AOMMIN(mt_info->num_workers, cm->tiles.cols * cm->tiles.rows) > 1)
+ if (AOMMIN(mt_info->num_workers, cm->tiles.cols * cm->tiles.rows) > 1) {
av1_encode_tiles_mt(cpi);
- else
+ } else {
+ // Preallocate the pc_tree for realtime coding to reduce the cost of
+ // memory allocation.
+ const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
+ td->rt_pc_root = use_nonrd_mode
+ ? av1_alloc_pc_tree_node(cm->seq_params->sb_size)
+ : NULL;
encode_tiles(cpi);
+ av1_free_pc_tree_recursive(td->rt_pc_root, av1_num_planes(cm), 0, 0);
+ }
}
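
The change above moves the partition-tree allocation out of the per-superblock path: for the realtime (nonrd) mode the root is allocated once per frame, reused by every superblock, and freed after all tiles are encoded. A schematic sketch of the lifetime change, with stand-in types and helpers (not libaom API):

    #include <stdlib.h>

    typedef struct PcTree { int payload; } PcTree; /* stand-in for PC_TREE */

    static PcTree *alloc_tree(void) { return calloc(1, sizeof(PcTree)); }
    static void encode_superblock(PcTree *root) { (void)root; /* reuses root */ }

    static void encode_frame(int num_sbs, int use_nonrd) {
      /* Allocate once per frame instead of once per superblock. */
      PcTree *root = use_nonrd ? alloc_tree() : NULL;
      for (int i = 0; i < num_sbs; ++i) encode_superblock(root);
      free(root); /* freed once, after all superblocks are done */
    }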
// If intrabc is allowed but never selected, reset the allow_intrabc flag.
@@ -1620,7 +1808,6 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
assert(oxcf->txfm_cfg.enable_tx64 || tx_search_type != USE_LARGESTALL);
features->tx_mode = select_tx_mode(cm, tx_search_type);
-#if CONFIG_FRAME_PARALLEL_ENCODE
// Retain the frame level probability update conditions for parallel frames.
// These conditions will be consumed during postencode stage to update the
// probability.
@@ -1638,7 +1825,6 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
features->interp_filter == SWITCHABLE);
}
-#endif
if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats ||
((cpi->sf.tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh !=
@@ -1659,7 +1845,6 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
const int new_prob =
sum ? MAX_TX_TYPE_PROB * cpi->td.rd_counts.tx_type_used[i][j] / sum
: (j ? 0 : MAX_TX_TYPE_PROB);
-#if CONFIG_FRAME_PARALLEL_ENCODE
#if CONFIG_FPMT_TEST
if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
@@ -1690,7 +1875,6 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
cpi->frame_new_probs[cpi->num_frame_recode]
.tx_type_probs[update_type][i][j] = new_prob;
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
if (update_txtype_frameprobs) {
int prob =
(frame_probs->tx_type_probs[update_type][i][j] + new_prob) >> 1;
@@ -1720,7 +1904,6 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
const int new_prob =
sum ? 128 * cpi->td.rd_counts.obmc_used[i][1] / sum : 0;
-#if CONFIG_FRAME_PARALLEL_ENCODE
#if CONFIG_FPMT_TEST
if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
@@ -1745,7 +1928,6 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
cpi->frame_new_probs[cpi->num_frame_recode].obmc_probs[update_type][i] =
new_prob;
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
if (update_obmc_frameprobs) {
frame_probs->obmc_probs[update_type][i] =
(frame_probs->obmc_probs[update_type][i] + new_prob) >> 1;
@@ -1761,7 +1943,6 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
int sum = 0;
for (i = 0; i < 2; i++) sum += cpi->td.rd_counts.warped_used[i];
const int new_prob = sum ? 128 * cpi->td.rd_counts.warped_used[1] / sum : 0;
-#if CONFIG_FRAME_PARALLEL_ENCODE
#if CONFIG_FPMT_TEST
if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
@@ -1786,7 +1967,6 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
cpi->frame_new_probs[cpi->num_frame_recode].warped_probs[update_type] =
new_prob;
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
if (update_warp_frameprobs) {
frame_probs->warped_probs[update_type] =
(frame_probs->warped_probs[update_type] + new_prob) >> 1;
@@ -1813,7 +1993,6 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
const int new_prob =
sum ? 1536 * cpi->td.counts->switchable_interp[i][j] / sum
: (j ? 0 : 1536);
-#if CONFIG_FRAME_PARALLEL_ENCODE
#if CONFIG_FPMT_TEST
if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] ==
@@ -1844,7 +2023,6 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
cpi->frame_new_probs[cpi->num_frame_recode]
.switchable_interp_probs[update_type][i][j] = new_prob;
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
if (update_interpfilter_frameprobs) {
int prob = (frame_probs->switchable_interp_probs[update_type][i][j] +
new_prob) >>
@@ -1875,9 +2053,10 @@ void av1_encode_frame(AV1_COMP *cpi) {
FeatureFlags *const features = &cm->features;
const int num_planes = av1_num_planes(cm);
RD_COUNTS *const rdc = &cpi->td.rd_counts;
+ const AV1EncoderConfig *const oxcf = &cpi->oxcf;
// Indicates whether or not to use a default reduced set for ext-tx
// rather than the potential full set of 16 transforms
- features->reduced_tx_set_used = cpi->oxcf.txfm_cfg.reduced_tx_type_set;
+ features->reduced_tx_set_used = oxcf->txfm_cfg.reduced_tx_type_set;
// Make sure segment_id is no larger than last_active_segid.
if (cm->seg.enabled && cm->seg.update_map) {
@@ -1919,7 +2098,8 @@ void av1_encode_frame(AV1_COMP *cpi) {
features->interp_filter = SWITCHABLE;
if (cm->tiles.large_scale) features->interp_filter = EIGHTTAP_REGULAR;
- features->switchable_motion_mode = 1;
+ features->switchable_motion_mode = is_switchable_motion_mode_allowed(
+ features->allow_warped_motion, oxcf->motion_mode_cfg.enable_obmc);
rdc->compound_ref_used_flag = 0;
rdc->skip_mode_used_flag = 0;
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/encodeframe.h b/chromium/third_party/libaom/source/libaom/av1/encoder/encodeframe.h
index 36b38d59f7d..ce32fb47e62 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/encodeframe.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/encodeframe.h
@@ -31,6 +31,11 @@ struct yv12_buffer_config;
struct AV1_COMP;
struct ThreadData;
+void av1_init_rtc_counters(struct macroblock *const x);
+
+void av1_accumulate_rtc_counters(struct AV1_COMP *cpi,
+ const struct macroblock *const x);
+
void av1_setup_src_planes(struct macroblock *x,
const struct yv12_buffer_config *src, int mi_row,
int mi_col, const int num_planes, BLOCK_SIZE bsize);
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/encodeframe_utils.c b/chromium/third_party/libaom/source/libaom/av1/encoder/encodeframe_utils.c
index 01281564b44..7055f4d0bbf 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/encodeframe_utils.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/encodeframe_utils.c
@@ -15,9 +15,7 @@
#include "av1/encoder/encoder.h"
#include "av1/encoder/encodeframe_utils.h"
-#include "av1/encoder/partition_strategy.h"
#include "av1/encoder/rdopt.h"
-#include "av1/encoder/aq_variance.h"
void av1_set_ssim_rdmult(const AV1_COMP *const cpi, int *errorperbit,
const BLOCK_SIZE bsize, const int mi_row,
@@ -42,6 +40,7 @@ void av1_set_ssim_rdmult(const AV1_COMP *const cpi, int *errorperbit,
for (col = mi_col / num_mi_h;
col < num_cols && col < mi_col / num_mi_h + num_bcols; ++col) {
const int index = row * num_cols + col;
+ assert(cpi->ssim_rdmult_scaling_factors[index] != 0.0);
geom_mean_of_scale += log(cpi->ssim_rdmult_scaling_factors[index]);
num_of_mi += 1.0;
}
@@ -326,7 +325,8 @@ void av1_update_state(const AV1_COMP *const cpi, ThreadData *td,
if (!dry_run && !mi_addr->skip_txfm) {
int cdf_num;
- const int spatial_pred = av1_get_spatial_seg_pred(cm, xd, &cdf_num);
+ const int spatial_pred = av1_get_spatial_seg_pred(
+ cm, xd, &cdf_num, cpi->cyclic_refresh->skip_over4x4);
const int coded_id = av1_neg_interleave(mi_addr->segment_id, spatial_pred,
seg->last_active_segid + 1);
int64_t spatial_cost = x->mode_costs.spatial_pred_cost[cdf_num][coded_id];
@@ -951,8 +951,10 @@ void av1_get_tpl_stats_sb(AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];
- sb_enc->tpl_inter_cost[count] = this_stats->inter_cost;
- sb_enc->tpl_intra_cost[count] = this_stats->intra_cost;
+ sb_enc->tpl_inter_cost[count] = this_stats->inter_cost
+ << TPL_DEP_COST_SCALE_LOG2;
+ sb_enc->tpl_intra_cost[count] = this_stats->intra_cost
+ << TPL_DEP_COST_SCALE_LOG2;
memcpy(sb_enc->tpl_mv[count], this_stats->mv, sizeof(this_stats->mv));
mi_count++;
count++;
@@ -1021,7 +1023,7 @@ int av1_get_q_for_deltaq_objective(AV1_COMP *const cpi, ThreadData *td,
mc_dep_reg += log(3 * dist_scaled + mc_dep_delta) * cbcmp;
srcrf_dist += (double)(this_stats->srcrf_dist << RDDIV_BITS);
srcrf_sse += (double)(this_stats->srcrf_sse << RDDIV_BITS);
- srcrf_rate += (double)this_stats->srcrf_rate;
+ srcrf_rate += (double)(this_stats->srcrf_rate << TPL_DEP_COST_SCALE_LOG2);
#ifndef NDEBUG
mi_count++;
#endif
@@ -1310,36 +1312,119 @@ void av1_avg_cdf_symbols(FRAME_CONTEXT *ctx_left, FRAME_CONTEXT *ctx_tr,
CFL_ALPHABET_SIZE);
}
+// Check neighbor blocks' motion information.
+static int check_neighbor_blocks(MB_MODE_INFO **mi, int mi_stride,
+ const TileInfo *const tile_info, int mi_row,
+ int mi_col) {
+ int is_above_low_motion = 1;
+ int is_left_low_motion = 1;
+ const int thr = 24;
+
+ // Check above block.
+ if (mi_row > tile_info->mi_row_start) {
+ const MB_MODE_INFO *above_mbmi = mi[-mi_stride];
+ const int_mv above_mv = above_mbmi->mv[0];
+ if (above_mbmi->mode >= INTRA_MODE_END &&
+ (abs(above_mv.as_mv.row) > thr || abs(above_mv.as_mv.col) > thr))
+ is_above_low_motion = 0;
+ }
+
+ // Check left block.
+ if (mi_col > tile_info->mi_col_start) {
+ const MB_MODE_INFO *left_mbmi = mi[-1];
+ const int_mv left_mv = left_mbmi->mv[0];
+ if (left_mbmi->mode >= INTRA_MODE_END &&
+ (abs(left_mv.as_mv.row) > thr || abs(left_mv.as_mv.col) > thr))
+ is_left_low_motion = 0;
+ }
+
+ return (is_above_low_motion && is_left_low_motion);
+}
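
The neighbor test above counts a block as low motion when it is intra coded or when both MV components stay within +/-24; AV1 motion vectors are stored in eighth-pel units, so 24 corresponds to three pixels. A reduced sketch of the per-neighbor predicate:

    #include <stdlib.h>

    /* MV components are in 1/8-pel units; 24 units is 3 pixels. Intra blocks
     * carry no motion and count as low motion by definition. */
    static int neighbor_is_low_motion(int is_inter, int mv_row, int mv_col) {
      const int thr = 24;
      if (!is_inter) return 1;
      return abs(mv_row) <= thr && abs(mv_col) <= thr;
    }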
+
+// Check this block's motion in a fast way.
+static int fast_detect_non_zero_motion(AV1_COMP *cpi, const uint8_t *src_y,
+ int src_ystride,
+ const uint8_t *last_src_y,
+ int last_src_ystride, int mi_row,
+ int mi_col) {
+ AV1_COMMON *const cm = &cpi->common;
+ const BLOCK_SIZE bsize = cm->seq_params->sb_size;
+ unsigned int blk_sad = INT_MAX;
+ if (cpi->src_sad_blk_64x64 != NULL) {
+ const int sb_size_by_mb = (bsize == BLOCK_128X128)
+ ? (cm->seq_params->mib_size >> 1)
+ : cm->seq_params->mib_size;
+ const int sb_cols =
+ (cm->mi_params.mi_cols + sb_size_by_mb - 1) / sb_size_by_mb;
+ const int sbi_col = mi_col / sb_size_by_mb;
+ const int sbi_row = mi_row / sb_size_by_mb;
+ blk_sad = (unsigned int)cpi->src_sad_blk_64x64[sbi_col + sbi_row * sb_cols];
+ } else {
+ blk_sad = cpi->ppi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y,
+ last_src_ystride);
+ }
+
+ // Search 4 1-away points.
+ const uint8_t *const search_pos[4] = {
+ last_src_y - last_src_ystride,
+ last_src_y - 1,
+ last_src_y + 1,
+ last_src_y + last_src_ystride,
+ };
+ unsigned int sad_arr[4];
+ cpi->ppi->fn_ptr[bsize].sdx4df(src_y, src_ystride, search_pos,
+ last_src_ystride, sad_arr);
+
+ blk_sad = (blk_sad * 5) >> 3;
+ return (blk_sad < sad_arr[0] && blk_sad < sad_arr[1] &&
+ blk_sad < sad_arr[2] && blk_sad < sad_arr[3]);
+}
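
The test above scales the colocated SAD by 5/8 via (blk_sad * 5) >> 3 and requires it to stay below the SAD at all four one-pixel offsets: if shifting the reference by a single pixel makes the match clearly worse, the block is likely static. A condensed sketch of the decision:

    #include <stdbool.h>

    /* center_sad: SAD against the colocated block; near_sad: SADs one pixel
     * up, left, right and down. Low motion iff 0.625 * center beats all four. */
    static bool block_is_low_motion(unsigned int center_sad,
                                    const unsigned int near_sad[4]) {
      const unsigned int scaled = (center_sad * 5) >> 3;
      return scaled < near_sad[0] && scaled < near_sad[1] &&
             scaled < near_sad[2] && scaled < near_sad[3];
    }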
+
// Grade the temporal variation of the source by comparing the current sb and
// its collocated block in the last frame.
-void av1_source_content_sb(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
- int mi_col) {
+void av1_source_content_sb(AV1_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
+ int mi_row, int mi_col) {
+ if (cpi->last_source->y_width != cpi->source->y_width ||
+ cpi->last_source->y_height != cpi->source->y_height)
+ return;
+
unsigned int tmp_sse;
unsigned int tmp_variance;
const BLOCK_SIZE bsize = cpi->common.seq_params->sb_size;
uint8_t *src_y = cpi->source->y_buffer;
- int src_ystride = cpi->source->y_stride;
+ const int src_ystride = cpi->source->y_stride;
+ const int src_offset = src_ystride * (mi_row << 2) + (mi_col << 2);
uint8_t *last_src_y = cpi->last_source->y_buffer;
- int last_src_ystride = cpi->last_source->y_stride;
- const int offset = cpi->source->y_stride * (mi_row << 2) + (mi_col << 2);
- uint64_t avg_source_sse_threshold = 100000; // ~5*5*(64*64)
+ const int last_src_ystride = cpi->last_source->y_stride;
+ const int last_src_offset = last_src_ystride * (mi_row << 2) + (mi_col << 2);
+ uint64_t avg_source_sse_threshold_verylow = 10000; // ~1.5*1.5*(64*64)
+ uint64_t avg_source_sse_threshold_low[2] = { 100000, // ~5*5*(64*64)
+ 36000 }; // ~3*3*(64*64)
+
uint64_t avg_source_sse_threshold_high = 1000000; // ~15*15*(64*64)
uint64_t sum_sq_thresh = 10000; // sum = sqrt(thresh / 64*64)) ~1.5
#if CONFIG_AV1_HIGHBITDEPTH
MACROBLOCKD *xd = &x->e_mbd;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) return;
#endif
- src_y += offset;
- last_src_y += offset;
+ src_y += src_offset;
+ last_src_y += last_src_offset;
tmp_variance = cpi->ppi->fn_ptr[bsize].vf(src_y, src_ystride, last_src_y,
last_src_ystride, &tmp_sse);
+ // rd thresholds
+ if (tmp_sse < avg_source_sse_threshold_low[1])
+ x->content_state_sb.source_sad_rd = kLowSad;
+ // nonrd thresholds
if (tmp_sse == 0)
- x->content_state_sb.source_sad = kZeroSad;
- else if (tmp_sse < avg_source_sse_threshold)
- x->content_state_sb.source_sad = kLowSad;
+ x->content_state_sb.source_sad_nonrd = kZeroSad;
+ else if (tmp_sse < avg_source_sse_threshold_verylow)
+ x->content_state_sb.source_sad_nonrd = kVeryLowSad;
+ else if (tmp_sse < avg_source_sse_threshold_low[0])
+ x->content_state_sb.source_sad_nonrd = kLowSad;
else if (tmp_sse > avg_source_sse_threshold_high)
- x->content_state_sb.source_sad = kHighSad;
+ x->content_state_sb.source_sad_nonrd = kHighSad;
+
// Detect large lighting change.
// Note: tmp_sse - tmp_variance = ((sum * sum) >> 12)
if (tmp_sse > 0) {
@@ -1350,10 +1435,9 @@ void av1_source_content_sb(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
x->content_state_sb.low_sumdiff = 1;
}
- if (cpi->last_source->y_width != cpi->source->y_width ||
- cpi->last_source->y_height != cpi->source->y_height)
+ if (!cpi->sf.rt_sf.use_rtc_tf || tmp_sse == 0 || cpi->rc.high_source_sad ||
+ cpi->rc.frame_source_sad > 20000)
return;
- if (!cpi->sf.rt_sf.use_rtc_tf) return;
// In-place temporal filter. If psnr calculation is enabled, we store the
// source for that.
@@ -1362,10 +1446,35 @@ void av1_source_content_sb(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
const unsigned int nmean2 = tmp_sse - tmp_variance;
const int ac_q_step = av1_ac_quant_QTX(cm->quant_params.base_qindex, 0,
cm->seq_params->bit_depth);
- const unsigned int threshold = 3 * ac_q_step * ac_q_step / 2;
+ const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
+ const int avg_q_step = av1_ac_quant_QTX(p_rc->avg_frame_qindex[INTER_FRAME],
+ 0, cm->seq_params->bit_depth);
+
+ const unsigned int threshold =
+ (cpi->sf.rt_sf.use_rtc_tf == 1)
+ ? (clamp(avg_q_step, 250, 1000)) * ac_q_step
+ : 250 * ac_q_step;
// TODO(yunqing): use a weighted sum instead of averaging in filtering.
if (tmp_variance <= threshold && nmean2 <= 15) {
+ // Check neighbor blocks. If neighbor blocks aren't low-motion blocks,
+ // skip temporal filtering for this block.
+ MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
+ get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
+ const TileInfo *const tile_info = &tile_data->tile_info;
+ const int is_neighbor_blocks_low_motion = check_neighbor_blocks(
+ mi, cm->mi_params.mi_stride, tile_info, mi_row, mi_col);
+ if (!is_neighbor_blocks_low_motion) return;
+
+      // Only the 64x64 SB is considered for now; this needs to be extended to
+      // 128x128 for the large SB size.
+ // Test several nearby points. If non-zero mv exists, don't do temporal
+ // filtering.
+ const int is_this_blk_low_motion = fast_detect_non_zero_motion(
+ cpi, src_y, src_ystride, last_src_y, last_src_ystride, mi_row, mi_col);
+
+ if (!is_this_blk_low_motion) return;
+
const int shift_x[2] = { 0, cpi->source->subsampling_x };
const int shift_y[2] = { 0, cpi->source->subsampling_y };
const uint8_t h = block_size_high[bsize];
@@ -1441,7 +1550,7 @@ void av1_backup_sb_state(SB_FIRST_PASS_STATS *sb_fp_stats, const AV1_COMP *cpi,
xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
av1_save_context(x, &sb_fp_stats->x_ctx, mi_row, mi_col, sb_size, num_planes);
- sb_fp_stats->rd_count = cpi->td.rd_counts;
+ sb_fp_stats->rd_count = td->rd_counts;
sb_fp_stats->split_count = x->txfm_search_info.txb_split_count;
sb_fp_stats->fc = *td->counts;
@@ -1474,7 +1583,7 @@ void av1_restore_sb_state(const SB_FIRST_PASS_STATS *sb_fp_stats, AV1_COMP *cpi,
av1_restore_context(x, &sb_fp_stats->x_ctx, mi_row, mi_col, sb_size,
num_planes);
- cpi->td.rd_counts = sb_fp_stats->rd_count;
+ td->rd_counts = sb_fp_stats->rd_count;
x->txfm_search_info.txb_split_count = sb_fp_stats->split_count;
*td->counts = sb_fp_stats->fc;
@@ -1572,6 +1681,10 @@ void av1_set_cost_upd_freq(AV1_COMP *cpi, ThreadData *td,
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
+ if (cm->features.disable_cdf_update) {
+ return;
+ }
+
switch (cpi->sf.inter_sf.coeff_cost_upd_level) {
case INTERNAL_COST_UPD_OFF:
case INTERNAL_COST_UPD_TILE: // Tile level
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/encodeframe_utils.h b/chromium/third_party/libaom/source/libaom/av1/encoder/encodeframe_utils.h
index 34fa8c77807..9229c3d7cc1 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/encodeframe_utils.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/encodeframe_utils.h
@@ -391,8 +391,8 @@ void av1_update_picked_ref_frames_mask(MACROBLOCK *const x, int ref_type,
void av1_avg_cdf_symbols(FRAME_CONTEXT *ctx_left, FRAME_CONTEXT *ctx_tr,
int wt_left, int wt_tr);
-void av1_source_content_sb(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
- int mi_col);
+void av1_source_content_sb(AV1_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
+ int mi_row, int mi_col);
void av1_reset_mbmi(CommonModeInfoParams *const mi_params, BLOCK_SIZE sb_size,
int mi_row, int mi_col);
@@ -455,8 +455,8 @@ static AOM_INLINE void av1_alloc_mb_data(const AV1_COMP *cpi,
// Memory for mb_rd_record is allocated only when use_mb_rd_hash sf is
// enabled.
if (sf->rd_sf.use_mb_rd_hash)
- mb->txfm_search_info.mb_rd_record =
- (MB_RD_RECORD *)aom_malloc(sizeof(MB_RD_RECORD));
+ CHECK_MEM_ERROR(cm, mb->txfm_search_info.mb_rd_record,
+ (MB_RD_RECORD *)aom_malloc(sizeof(MB_RD_RECORD)));
if (!frame_is_intra_only(cm))
CHECK_MEM_ERROR(
cm, mb->inter_modes_info,
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/encodemb.c b/chromium/third_party/libaom/source/libaom/av1/encoder/encodemb.c
index 885365147ca..8dee801af11 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/encodemb.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/encodemb.c
@@ -167,7 +167,8 @@ void av1_dropout_qcoeff_num(MACROBLOCK *mb, int plane, int block,
const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
// Early return if there are not enough non-zero coefficients.
- if (p->eobs[block] == 0 || p->eobs[block] <= dropout_num_before) {
+ if (p->eobs[block] == 0 || p->eobs[block] <= dropout_num_before ||
+ max_eob <= dropout_num_before + dropout_num_after) {
return;
}
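
/* Rationale for the added bound above (a sketch of the dropout rule as
 * assumed here): a coefficient can be dropped only when at least
 * dropout_num_before zeros precede it and dropout_num_after zeros follow
 * it, which requires dropout_num_before + 1 + dropout_num_after <= max_eob
 * scan positions. So once max_eob <= dropout_num_before + dropout_num_after,
 * no coefficient is droppable and the early return is correct. */
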
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/encodemb.h b/chromium/third_party/libaom/source/libaom/av1/encoder/encodemb.h
index b58d13d5de1..b819e8244cc 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/encodemb.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/encodemb.h
@@ -56,7 +56,7 @@ struct encode_b_args {
const struct AV1_COMP *cpi;
MACROBLOCK *x;
struct optimize_ctx *ctx;
- int8_t *skip;
+ uint8_t *skip;
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
RUN_TYPE dry_run;
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/encodemv.c b/chromium/third_party/libaom/source/libaom/av1/encoder/encodemv.c
index 4a7d87408cb..7cae72c159c 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/encodemv.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/encodemv.c
@@ -115,14 +115,21 @@ static void encode_mv_component(aom_writer *w, int comp, nmv_component *mvcomp,
2);
}
-static void build_nmv_component_cost_table(int *mvcost,
- const nmv_component *const mvcomp,
- MvSubpelPrecision precision) {
- int i, v;
+/* TODO(siekyleb@amazon.com): This function writes MV_VALS ints or 128 KiB. This
+ * is more than most L1D caches and is a significant chunk of L2. Write
+ * SIMD that uses streaming writes to avoid loading all of that into L1, or
+ * just don't update the larger component costs every time this is called
+ * (or both).
+ */
+void av1_build_nmv_component_cost_table(int *mvcost,
+ const nmv_component *const mvcomp,
+ MvSubpelPrecision precision) {
+ int i, j, v, o, mantissa;
int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE];
int bits_cost[MV_OFFSET_BITS][2];
- int class0_fp_cost[CLASS0_SIZE][MV_FP_SIZE], fp_cost[MV_FP_SIZE];
- int class0_hp_cost[2], hp_cost[2];
+ int class0_fp_cost[CLASS0_SIZE][MV_FP_SIZE] = { 0 },
+ fp_cost[MV_FP_SIZE] = { 0 };
+ int class0_hp_cost[2] = { 0 }, hp_cost[2] = { 0 };
av1_cost_tokens_from_cdf(sign_cost, mvcomp->sign_cdf, NULL);
av1_cost_tokens_from_cdf(class_cost, mvcomp->classes_cdf, NULL);
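
/* Back-of-envelope check of the TODO above (illustrative sketch, not patch
 * code): kMvMaxBits mirrors MV_MAX_BITS from av1/common/mv.h and a 4-byte
 * int is assumed. The table spans MV_VALS = 2 * MV_MAX + 1 = 32767 entries,
 * i.e. just under 128 KiB per component. */
#include <assert.h>
enum { kMvMaxBits = 14, kMvMax = (1 << kMvMaxBits) - 1, kMvVals = 2 * kMvMax + 1 };
static_assert(kMvVals * sizeof(int) == 128 * 1024 - 4, "MV cost table ~128 KiB");
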
@@ -131,45 +138,114 @@ static void build_nmv_component_cost_table(int *mvcost,
av1_cost_tokens_from_cdf(bits_cost[i], mvcomp->bits_cdf[i], NULL);
}
- for (i = 0; i < CLASS0_SIZE; ++i)
- av1_cost_tokens_from_cdf(class0_fp_cost[i], mvcomp->class0_fp_cdf[i], NULL);
- av1_cost_tokens_from_cdf(fp_cost, mvcomp->fp_cdf, NULL);
+ if (precision > MV_SUBPEL_NONE) {
+ for (i = 0; i < CLASS0_SIZE; ++i)
+ av1_cost_tokens_from_cdf(class0_fp_cost[i], mvcomp->class0_fp_cdf[i],
+ NULL);
+ av1_cost_tokens_from_cdf(fp_cost, mvcomp->fp_cdf, NULL);
+ }
if (precision > MV_SUBPEL_LOW_PRECISION) {
av1_cost_tokens_from_cdf(class0_hp_cost, mvcomp->class0_hp_cdf, NULL);
av1_cost_tokens_from_cdf(hp_cost, mvcomp->hp_cdf, NULL);
}
+
+  // Instead of accumulating the cost of each vector component's bits
+  // individually, compute the costs based on smaller vectors. Costs for
+  // [2^exp, 2 * 2^exp - 1] are calculated element-wise from the costs for
+  // [0, 2^exp - 1]. Offsets are maintained to swap 1) the class cost of a
+  // value treated as a complete vector component with the cost of its
+  // highest set bit when treated as a mantissa (significand), and 2) the
+  // leading zeros that account for the current exponent.
+
+ // Cost offsets
+ int cost_swap[MV_OFFSET_BITS] = { 0 };
+ // Delta to convert positive vector to negative vector costs
+ int negate_sign = sign_cost[1] - sign_cost[0];
+
+ // Initialize with offsets to swap the class costs with the costs of the
+ // highest set bit.
+ for (i = 1; i < MV_OFFSET_BITS; ++i) {
+ cost_swap[i] = bits_cost[i - 1][1];
+ if (i > CLASS0_BITS) cost_swap[i] -= class_cost[i - CLASS0_BITS];
+ }
+
+  // Seed the fractional costs onto the output (overwritten later).
+ for (o = 0; o < MV_FP_SIZE; ++o) {
+ int hp;
+ for (hp = 0; hp < 2; ++hp) {
+ v = 2 * o + hp + 1;
+ mvcost[v] = fp_cost[o] + hp_cost[hp] + sign_cost[0];
+ }
+ }
+
mvcost[0] = 0;
- for (v = 1; v <= MV_MAX; ++v) {
- int z, c, o, d, e, f, cost = 0;
- z = v - 1;
- c = av1_get_mv_class(z, &o);
- cost += class_cost[c];
- d = (o >> 3); /* int mv data */
- f = (o >> 1) & 3; /* fractional pel mv data */
- e = (o & 1); /* high precision mv data */
- if (c == MV_CLASS_0) {
- cost += class0_cost[d];
- } else {
- const int b = c + CLASS0_BITS - 1; /* number of bits */
- for (i = 0; i < b; ++i) cost += bits_cost[i][((d >> i) & 1)];
+ // Fill the costs for each exponent's vectors, using the costs set in the
+ // previous exponents.
+ for (i = 0; i < MV_OFFSET_BITS; ++i) {
+ const int exponent = (2 * MV_FP_SIZE) << i;
+
+ int class = 0;
+ if (i >= CLASS0_BITS) {
+ class = class_cost[i - CLASS0_BITS + 1];
}
- if (precision > MV_SUBPEL_NONE) {
- if (c == MV_CLASS_0) {
- cost += class0_fp_cost[d][f];
- } else {
- cost += fp_cost[f];
+
+    // Iterate through the mantissas, keeping track of the position of each
+    // mantissa's highest set bit. To be clear: the outer loop tracks the
+    // position of the component's highest set bit (the exponent), while
+    // this loop tracks the highest set bit of the mantissa.
+ mantissa = 0;
+ for (j = 0; j <= i; ++j) {
+ for (; mantissa < (2 * MV_FP_SIZE) << j; ++mantissa) {
+ int cost = mvcost[mantissa + 1] + class + cost_swap[j];
+ v = exponent + mantissa + 1;
+ mvcost[v] = cost;
+ mvcost[-v] = cost + negate_sign;
+ }
+ cost_swap[j] += bits_cost[i][0];
+ }
+ }
+
+ // Special case to avoid buffer overrun
+ {
+ int exponent = (2 * MV_FP_SIZE) << MV_OFFSET_BITS;
+ int class = class_cost[MV_CLASSES - 1];
+ mantissa = 0;
+ for (j = 0; j < MV_OFFSET_BITS; ++j) {
+ for (; mantissa < (2 * MV_FP_SIZE) << j; ++mantissa) {
+ int cost = mvcost[mantissa + 1] + class + cost_swap[j];
+ v = exponent + mantissa + 1;
+ mvcost[v] = cost;
+ mvcost[-v] = cost + negate_sign;
}
- if (precision > MV_SUBPEL_LOW_PRECISION) {
- if (c == MV_CLASS_0) {
- cost += class0_hp_cost[e];
- } else {
- cost += hp_cost[e];
- }
+ }
+ // At this point: mantissa = exponent >> 1
+
+ // Manually calculate the final cost offset
+ int cost_swap_hi =
+ bits_cost[MV_OFFSET_BITS - 1][1] - class_cost[MV_CLASSES - 2];
+ for (; mantissa < exponent - 1; ++mantissa) {
+ int cost = mvcost[mantissa + 1] + class + cost_swap_hi;
+ v = exponent + mantissa + 1;
+ mvcost[v] = cost;
+ mvcost[-v] = cost + negate_sign;
+ }
+ }
+
+ // Fill costs for class0 vectors, overwriting previous placeholder values
+ // used for calculating the costs of the larger vectors.
+ for (i = 0; i < CLASS0_SIZE; ++i) {
+ const int top = i * 2 * MV_FP_SIZE;
+ for (o = 0; o < MV_FP_SIZE; ++o) {
+ int hp;
+ int cost = class0_fp_cost[i][o] + class_cost[0] + class0_cost[i];
+ for (hp = 0; hp < 2; ++hp) {
+ v = top + 2 * o + hp + 1;
+ mvcost[v] = cost + class0_hp_cost[hp] + sign_cost[0];
+ mvcost[-v] = cost + class0_hp_cost[hp] + sign_cost[1];
}
}
- mvcost[v] = cost + sign_cost[0];
- mvcost[-v] = cost + sign_cost[1];
}
}
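
/* Illustrative sketch (not patch code) of the recurrence the rewritten loop
 * relies on: for a magnitude z >= 8 (smaller magnitudes are MV_CLASS_0 and
 * handled separately), split z into an exponent i with
 * (8 << i) <= z < (8 << (i + 1)) and a mantissa m = z - (8 << i); the cost
 * of v = z + 1 is then derived from the already-filled entry mvcost[m + 1].
 * MV_FP_SIZE is assumed to be 4, so 2 * MV_FP_SIZE = 8. */
static void split_mv_magnitude(int z, int *exponent, int *mantissa) {
  int i = 0;
  while (z >= 8 << (i + 1)) ++i; /* locate the highest set bit above bit 2 */
  *exponent = i;
  *mantissa = z - (8 << i);
}
/* Example: z = 100 gives exponent 3 (64 <= 100 < 128) and mantissa 36, so
 * mvcost[101] is built from mvcost[37] plus the swapped class/bit costs. */
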
@@ -219,8 +295,8 @@ void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
const nmv_context *ctx,
MvSubpelPrecision precision) {
av1_cost_tokens_from_cdf(mvjoint, ctx->joints_cdf, NULL);
- build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], precision);
- build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], precision);
+ av1_build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], precision);
+ av1_build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], precision);
}
int_mv av1_get_ref_mv_from_stack(int ref_idx,
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/encodemv.h b/chromium/third_party/libaom/source/libaom/av1/encoder/encodemv.h
index 962844bc794..c39001a5a25 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/encodemv.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/encodemv.h
@@ -27,6 +27,9 @@ void av1_update_mv_stats(const MV *mv, const MV *ref, nmv_context *mvctx,
void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
const nmv_context *mvctx,
MvSubpelPrecision precision);
+void av1_build_nmv_component_cost_table(int *mvcost,
+ const nmv_component *const mvcomp,
+ MvSubpelPrecision precision);
void av1_update_mv_count(ThreadData *td);
@@ -62,9 +65,9 @@ static INLINE uint8_t av1_log_in_base_2(unsigned int n) {
}
static INLINE MV_CLASS_TYPE av1_get_mv_class(int z, int *offset) {
- const MV_CLASS_TYPE c = (z >= CLASS0_SIZE * 4096)
- ? MV_CLASS_10
- : (MV_CLASS_TYPE)av1_log_in_base_2(z >> 3);
+ assert(z >= 0);
+ const MV_CLASS_TYPE c = (MV_CLASS_TYPE)av1_log_in_base_2(z >> 3);
+ assert(c <= MV_CLASS_10);
if (offset) *offset = z - av1_mv_class_base(c);
return c;
}
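
/* Standalone check (a sketch; kMvMax is assumed to match MV_MAX from
 * av1/common/mv.h) that the MV_CLASS_10 clamp removed above is unreachable
 * for legal magnitudes: z <= MV_MAX - 1 = 16382, so z >> 3 <= 2047 and
 * floor(log2(z >> 3)) <= 10 == MV_CLASS_10. */
#include <assert.h>
static int floor_log2(unsigned int n) {
  int b = 0;
  while (n >>= 1) ++b;
  return b;
}
static void check_mv_class_bound(void) {
  const int kMvMax = (1 << 14) - 1;
  for (int z = 0; z <= kMvMax - 1; ++z)
    assert(floor_log2((unsigned int)z >> 3) <= 10 /* MV_CLASS_10 */);
}
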
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/encoder.c b/chromium/third_party/libaom/source/libaom/av1/encoder/encoder.c
index b7c87dcba1b..5e1f6b786a4 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/encoder.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/encoder.c
@@ -94,33 +94,33 @@ FILE *yuv_rec_file;
FILE *yuv_denoised_file = NULL;
#endif
-static INLINE void Scale2Ratio(AOM_SCALING mode, int *hr, int *hs) {
+static INLINE void Scale2Ratio(AOM_SCALING_MODE mode, int *hr, int *hs) {
switch (mode) {
- case NORMAL:
+ case AOME_NORMAL:
*hr = 1;
*hs = 1;
break;
- case FOURFIVE:
+ case AOME_FOURFIVE:
*hr = 4;
*hs = 5;
break;
- case THREEFIVE:
+ case AOME_THREEFIVE:
*hr = 3;
*hs = 5;
break;
- case THREEFOUR:
+ case AOME_THREEFOUR:
*hr = 3;
*hs = 4;
break;
- case ONEFOUR:
+ case AOME_ONEFOUR:
*hr = 1;
*hs = 4;
break;
- case ONEEIGHT:
+ case AOME_ONEEIGHT:
*hr = 1;
*hs = 8;
break;
- case ONETWO:
+ case AOME_ONETWO:
*hr = 1;
*hs = 2;
break;
@@ -136,30 +136,27 @@ int av1_set_active_map(AV1_COMP *cpi, unsigned char *new_map_16x16, int rows,
int cols) {
const CommonModeInfoParams *const mi_params = &cpi->common.mi_params;
if (rows == mi_params->mb_rows && cols == mi_params->mb_cols) {
- unsigned char *const active_map_8x8 = cpi->active_map.map;
+ unsigned char *const active_map_4x4 = cpi->active_map.map;
const int mi_rows = mi_params->mi_rows;
const int mi_cols = mi_params->mi_cols;
const int row_scale = mi_size_high[BLOCK_16X16] == 2 ? 1 : 2;
const int col_scale = mi_size_wide[BLOCK_16X16] == 2 ? 1 : 2;
- cpi->active_map.update = 1;
+ cpi->active_map.update = 0;
if (new_map_16x16) {
- int r, c;
- for (r = 0; r < mi_rows; ++r) {
- for (c = 0; c < mi_cols; ++c) {
- active_map_8x8[r * mi_cols + c] =
+ for (int r = 0; r < mi_rows; ++r) {
+ for (int c = 0; c < mi_cols; ++c) {
+ active_map_4x4[r * mi_cols + c] =
new_map_16x16[(r >> row_scale) * cols + (c >> col_scale)]
? AM_SEGMENT_ID_ACTIVE
: AM_SEGMENT_ID_INACTIVE;
}
}
cpi->active_map.enabled = 1;
- } else {
- cpi->active_map.enabled = 0;
}
return 0;
- } else {
- return -1;
}
+
+ return -1;
}
int av1_get_active_map(AV1_COMP *cpi, unsigned char *new_map_16x16, int rows,
@@ -175,9 +172,8 @@ int av1_get_active_map(AV1_COMP *cpi, unsigned char *new_map_16x16, int rows,
memset(new_map_16x16, !cpi->active_map.enabled, rows * cols);
if (cpi->active_map.enabled) {
- int r, c;
- for (r = 0; r < mi_rows; ++r) {
- for (c = 0; c < mi_cols; ++c) {
+ for (int r = 0; r < mi_rows; ++r) {
+ for (int c = 0; c < mi_cols; ++c) {
// Cyclic refresh segments are considered active despite not having
// AM_SEGMENT_ID_ACTIVE
new_map_16x16[(r >> row_scale) * cols + (c >> col_scale)] |=
@@ -186,35 +182,21 @@ int av1_get_active_map(AV1_COMP *cpi, unsigned char *new_map_16x16, int rows,
}
}
return 0;
- } else {
- return -1;
}
+
+ return -1;
}
-void av1_initialize_enc(void) {
+void av1_initialize_enc(unsigned int usage, enum aom_rc_mode end_usage) {
+ bool is_allintra = usage == ALLINTRA;
+
av1_rtcd();
aom_dsp_rtcd();
aom_scale_rtcd();
av1_init_intra_predictors();
av1_init_me_luts();
- av1_rc_init_minq_luts();
- av1_init_wedge_masks();
-}
-
-static void update_reference_segmentation_map(AV1_COMP *cpi) {
- AV1_COMMON *const cm = &cpi->common;
- const CommonModeInfoParams *const mi_params = &cm->mi_params;
- MB_MODE_INFO **mi_4x4_ptr = mi_params->mi_grid_base;
- uint8_t *cache_ptr = cm->cur_frame->seg_map;
-
- for (int row = 0; row < mi_params->mi_rows; row++) {
- MB_MODE_INFO **mi_4x4 = mi_4x4_ptr;
- uint8_t *cache = cache_ptr;
- for (int col = 0; col < mi_params->mi_cols; col++, mi_4x4++, cache++)
- cache[0] = mi_4x4[0]->segment_id;
- mi_4x4_ptr += mi_params->mi_stride;
- cache_ptr += mi_params->mi_cols;
- }
+ if (!is_allintra) av1_init_wedge_masks();
+ if (!is_allintra || end_usage != AOM_Q) av1_rc_init_minq_luts();
}
void av1_new_framerate(AV1_COMP *cpi, double framerate) {
@@ -238,6 +220,36 @@ double av1_get_compression_ratio(const AV1_COMMON *const cm,
return uncompressed_frame_size / (double)encoded_frame_size;
}
+static void auto_tile_size_balancing(AV1_COMMON *const cm, int num_sbs,
+ int num_tiles_lg, int tile_col_row) {
+ CommonTileParams *const tiles = &cm->tiles;
+ int i, start_sb;
+ int size_sb = num_sbs >> num_tiles_lg;
+ int res_sbs = num_sbs - (size_sb << num_tiles_lg);
+ int num_tiles = 1 << num_tiles_lg;
+ int inc_index = num_tiles - res_sbs;
+
+ tiles->uniform_spacing = 0;
+
+ for (i = 0, start_sb = 0; start_sb < num_sbs && i < MAX_TILE_COLS; ++i) {
+ if (i == inc_index) ++size_sb;
+ if (tile_col_row)
+ tiles->col_start_sb[i] = start_sb;
+ else
+ tiles->row_start_sb[i] = start_sb;
+
+ start_sb += AOMMIN(size_sb, tiles->max_width_sb);
+ }
+
+ if (tile_col_row) {
+ tiles->cols = i;
+ tiles->col_start_sb[i] = num_sbs;
+ } else {
+ tiles->rows = i;
+ tiles->row_start_sb[i] = num_sbs;
+ }
+}
+
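
/* Worked example for auto_tile_size_balancing() above (an illustrative
 * sketch that prints the tile sizes instead of filling tiles->*_start_sb):
 * 19 SBs split into 1 << 2 = 4 tiles gives floor size 4, remainder 3 and
 * inc_index = 1, so the tile sizes come out as 4, 5, 5, 5. */
#include <stdio.h>
static void print_balanced_tile_sizes(int num_sbs, int num_tiles_lg) {
  int size_sb = num_sbs >> num_tiles_lg; /* floor size */
  const int num_tiles = 1 << num_tiles_lg;
  const int res_sbs = num_sbs - (size_sb << num_tiles_lg);
  const int inc_index = num_tiles - res_sbs; /* first enlarged tile */
  for (int i = 0; i < num_tiles; ++i) {
    if (i == inc_index) ++size_sb;
    printf("tile %d: %d SBs\n", i, size_sb);
  }
}
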
static void set_tile_info(AV1_COMMON *const cm,
const TileConfig *const tile_cfg) {
const CommonModeInfoParams *const mi_params = &cm->mi_params;
@@ -247,15 +259,16 @@ static void set_tile_info(AV1_COMMON *const cm,
av1_get_tile_limits(cm);
+ int sb_cols =
+ CEIL_POWER_OF_TWO(mi_params->mi_cols, seq_params->mib_size_log2);
// configure tile columns
if (tile_cfg->tile_width_count == 0 || tile_cfg->tile_height_count == 0) {
tiles->uniform_spacing = 1;
tiles->log2_cols = AOMMAX(tile_cfg->tile_columns, tiles->min_log2_cols);
tiles->log2_cols = AOMMIN(tiles->log2_cols, tiles->max_log2_cols);
+ } else if (tile_cfg->tile_widths[0] < 0) {
+ auto_tile_size_balancing(cm, sb_cols, tile_cfg->tile_columns, 1);
} else {
- int mi_cols =
- ALIGN_POWER_OF_TWO(mi_params->mi_cols, seq_params->mib_size_log2);
- int sb_cols = mi_cols >> seq_params->mib_size_log2;
int size_sb, j = 0;
tiles->uniform_spacing = 0;
for (i = 0, start_sb = 0; start_sb < sb_cols && i < MAX_TILE_COLS; i++) {
@@ -271,13 +284,14 @@ static void set_tile_info(AV1_COMMON *const cm,
tiles);
// configure tile rows
+ int sb_rows =
+ CEIL_POWER_OF_TWO(mi_params->mi_rows, seq_params->mib_size_log2);
if (tiles->uniform_spacing) {
tiles->log2_rows = AOMMAX(tile_cfg->tile_rows, tiles->min_log2_rows);
tiles->log2_rows = AOMMIN(tiles->log2_rows, tiles->max_log2_rows);
+ } else if (tile_cfg->tile_heights[0] < 0) {
+ auto_tile_size_balancing(cm, sb_rows, tile_cfg->tile_rows, 0);
} else {
- int mi_rows =
- ALIGN_POWER_OF_TWO(mi_params->mi_rows, seq_params->mib_size_log2);
- int sb_rows = mi_rows >> seq_params->mib_size_log2;
int size_sb, j = 0;
for (i = 0, start_sb = 0; start_sb < sb_rows && i < MAX_TILE_ROWS; i++) {
tiles->row_start_sb[i] = start_sb;
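
/* Note (sketch): CEIL_POWER_OF_TWO(x, n) above is assumed to compute the
 * ceiling division by 2^n in one step, i.e. (x + (1 << n) - 1) >> n, which
 * matches the removed ALIGN_POWER_OF_TWO(...) followed by a right shift. */
#include <assert.h>
#define CEIL_POWER_OF_TWO_SKETCH(v, n) (((v) + (1 << (n)) - 1) >> (n))
static_assert(CEIL_POWER_OF_TWO_SKETCH(129, 5) == 5, "4 full SBs + 1 partial");
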
@@ -470,7 +484,7 @@ void av1_init_seq_coding_tools(AV1_PRIMARY *const ppi,
}
static void init_config_sequence(struct AV1_PRIMARY *ppi,
- AV1EncoderConfig *oxcf) {
+ const AV1EncoderConfig *oxcf) {
SequenceHeader *const seq_params = &ppi->seq_params;
const DecoderModelCfg *const dec_model_cfg = &oxcf->dec_model_cfg;
const ColorCfg *const color_cfg = &oxcf->color_cfg;
@@ -546,7 +560,7 @@ static void init_config_sequence(struct AV1_PRIMARY *ppi,
av1_change_config_seq(ppi, oxcf, NULL);
}
-static void init_config(struct AV1_COMP *cpi, AV1EncoderConfig *oxcf) {
+static void init_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
AV1_COMMON *const cm = &cpi->common;
ResizePendingParams *resize_pending_params = &cpi->resize_pending_params;
@@ -559,18 +573,20 @@ static void init_config(struct AV1_COMP *cpi, AV1EncoderConfig *oxcf) {
alloc_compressor_data(cpi);
- av1_update_film_grain_parameters(cpi, oxcf);
-
// Single thread case: use counts in common.
cpi->td.counts = &cpi->counts;
- // Set init SVC parameters.
- cpi->svc.set_ref_frame_config = 0;
- cpi->svc.non_reference_frame = 0;
+ // Init SVC parameters.
cpi->svc.number_spatial_layers = 1;
cpi->svc.number_temporal_layers = 1;
cm->spatial_layer_id = 0;
cm->temporal_layer_id = 0;
+ // Init rtc_ref parameters.
+ cpi->rtc_ref.set_ref_frame_config = 0;
+ cpi->rtc_ref.non_reference_frame = 0;
+ cpi->rtc_ref.ref_frame_comp[0] = 0;
+ cpi->rtc_ref.ref_frame_comp[1] = 0;
+ cpi->rtc_ref.ref_frame_comp[2] = 0;
// change includes all joint functionality
av1_change_config(cpi, oxcf, false);
@@ -680,6 +696,7 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf,
RefreshFrameInfo *const refresh_frame = &cpi->refresh_frame;
const FrameDimensionCfg *const frm_dim_cfg = &cpi->oxcf.frm_dim_cfg;
const RateControlCfg *const rc_cfg = &oxcf->rc_cfg;
+ FeatureFlags *const features = &cm->features;
// in case of LAP, lag in frames is set according to number of lap buffers
// calculated at init time. This stores and restores LAP's lag in frames to
@@ -689,9 +706,10 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf,
lap_lag_in_frames = cpi->oxcf.gf_cfg.lag_in_frames;
}
+ cpi->oxcf = *oxcf;
+
av1_update_film_grain_parameters(cpi, oxcf);
- cpi->oxcf = *oxcf;
// When user provides superres_mode = AOM_SUPERRES_AUTO, we still initialize
// superres mode for current encoding = AOM_SUPERRES_NONE. This is to ensure
// that any analysis (e.g. TPL) happening outside the main encoding loop still
@@ -733,12 +751,12 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf,
refresh_frame->golden_frame = false;
refresh_frame->bwd_ref_frame = false;
- cm->features.refresh_frame_context =
+ features->refresh_frame_context =
(oxcf->tool_cfg.frame_parallel_decoding_mode)
? REFRESH_FRAME_CONTEXT_DISABLED
: REFRESH_FRAME_CONTEXT_BACKWARD;
if (oxcf->tile_cfg.enable_large_scale_tile)
- cm->features.refresh_frame_context = REFRESH_FRAME_CONTEXT_DISABLED;
+ features->refresh_frame_context = REFRESH_FRAME_CONTEXT_DISABLED;
if (x->palette_buffer == NULL) {
CHECK_MEM_ERROR(cm, x->palette_buffer,
@@ -786,9 +804,10 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf,
rc->worst_quality = rc_cfg->worst_allowed_q;
rc->best_quality = rc_cfg->best_allowed_q;
- cm->features.interp_filter =
+ features->interp_filter =
oxcf->tile_cfg.enable_large_scale_tile ? EIGHTTAP_REGULAR : SWITCHABLE;
- cm->features.switchable_motion_mode = 1;
+ features->switchable_motion_mode = is_switchable_motion_mode_allowed(
+ features->allow_warped_motion, oxcf->motion_mode_cfg.enable_obmc);
if (frm_dim_cfg->render_width > 0 && frm_dim_cfg->render_height > 0) {
cm->render_width = frm_dim_cfg->render_width;
@@ -819,7 +838,7 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf,
set_tile_info(cm, &cpi->oxcf.tile_cfg);
- if (!cpi->svc.set_ref_frame_config)
+ if (!cpi->rtc_ref.set_ref_frame_config)
cpi->ext_flags.refresh_frame.update_pending = 0;
cpi->ext_flags.refresh_frame_context_pending = 0;
@@ -863,7 +882,7 @@ static INLINE void update_frame_index_set(FRAME_INDEX_SET *frame_index_set,
AV1_PRIMARY *av1_create_primary_compressor(
struct aom_codec_pkt_list *pkt_list_head, int num_lap_buffers,
- AV1EncoderConfig *oxcf) {
+ const AV1EncoderConfig *oxcf) {
AV1_PRIMARY *volatile const ppi = aom_memalign(32, sizeof(AV1_PRIMARY));
if (!ppi) return NULL;
av1_zero(*ppi);
@@ -1167,14 +1186,6 @@ AV1_PRIMARY *av1_create_primary_compressor(
aom_calloc(num_rows * num_cols,
sizeof(*ppi->tpl_sb_rdmult_scaling_factors)));
-#if !CONFIG_REALTIME_ONLY
- if (oxcf->pass != AOM_RC_FIRST_PASS) {
- av1_setup_tpl_buffers(ppi, &mi_params, oxcf->frm_dim_cfg.width,
- oxcf->frm_dim_cfg.height, 0,
- oxcf->gf_cfg.lag_in_frames);
- }
-#endif
-
#if CONFIG_INTERNAL_STATS
ppi->b_calculate_blockiness = 1;
ppi->b_calculate_consistency = 1;
@@ -1230,24 +1241,25 @@ AV1_PRIMARY *av1_create_primary_compressor(
return ppi;
}
-AV1_COMP *av1_create_compressor(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf,
+AV1_COMP *av1_create_compressor(AV1_PRIMARY *ppi, const AV1EncoderConfig *oxcf,
BufferPool *const pool, COMPRESSOR_STAGE stage,
int lap_lag_in_frames) {
AV1_COMP *volatile const cpi = aom_memalign(32, sizeof(AV1_COMP));
- AV1_COMMON *volatile const cm = cpi != NULL ? &cpi->common : NULL;
- if (!cm) return NULL;
+ if (!cpi) return NULL;
av1_zero(*cpi);
cpi->ppi = ppi;
+
+ AV1_COMMON *volatile const cm = &cpi->common;
cm->seq_params = &ppi->seq_params;
-#if CONFIG_FRAME_PARALLEL_ENCODE
cm->error =
(struct aom_internal_error_info *)aom_calloc(1, sizeof(*cm->error));
-#else
- cm->error = &ppi->error;
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
+ if (!cm->error) {
+ aom_free(cpi);
+ return NULL;
+ }
// The jmp_buf is valid only for the duration of the function that calls
// setjmp(). Therefore, this function must reset the 'setjmp' field to 0
@@ -1255,7 +1267,7 @@ AV1_COMP *av1_create_compressor(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf,
if (setjmp(cm->error->jmp)) {
cm->error->setjmp = 0;
av1_remove_compressor(cpi);
- return 0;
+ return NULL;
}
cm->error->setjmp = 1;
@@ -1359,8 +1371,17 @@ AV1_COMP *av1_create_compressor(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf,
av1_set_speed_features_framesize_independent(cpi, oxcf->speed);
av1_set_speed_features_framesize_dependent(cpi, oxcf->speed);
+ int max_mi_cols = mi_params->mi_cols;
+ int max_mi_rows = mi_params->mi_rows;
+ if (oxcf->frm_dim_cfg.forced_max_frame_width) {
+ max_mi_cols = size_in_mi(oxcf->frm_dim_cfg.forced_max_frame_width);
+ }
+ if (oxcf->frm_dim_cfg.forced_max_frame_height) {
+ max_mi_rows = size_in_mi(oxcf->frm_dim_cfg.forced_max_frame_height);
+ }
+
CHECK_MEM_ERROR(cm, cpi->consec_zero_mv,
- aom_calloc((mi_params->mi_rows * mi_params->mi_cols) >> 2,
+ aom_calloc((max_mi_rows * max_mi_cols) >> 2,
sizeof(*cpi->consec_zero_mv)));
cpi->mb_weber_stats = NULL;
@@ -1370,8 +1391,8 @@ AV1_COMP *av1_create_compressor(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf,
const int bsize = BLOCK_16X16;
const int w = mi_size_wide[bsize];
const int h = mi_size_high[bsize];
- const int num_cols = (mi_params->mi_cols + w - 1) / w;
- const int num_rows = (mi_params->mi_rows + h - 1) / h;
+ const int num_cols = (max_mi_cols + w - 1) / w;
+ const int num_rows = (max_mi_rows + h - 1) / h;
CHECK_MEM_ERROR(cm, cpi->ssim_rdmult_scaling_factors,
aom_calloc(num_rows * num_cols,
sizeof(*cpi->ssim_rdmult_scaling_factors)));
@@ -1445,6 +1466,7 @@ AV1_COMP *av1_create_compressor(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf,
}
cpi->second_pass_log_stream = NULL;
+ cpi->use_ducky_encode = 0;
cm->error->setjmp = 0;
return cpi;
@@ -1473,9 +1495,7 @@ static AOM_INLINE void free_thread_data(AV1_PRIMARY *ppi) {
PrimaryMultiThreadInfo *const p_mt_info = &ppi->p_mt_info;
for (int t = 1; t < p_mt_info->num_workers; ++t) {
EncWorkerData *const thread_data = &p_mt_info->tile_thr_data[t];
-#if CONFIG_FRAME_PARALLEL_ENCODE
thread_data->td = thread_data->original_td;
-#endif
aom_free(thread_data->td->tctx);
aom_free(thread_data->td->palette_buffer);
aom_free(thread_data->td->tmp_conv_dst);
@@ -1524,6 +1544,7 @@ void av1_remove_primary_compressor(AV1_PRIMARY *ppi) {
for (int frame = 0; frame < MAX_LAG_BUFFERS; ++frame) {
aom_free(tpl_data->tpl_stats_pool[frame]);
aom_free_frame_buffer(&tpl_data->tpl_rec_pool[frame]);
+ tpl_data->tpl_stats_pool[frame] = NULL;
}
#if !CONFIG_REALTIME_ONLY
@@ -1567,9 +1588,7 @@ void av1_remove_compressor(AV1_COMP *cpi) {
av1_denoiser_free(&(cpi->denoiser));
#endif
-#if CONFIG_FRAME_PARALLEL_ENCODE
aom_free(cm->error);
-#endif
aom_free(cpi->td.tctx);
MultiThreadInfo *const mt_info = &cpi->mt_info;
#if CONFIG_MULTITHREAD
@@ -1776,18 +1795,15 @@ void av1_set_mv_search_params(AV1_COMP *cpi) {
mv_search_params->mv_step_param = av1_init_search_range(
AOMMIN(max_mv_def, 2 * mv_search_params->max_mv_magnitude));
}
-#if CONFIG_FRAME_PARALLEL_ENCODE
// Reset max_mv_magnitude based on update flag.
if (cpi->do_frame_data_update) mv_search_params->max_mv_magnitude = -1;
-#else
- mv_search_params->max_mv_magnitude = -1;
-#endif
}
}
}
void av1_set_screen_content_options(AV1_COMP *cpi, FeatureFlags *features) {
const AV1_COMMON *const cm = &cpi->common;
+ const MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
if (cm->seq_params->force_screen_content_tools != 2) {
features->allow_screen_content_tools = features->allow_intrabc =
@@ -1799,6 +1815,7 @@ void av1_set_screen_content_options(AV1_COMP *cpi, FeatureFlags *features) {
features->allow_screen_content_tools = 1;
features->allow_intrabc = cpi->oxcf.mode == REALTIME ? 0 : 1;
cpi->is_screen_content_type = 1;
+ cpi->use_screen_content_tools = 1;
return;
}
@@ -1853,10 +1870,8 @@ void av1_set_screen_content_options(AV1_COMP *cpi, FeatureFlags *features) {
struct buf_2d buf;
buf.stride = stride;
buf.buf = (uint8_t *)this_src;
- const unsigned int var =
- use_hbd
- ? av1_high_get_sby_perpixel_variance(cpi, &buf, BLOCK_16X16, bd)
- : av1_get_sby_perpixel_variance(cpi, &buf, BLOCK_16X16);
+ const unsigned int var = av1_get_perpixel_variance(
+ cpi, xd, &buf, BLOCK_16X16, AOM_PLANE_Y, use_hbd);
if (var > var_thresh) ++counts_2;
}
}
@@ -1876,19 +1891,6 @@ void av1_set_screen_content_options(AV1_COMP *cpi, FeatureFlags *features) {
counts_2 * blk_h * blk_w * 30 > width * height);
}
-// Function pointer to search site config initialization
-// of different search method functions.
-typedef void (*av1_init_search_site_config)(search_site_config *cfg, int stride,
- int level);
-
-av1_init_search_site_config
- av1_init_motion_compensation[NUM_DISTINCT_SEARCH_METHODS] = {
- av1_init_dsmotion_compensation, av1_init_motion_compensation_nstep,
- av1_init_motion_compensation_nstep, av1_init_dsmotion_compensation,
- av1_init_motion_compensation_hex, av1_init_motion_compensation_bigdia,
- av1_init_motion_compensation_square
- };
-
static void init_motion_estimation(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
MotionVectorSearchParams *const mv_search_params = &cpi->mv_search_params;
@@ -1961,14 +1963,22 @@ static void set_restoration_unit_size(int width, int height, int sx, int sy,
static void init_ref_frame_bufs(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
int i;
- BufferPool *const pool = cm->buffer_pool;
- cm->cur_frame = NULL;
+ if (cm->cur_frame) {
+ cm->cur_frame->ref_count--;
+ cm->cur_frame = NULL;
+ }
for (i = 0; i < REF_FRAMES; ++i) {
- cm->ref_frame_map[i] = NULL;
+ if (cm->ref_frame_map[i]) {
+ cm->ref_frame_map[i]->ref_count--;
+ cm->ref_frame_map[i] = NULL;
+ }
}
+#ifndef NDEBUG
+ BufferPool *const pool = cm->buffer_pool;
for (i = 0; i < FRAME_BUFFERS; ++i) {
- pool->frame_bufs[i].ref_count = 0;
+ assert(pool->frame_bufs[i].ref_count == 0);
}
+#endif
}
void av1_check_initial_width(AV1_COMP *cpi, int use_highbitdepth,
@@ -2098,12 +2108,17 @@ void av1_set_frame_size(AV1_COMP *cpi, int width, int height) {
"Failed to allocate context buffers");
}
+ AV1EncoderConfig *oxcf = &cpi->oxcf;
+ oxcf->border_in_pixels = av1_get_enc_border_size(
+ av1_is_resize_needed(oxcf), oxcf->kf_cfg.key_freq_max == 0,
+ cm->seq_params->sb_size);
+
// Reset the frame pointers to the current frame size.
if (aom_realloc_frame_buffer(
&cm->cur_frame->buf, cm->width, cm->height, seq_params->subsampling_x,
seq_params->subsampling_y, seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels, cm->features.byte_alignment, NULL, NULL,
- NULL, cpi->oxcf.tool_cfg.enable_global_motion))
+ NULL, cpi->oxcf.tool_cfg.enable_global_motion, 0))
aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate frame buffer");
@@ -2146,6 +2161,28 @@ void av1_set_frame_size(AV1_COMP *cpi, int width, int height) {
set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
}
+static INLINE int extend_borders_mt(const AV1_COMP *cpi,
+ MULTI_THREADED_MODULES stage, int plane) {
+ const AV1_COMMON *const cm = &cpi->common;
+ if (cpi->mt_info.num_mod_workers[stage] < 2) return 0;
+ switch (stage) {
+    // TODO(deepa.kg@ittiam.com): When cdef and loop-restoration are disabled,
+    // multi-thread frame border extension along with loop filtering of the
+    // frame. As loop-filtering of a superblock row modifies the pixels of
+    // the above superblock row, border extension requires that loop
+    // filtering of the current and above superblock rows is complete.
+ case MOD_LPF: return 0;
+ case MOD_CDEF:
+ return is_cdef_used(cm) && !cpi->rtc_ref.non_reference_frame &&
+ !is_restoration_used(cm) && !av1_superres_scaled(cm);
+ case MOD_LR:
+ return is_restoration_used(cm) &&
+ (cm->rst_info[plane].frame_restoration_type != RESTORE_NONE);
+ default: assert(0);
+ }
+ return 0;
+}
+
/*!\brief Select and apply cdef filters and switchable restoration filters
*
* \ingroup high_level_algo
@@ -2165,19 +2202,27 @@ static void cdef_restoration_frame(AV1_COMP *cpi, AV1_COMMON *cm,
start_timing(cpi, cdef_time);
#endif
const int num_workers = cpi->mt_info.num_mod_workers[MOD_CDEF];
+ const int use_screen_content_model =
+ cm->quant_params.base_qindex >
+ AOMMAX(cpi->sf.rt_sf.screen_content_cdef_filter_qindex_thresh,
+ cpi->rc.best_quality + 5) &&
+ cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN;
// Find CDEF parameters
av1_cdef_search(&cpi->mt_info, &cm->cur_frame->buf, cpi->source, cm, xd,
cpi->sf.lpf_sf.cdef_pick_method, cpi->td.mb.rdmult,
- cpi->sf.rt_sf.skip_cdef_sb, cpi->rc.frames_since_key,
- cpi->oxcf.tool_cfg.cdef_control,
- cpi->svc.non_reference_frame);
+ cpi->sf.rt_sf.skip_cdef_sb, cpi->oxcf.tool_cfg.cdef_control,
+ use_screen_content_model, cpi->rtc_ref.non_reference_frame);
// Apply the filter
- if (!cpi->svc.non_reference_frame) {
+ if (!cpi->rtc_ref.non_reference_frame) {
if (num_workers > 1) {
+ // Extension of frame borders is multi-threaded along with cdef.
+ const int do_extend_border =
+ extend_borders_mt(cpi, MOD_CDEF, /* plane */ 0);
av1_cdef_frame_mt(cm, xd, cpi->mt_info.cdef_worker,
cpi->mt_info.workers, &cpi->mt_info.cdef_sync,
- num_workers, av1_cdef_init_fb_row_mt);
+ num_workers, av1_cdef_init_fb_row_mt,
+ do_extend_border);
} else {
av1_cdef_frame(&cm->cur_frame->buf, cm, xd, av1_cdef_init_fb_row);
}
@@ -2185,11 +2230,6 @@ static void cdef_restoration_frame(AV1_COMP *cpi, AV1_COMMON *cm,
#if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, cdef_time);
#endif
- } else {
- cm->cdef_info.cdef_bits = 0;
- cm->cdef_info.cdef_strengths[0] = 0;
- cm->cdef_info.nb_cdef_strengths = 1;
- cm->cdef_info.cdef_uv_strengths[0] = 0;
}
av1_superres_post_encode(cpi);
@@ -2206,18 +2246,18 @@ static void cdef_restoration_frame(AV1_COMP *cpi, AV1_COMMON *cm,
if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
cm->rst_info[2].frame_restoration_type != RESTORE_NONE) {
- if (num_workers > 1)
+ if (num_workers > 1) {
+        // Extension of frame borders is multi-threaded along with the loop
+        // restoration filter.
+ const int do_extend_border = 1;
av1_loop_restoration_filter_frame_mt(
&cm->cur_frame->buf, cm, 0, mt_info->workers, num_workers,
- &mt_info->lr_row_sync, &cpi->lr_ctxt);
- else
+ &mt_info->lr_row_sync, &cpi->lr_ctxt, do_extend_border);
+ } else {
av1_loop_restoration_filter_frame(&cm->cur_frame->buf, cm, 0,
&cpi->lr_ctxt);
+ }
}
- } else {
- cm->rst_info[0].frame_restoration_type = RESTORE_NONE;
- cm->rst_info[1].frame_restoration_type = RESTORE_NONE;
- cm->rst_info[2].frame_restoration_type = RESTORE_NONE;
}
#if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, loop_restoration_time);
@@ -2225,8 +2265,61 @@ static void cdef_restoration_frame(AV1_COMP *cpi, AV1_COMMON *cm,
#endif // !CONFIG_REALTIME_ONLY
}
-/*!\brief Select and apply in-loop deblocking filters, cdef filters, and
- * restoration filters
+static void extend_frame_borders(AV1_COMP *cpi) {
+ const AV1_COMMON *const cm = &cpi->common;
+ // TODO(debargha): Fix mv search range on encoder side
+ for (int plane = 0; plane < av1_num_planes(cm); ++plane) {
+ const bool extend_border_done = extend_borders_mt(cpi, MOD_CDEF, plane) ||
+ extend_borders_mt(cpi, MOD_LR, plane);
+ if (!extend_border_done) {
+ const YV12_BUFFER_CONFIG *const ybf = &cm->cur_frame->buf;
+ aom_extend_frame_borders_plane_row(ybf, plane, 0,
+ ybf->crop_heights[plane > 0]);
+ }
+ }
+}
+
+static void set_postproc_filter_default_params(AV1_COMMON *cm) {
+ struct loopfilter *const lf = &cm->lf;
+ CdefInfo *const cdef_info = &cm->cdef_info;
+ RestorationInfo *const rst_info = cm->rst_info;
+
+ lf->filter_level[0] = 0;
+ lf->filter_level[1] = 0;
+ cdef_info->cdef_bits = 0;
+ cdef_info->cdef_strengths[0] = 0;
+ cdef_info->nb_cdef_strengths = 1;
+ cdef_info->cdef_uv_strengths[0] = 0;
+ rst_info[0].frame_restoration_type = RESTORE_NONE;
+ rst_info[1].frame_restoration_type = RESTORE_NONE;
+ rst_info[2].frame_restoration_type = RESTORE_NONE;
+}
+
+// Checks if post-processing filters need to be applied.
+// NOTE: This function decides if the application of different post-processing
+// filters on the reconstructed frame can be skipped at the encoder side.
+// However, the computation of different filter parameters that are signaled in
+// the bitstream is still required.
+static bool should_skip_postproc_filtering(AV1_COMP *cpi, int use_cdef,
+ int use_restoration) {
+ if (!cpi->oxcf.algo_cfg.skip_postproc_filtering || cpi->ppi->b_calculate_psnr)
+ return false;
+ assert(cpi->oxcf.mode == ALLINTRA);
+ const AV1_COMMON *const cm = &cpi->common;
+
+ // The post-processing filters are applied one after the other. In case of
+ // ALLINTRA encoding, the reconstructed frame is not used as a reference
+ // frame. Hence, the application of these filters can be skipped when
+ // 1. filter parameters of the subsequent stages are not dependent on the
+ // filtered output of the current stage or
+ // 2. subsequent filtering stages are disabled
+  // Consequently, the application of deblocking filters is also skipped if
+  // there are no further filtering stages.
+ return (!use_cdef && !av1_superres_scaled(cm) && !use_restoration);
+}
+
+/*!\brief Select and apply deblocking filters, cdef filters, and restoration
+ * filters.
*
* \ingroup high_level_algo
*/
@@ -2241,29 +2334,36 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
const int use_loopfilter =
!cm->features.coded_lossless && !cm->tiles.large_scale;
- const int use_cdef = cm->seq_params->enable_cdef &&
- !cm->features.coded_lossless && !cm->tiles.large_scale;
+ const int use_cdef = is_cdef_used(cm);
const int use_restoration = is_restoration_used(cm);
- const int is_realtime = cpi->sf.rt_sf.use_nonrd_pick_mode;
-
- struct loopfilter *lf = &cm->lf;
#if CONFIG_COLLECT_COMPONENT_TIMING
start_timing(cpi, loop_filter_time);
#endif
if (use_loopfilter) {
av1_pick_filter_level(cpi->source, cpi, cpi->sf.lpf_sf.lpf_pick);
- } else {
- lf->filter_level[0] = 0;
- lf->filter_level[1] = 0;
+ if (should_skip_postproc_filtering(cpi, use_cdef, use_restoration)) return;
+ struct loopfilter *lf = &cm->lf;
+ if ((lf->filter_level[0] || lf->filter_level[1]) &&
+ !cpi->rtc_ref.non_reference_frame) {
+      // lpf_opt_level = 1 : Enables dual/quad loop-filtering. It is set when
+      // the transform size search depth in inter blocks is limited to one,
+      // as quad loop filtering assumes that all the transform blocks within
+      // a 16x8/8x16/16x16 prediction block are of the same size.
+      // lpf_opt_level = 2 : Filters both chroma planes together, in addition
+      // to enabling dual/quad loop-filtering. This is enabled when the lpf
+      // pick method is LPF_PICK_FROM_Q, as the u and v plane filter levels
+      // are equal.
+ int lpf_opt_level = 0;
+ if (is_inter_tx_size_search_level_one(&cpi->sf.tx_sf)) {
+ lpf_opt_level = (cpi->sf.lpf_sf.lpf_pick == LPF_PICK_FROM_Q) ? 2 : 1;
+ }
+ av1_loop_filter_frame_mt(&cm->cur_frame->buf, cm, xd, 0, num_planes, 0,
+ mt_info->workers, num_workers,
+ &mt_info->lf_row_sync, lpf_opt_level);
+ }
}
- if ((lf->filter_level[0] || lf->filter_level[1]) &&
- !cpi->svc.non_reference_frame) {
- av1_loop_filter_frame_mt(&cm->cur_frame->buf, cm, xd, 0, num_planes, 0,
- mt_info->workers, num_workers,
- &mt_info->lf_row_sync, is_realtime);
- }
#if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, loop_filter_time);
#endif
@@ -2271,6 +2371,36 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
cdef_restoration_frame(cpi, cm, xd, use_restoration, use_cdef);
}
+static void update_motion_stat(AV1_COMP *const cpi) {
+ AV1_COMMON *const cm = &cpi->common;
+ const CommonModeInfoParams *const mi_params = &cm->mi_params;
+ RATE_CONTROL *const rc = &cpi->rc;
+ SVC *const svc = &cpi->svc;
+ const int avg_cnt_zeromv =
+ 100 * cpi->rc.cnt_zeromv / (mi_params->mi_rows * mi_params->mi_cols);
+ if (!cpi->ppi->use_svc ||
+ (cpi->ppi->use_svc &&
+ !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
+ cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {
+ rc->avg_frame_low_motion =
+ (rc->avg_frame_low_motion == 0)
+ ? avg_cnt_zeromv
+ : (3 * rc->avg_frame_low_motion + avg_cnt_zeromv) / 4;
+    // For SVC: propagate avg_frame_low_motion (only computed on the top
+    // spatial layer) to all lower spatial layers.
+ if (cpi->ppi->use_svc &&
+ svc->spatial_layer_id == svc->number_spatial_layers - 1) {
+ for (int i = 0; i < svc->number_spatial_layers - 1; ++i) {
+ const int layer = LAYER_IDS_TO_IDX(i, svc->temporal_layer_id,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+ RATE_CONTROL *const lrc = &lc->rc;
+ lrc->avg_frame_low_motion = rc->avg_frame_low_motion;
+ }
+ }
+ }
+}
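
/* The running average above is a fixed-coefficient IIR filter (sketch):
 * avg' = (3 * avg + cur) / 4, seeded with the first observation, so a step
 * change in the zero-mv share is mostly absorbed within a few frames. */
static int update_low_motion_avg(int avg, int cur_pct) {
  return (avg == 0) ? cur_pct : (3 * avg + cur_pct) / 4;
}
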
+
/*!\brief Encode a frame without the recode loop, usually used in one-pass
* encoding and realtime coding.
*
@@ -2390,21 +2520,22 @@ static int encode_without_recode(AV1_COMP *cpi) {
}
}
-#if CONFIG_FRAME_PARALLEL_ENCODE
int scale_references = 0;
#if CONFIG_FPMT_TEST
scale_references =
cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE ? 1 : 0;
#endif // CONFIG_FPMT_TEST
if (scale_references ||
- cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0)
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
- {
+ cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
// For SVC the inter-layer/spatial prediction is not done for newmv
// (zero_mode is forced), and since the scaled references are only
- // use for newmv search, we can avoid scaling here.
+    // used for newmv search, we can avoid scaling here when
+    // force_zero_mode_spatial_ref is set for SVC mode.
+    // For dynamic_resize, however, we always check for scaling references
+    // for now.
if (!frame_is_intra_only(cm) &&
- !(cpi->ppi->use_svc && cpi->svc.force_zero_mode_spatial_ref))
+ (!cpi->ppi->use_svc || !cpi->svc.force_zero_mode_spatial_ref ||
+ cpi->oxcf.resize_cfg.resize_mode == RESIZE_DYNAMIC))
av1_scale_references(cpi, filter_scaler, phase_scaler, 1);
}
@@ -2440,8 +2571,8 @@ static int encode_without_recode(AV1_COMP *cpi) {
if (q_cfg->aq_mode == CYCLIC_REFRESH_AQ) {
suppress_active_map(cpi);
av1_cyclic_refresh_setup(cpi);
- av1_apply_active_map(cpi);
}
+ av1_apply_active_map(cpi);
if (cm->seg.enabled) {
if (!cm->seg.update_data && cm->prev_frame) {
segfeatures_copy(&cm->seg, &cm->prev_frame->seg);
@@ -2468,7 +2599,7 @@ static int encode_without_recode(AV1_COMP *cpi) {
&cpi->orig_source, cpi->oxcf.frm_dim_cfg.width,
cpi->oxcf.frm_dim_cfg.height, seq_params->subsampling_x,
seq_params->subsampling_y, seq_params->use_highbitdepth,
- cpi->oxcf.border_in_pixels, cm->features.byte_alignment))
+ cpi->oxcf.border_in_pixels, cm->features.byte_alignment, 0))
aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate scaled buffer");
}
@@ -2489,10 +2620,8 @@ static int encode_without_recode(AV1_COMP *cpi) {
// transform / motion compensation build reconstruction frame
av1_encode_frame(cpi);
- // Update some stats from cyclic refresh.
- if (q_cfg->aq_mode == CYCLIC_REFRESH_AQ && !cpi->rc.rtc_external_ratectrl &&
- !frame_is_intra_only(cm))
- av1_cyclic_refresh_postencode(cpi);
+ if (!cpi->rc.rtc_external_ratectrl && !frame_is_intra_only(cm))
+ update_motion_stat(cpi);
// Adjust the refresh of the golden (longer-term) reference based on QP
// selected for this frame. This is for CBR with 1 layer/non-svc RTC mode.
@@ -2595,9 +2724,8 @@ static int encode_with_recode_loop(AV1_COMP *cpi, size_t *size, uint8_t *dest) {
int original_q = 0;
#endif
-#if CONFIG_FRAME_PARALLEL_ENCODE
cpi->num_frame_recode = 0;
-#endif
+
// Loop variables
int loop = 0;
int loop_count = 0;
@@ -2644,7 +2772,6 @@ static int encode_with_recode_loop(AV1_COMP *cpi, size_t *size, uint8_t *dest) {
cpi->oxcf.tool_cfg.enable_global_motion);
}
-#if CONFIG_FRAME_PARALLEL_ENCODE
int scale_references = 0;
#if CONFIG_FPMT_TEST
scale_references =
@@ -2652,9 +2779,6 @@ static int encode_with_recode_loop(AV1_COMP *cpi, size_t *size, uint8_t *dest) {
#endif // CONFIG_FPMT_TEST
if (scale_references ||
cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
-#else
- {
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
if (!frame_is_intra_only(cm)) {
if (loop_count > 0) {
release_scaled_references(cpi);
@@ -2712,6 +2836,19 @@ static int encode_with_recode_loop(AV1_COMP *cpi, size_t *size, uint8_t *dest) {
}
#endif // CONFIG_RATECTRL_LOG && CONFIG_THREE_PASS && CONFIG_BITRATE_ACCURACY
+ if (cpi->use_ducky_encode) {
+ const DuckyEncodeFrameInfo *frame_info =
+ &cpi->ducky_encode_info.frame_info;
+ if (frame_info->qp_mode == DUCKY_ENCODE_FRAME_MODE_QINDEX) {
+ q = frame_info->q_index;
+
+ // TODO(jingning): Coding block level QP offset is currently disabled
+ // in RC lib.
+ cm->delta_q_info.delta_q_present_flag = 0;
+ }
+ // TODO(angiebird): Implement DUCKY_ENCODE_FRAME_MODE_QINDEX_RDMULT mode
+ }
+
av1_set_quantizer(cm, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q,
q_cfg->enable_chroma_deltaq, q_cfg->enable_hdr_deltaq);
av1_set_speed_features_qindex_dependent(cpi, oxcf->speed);
@@ -2774,19 +2911,13 @@ static int encode_with_recode_loop(AV1_COMP *cpi, size_t *size, uint8_t *dest) {
// transform / motion compensation build reconstruction frame
av1_encode_frame(cpi);
-#if CONFIG_FRAME_PARALLEL_ENCODE
// Disable mv_stats collection for parallel frames based on update flag.
if (!cpi->do_frame_data_update) do_mv_stats_collection = 0;
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
- // Reset the mv_stats in case we are interrupted by an intraframe or an
- // overlay frame.
-#if CONFIG_FRAME_PARALLEL_ENCODE
+ // Reset the mv_stats in case we are interrupted by an intraframe or an
+ // overlay frame.
if (cpi->mv_stats.valid && do_mv_stats_collection) av1_zero(cpi->mv_stats);
-#else
- if (cpi->ppi->mv_stats.valid && do_mv_stats_collection)
- av1_zero(cpi->ppi->mv_stats);
-#endif
+
// Gather the mv_stats for the next frame
if (cpi->sf.hl_sf.high_precision_mv_usage == LAST_MV_DATA &&
av1_frame_allows_smart_mv(cpi) && do_mv_stats_collection) {
@@ -2860,18 +2991,20 @@ static int encode_with_recode_loop(AV1_COMP *cpi, size_t *size, uint8_t *dest) {
}
#endif
+ if (cpi->use_ducky_encode) {
+ // Ducky encode currently does not support recode loop.
+ loop = 0;
+ }
#if CONFIG_BITRATE_ACCURACY || CONFIG_RD_COMMAND
loop = 0; // turn off recode loop when CONFIG_BITRATE_ACCURACY is on
#endif // CONFIG_BITRATE_ACCURACY || CONFIG_RD_COMMAND
if (loop) {
++loop_count;
-#if CONFIG_FRAME_PARALLEL_ENCODE
cpi->num_frame_recode =
(cpi->num_frame_recode < (NUM_RECODES_PER_FRAME - 1))
? (cpi->num_frame_recode + 1)
: (NUM_RECODES_PER_FRAME - 1);
-#endif
#if CONFIG_INTERNAL_STATS
++cpi->frame_recode_hits;
#endif
@@ -2940,14 +3073,12 @@ static int encode_with_recode_loop_and_filter(AV1_COMP *cpi, size_t *size,
#if CONFIG_COLLECT_COMPONENT_TIMING
start_timing(cpi, encode_with_or_without_recode_time);
#endif
-#if CONFIG_FRAME_PARALLEL_ENCODE
for (int i = 0; i < NUM_RECODES_PER_FRAME; i++) {
cpi->do_update_frame_probs_txtype[i] = 0;
cpi->do_update_frame_probs_obmc[i] = 0;
cpi->do_update_frame_probs_warp[i] = 0;
cpi->do_update_frame_probs_interpfilter[i] = 0;
}
-#endif
cpi->do_update_vbr_bits_off_target_fast = 0;
int err;
@@ -3011,24 +3142,11 @@ static int encode_with_recode_loop_and_filter(AV1_COMP *cpi, size_t *size,
cm->cur_frame->buf.render_width = cm->render_width;
cm->cur_frame->buf.render_height = cm->render_height;
- // Pick the loop filter level for the frame.
- if (!cm->features.allow_intrabc) {
- loopfilter_frame(cpi, cm);
- } else {
- cm->lf.filter_level[0] = 0;
- cm->lf.filter_level[1] = 0;
- cm->cdef_info.cdef_bits = 0;
- cm->cdef_info.cdef_strengths[0] = 0;
- cm->cdef_info.nb_cdef_strengths = 1;
- cm->cdef_info.cdef_uv_strengths[0] = 0;
- cm->rst_info[0].frame_restoration_type = RESTORE_NONE;
- cm->rst_info[1].frame_restoration_type = RESTORE_NONE;
- cm->rst_info[2].frame_restoration_type = RESTORE_NONE;
- }
+ set_postproc_filter_default_params(cm);
- // TODO(debargha): Fix mv search range on encoder side
- // aom_extend_frame_inner_borders(&cm->cur_frame->buf, av1_num_planes(cm));
- aom_extend_frame_borders(&cm->cur_frame->buf, av1_num_planes(cm));
+ if (!cm->features.allow_intrabc) loopfilter_frame(cpi, cm);
+
+ extend_frame_borders(cpi);
#ifdef OUTPUT_YUV_REC
aom_write_one_yuv_frame(cm, &cm->cur_frame->buf);
@@ -3064,6 +3182,21 @@ static int encode_with_recode_loop_and_filter(AV1_COMP *cpi, size_t *size,
const int64_t bits = (*size << 3);
*rate = (bits << 5); // To match scale.
}
+
+#if !CONFIG_REALTIME_ONLY
+ if (cpi->use_ducky_encode) {
+ PSNR_STATS psnr;
+ aom_calc_psnr(cpi->source, &cpi->common.cur_frame->buf, &psnr);
+ DuckyEncodeFrameResult *frame_result = &cpi->ducky_encode_info.frame_result;
+ frame_result->global_order_idx = cm->cur_frame->display_order_hint;
+ frame_result->q_index = cm->quant_params.base_qindex;
+ frame_result->rdmult = cpi->rd.RDMULT;
+ frame_result->rate = (int)(*size) * 8;
+ frame_result->dist = psnr.sse[0];
+ frame_result->psnr = psnr.psnr[0];
+ }
+#endif // !CONFIG_REALTIME_ONLY
+
return AOM_CODEC_OK;
}
@@ -3225,18 +3358,20 @@ static int encode_with_and_without_superres(AV1_COMP *cpi, size_t *size,
// Conditions to disable cdf_update mode in selective mode for real-time.
// Handle case for layers, scene change, and resizing.
-static int selective_disable_cdf_rtc(AV1_COMP *cpi) {
- AV1_COMMON *const cm = &cpi->common;
- RATE_CONTROL *const rc = &cpi->rc;
+static AOM_INLINE int selective_disable_cdf_rtc(const AV1_COMP *cpi) {
+ const AV1_COMMON *const cm = &cpi->common;
+ const RATE_CONTROL *const rc = &cpi->rc;
// For single layer.
if (cpi->svc.number_spatial_layers == 1 &&
cpi->svc.number_temporal_layers == 1) {
// Don't disable on intra_only, scene change (high_source_sad = 1),
- // or resized frame. To avoid quality loss for now, force enable at
- // every 8 frames.
+    // or resized frame. To avoid quality loss, force enable for ~30 frames
+    // after a key frame or scene/slide change, and after 8 frames since the
+    // last update if frame_source_sad > 0.
if (frame_is_intra_only(cm) || is_frame_resize_pending(cpi) ||
- rc->high_source_sad || rc->frames_since_key < 10 ||
- cm->current_frame.frame_number % 8 == 0)
+ rc->high_source_sad || rc->frames_since_key < 30 ||
+ cpi->cyclic_refresh->counter_encode_maxq_scene_change < 30 ||
+ (cpi->frames_since_last_update > 8 && cpi->rc.frame_source_sad > 0))
return 0;
else
return 1;
@@ -3337,6 +3472,8 @@ static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
struct segmentation *const seg = &cm->seg;
FeatureFlags *const features = &cm->features;
const TileConfig *const tile_cfg = &oxcf->tile_cfg;
+ assert(cpi->source != NULL);
+ cpi->td.mb.e_mbd.cur_buf = cpi->source;
#if CONFIG_COLLECT_COMPONENT_TIMING
start_timing(cpi, encode_frame_to_data_rate_time);
@@ -3367,6 +3504,10 @@ static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
cpi->last_frame_type = current_frame->frame_type;
+ if (frame_is_intra_only(cm)) {
+ cpi->frames_since_last_update = 0;
+ }
+
if (frame_is_sframe(cm)) {
GF_GROUP *gf_group = &cpi->ppi->gf_group;
// S frame will wipe out any previously encoded altref so we cannot place
@@ -3423,6 +3564,21 @@ static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
cm->cur_frame->buf.y_crop_height);
}
+#if !CONFIG_REALTIME_ONLY
+ if (cpi->use_ducky_encode) {
+ PSNR_STATS psnr;
+ aom_calc_psnr(cpi->source, &cpi->common.cur_frame->buf, &psnr);
+ DuckyEncodeFrameResult *frame_result =
+ &cpi->ducky_encode_info.frame_result;
+ frame_result->global_order_idx = cm->cur_frame->display_order_hint;
+ frame_result->q_index = cm->quant_params.base_qindex;
+ frame_result->rdmult = cpi->rd.RDMULT;
+ frame_result->rate = (int)(*size) * 8;
+ frame_result->dist = psnr.sse[0];
+ frame_result->psnr = psnr.psnr[0];
+ }
+#endif // !CONFIG_REALTIME_ONLY
+
++current_frame->frame_number;
update_frame_index_set(&cpi->frame_index_set, cm->show_frame);
return AOM_CODEC_OK;
@@ -3448,6 +3604,11 @@ static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
cpi->common.features.cur_frame_force_integer_mv = 0;
}
+  // This is used by av1_pack_bitstream, so it needs to be set here for the
+  // row-mt case, where the encoding code works on a temporary structure.
+ cpi->td.mb.e_mbd.cur_frame_force_integer_mv =
+ cpi->common.features.cur_frame_force_integer_mv;
+
// Set default state for segment based loop filter update flags.
cm->lf.mode_ref_delta_update = 0;
@@ -3546,7 +3707,11 @@ static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
features->disable_cdf_update = 1;
break;
case 1: // Enable CDF update for all frames.
- features->disable_cdf_update = 0;
+ if (cpi->sf.rt_sf.disable_cdf_update_non_reference_frame &&
+ cpi->rtc_ref.non_reference_frame && cpi->rc.frames_since_key > 2)
+ features->disable_cdf_update = 1;
+ else
+ features->disable_cdf_update = 0;
break;
case 2:
// Strategically determine at which frames to do CDF update.
@@ -3561,8 +3726,6 @@ static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
break;
}
-#if CONFIG_FRAME_PARALLEL_ENCODE
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
// Disable cdf update for the INTNL_ARF_UPDATE frame with
// frame_parallel_level 1.
if (!cpi->do_frame_data_update &&
@@ -3570,8 +3733,6 @@ static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
assert(cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 1);
features->disable_cdf_update = 1;
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
int largest_tile_id = 0;
if (av1_superres_in_recode_allowed(cpi)) {
@@ -3607,16 +3768,13 @@ static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
#endif // DUMP_RECON_FRAMES
if (cm->seg.enabled) {
- if (cm->seg.update_map) {
- update_reference_segmentation_map(cpi);
- } else if (cm->last_frame_seg_map) {
+ if (cm->seg.update_map == 0 && cm->last_frame_seg_map) {
memcpy(cm->cur_frame->seg_map, cm->last_frame_seg_map,
cm->cur_frame->mi_cols * cm->cur_frame->mi_rows *
sizeof(*cm->cur_frame->seg_map));
}
}
-#if CONFIG_FRAME_PARALLEL_ENCODE
int release_scaled_refs = 0;
#if CONFIG_FPMT_TEST
release_scaled_refs =
@@ -3624,9 +3782,6 @@ static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
#endif // CONFIG_FPMT_TEST
if (release_scaled_refs ||
cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 0) {
-#else
- {
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
if (frame_is_intra_only(cm) == 0) {
release_scaled_references(cpi);
}
@@ -3661,6 +3816,12 @@ static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
cpi->last_frame_type = current_frame->frame_type;
+ if (cm->features.disable_cdf_update) {
+ cpi->frames_since_last_update++;
+ } else {
+ cpi->frames_since_last_update = 1;
+ }
+
// Clear the one shot update flags for segmentation map and mode/ref loop
// filter deltas.
cm->seg.update_map = 0;
@@ -3843,8 +4004,11 @@ int av1_receive_raw_frame(AV1_COMP *cpi, aom_enc_frame_flags_t frame_flags,
#endif // CONFIG_DENOISE
if (av1_lookahead_push(cpi->ppi->lookahead, sd, time_stamp, end_time,
- use_highbitdepth, frame_flags))
+ use_highbitdepth, frame_flags)) {
+ aom_internal_error(cm->error, AOM_CODEC_ERROR,
+ "av1_lookahead_push() failed");
res = -1;
+ }
#if CONFIG_INTERNAL_STATS
aom_usec_timer_mark(&timer);
cpi->ppi->total_time_receive_data += aom_usec_timer_elapsed(&timer);
@@ -4160,7 +4324,7 @@ static AOM_INLINE void update_frames_till_gf_update(AV1_COMP *cpi) {
// We should fix the cpi->common.show_frame flag
// instead of checking the other condition to update the counter properly.
if (cpi->common.show_frame ||
- is_frame_droppable(&cpi->svc, &cpi->ext_flags.refresh_frame)) {
+ is_frame_droppable(&cpi->rtc_ref, &cpi->ext_flags.refresh_frame)) {
// Decrement count down till next gf
if (cpi->rc.frames_till_gf_update_due > 0)
cpi->rc.frames_till_gf_update_due--;
@@ -4224,10 +4388,8 @@ static void update_end_of_frame_stats(AV1_COMP *cpi) {
cpi->ppi->filter_level_v = lf->filter_level_v;
}
}
-#if CONFIG_FRAME_PARALLEL_ENCODE
// Store frame level mv_stats from cpi to ppi.
cpi->ppi->mv_stats = cpi->mv_stats;
-#endif
}
// Updates frame level stats related to global motion
@@ -4245,7 +4407,7 @@ static AOM_INLINE void update_gm_stats(AV1_COMP *cpi) {
}
}
int update_actual_stats = 1;
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
update_actual_stats =
(cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
if (!update_actual_stats) {
@@ -4293,8 +4455,6 @@ void av1_post_encode_updates(AV1_COMP *const cpi,
#endif
if (!is_stat_generation_stage(cpi) && !cpi->is_dropped_frame) {
-#if CONFIG_FRAME_PARALLEL_ENCODE
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
// Before calling refresh_reference_frames(), copy ppi->ref_frame_map_copy
// to cm->ref_frame_map for frame_parallel_level 2 frame in a parallel
// encode set of lower layer frames.
@@ -4307,11 +4467,7 @@ void av1_post_encode_updates(AV1_COMP *const cpi,
memcpy(cm->ref_frame_map, ppi->ref_frame_map_copy,
sizeof(cm->ref_frame_map));
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
refresh_reference_frames(cpi);
-#if CONFIG_FRAME_PARALLEL_ENCODE
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
// For frame_parallel_level 1 frame in a parallel encode set of lower layer
// frames, store the updated cm->ref_frame_map in ppi->ref_frame_map_copy.
if (ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] == 1 &&
@@ -4319,8 +4475,6 @@ void av1_post_encode_updates(AV1_COMP *const cpi,
memcpy(ppi->ref_frame_map_copy, cm->ref_frame_map,
sizeof(cm->ref_frame_map));
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
av1_rc_postencode_update(cpi, cpi_data->frame_size);
}
@@ -4328,12 +4482,10 @@ void av1_post_encode_updates(AV1_COMP *const cpi,
av1_lookahead_pop(cpi->ppi->lookahead, cpi_data->flush,
cpi->compressor_stage);
}
-#if CONFIG_FRAME_PARALLEL_ENCODE
if (cpi->common.show_frame) {
cpi->ppi->ts_start_last_show_frame = cpi_data->ts_frame_start;
cpi->ppi->ts_end_last_show_frame = cpi_data->ts_frame_end;
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
if (ppi->level_params.keep_level_stats && !is_stat_generation_stage(cpi)) {
// Initialize level info. at the beginning of each sequence.
if (cm->current_frame.frame_type == KEY_FRAME &&
@@ -4382,7 +4534,6 @@ int av1_get_compressed_data(AV1_COMP *cpi, AV1_COMP_DATA *const cpi_data) {
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
AV1_COMMON *const cm = &cpi->common;
-#if CONFIG_FRAME_PARALLEL_ENCODE
// The jmp_buf is valid only for the duration of the function that calls
// setjmp(). Therefore, this function must reset the 'setjmp' field to 0
// before it returns.
@@ -4391,7 +4542,6 @@ int av1_get_compressed_data(AV1_COMP *cpi, AV1_COMP_DATA *const cpi_data) {
return cm->error->error_code;
}
cm->error->setjmp = 1;
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
#if CONFIG_INTERNAL_STATS
cpi->frame_recode_hits = 0;
@@ -4440,12 +4590,8 @@ int av1_get_compressed_data(AV1_COMP *cpi, AV1_COMP_DATA *const cpi_data) {
cm->features.refresh_frame_context = REFRESH_FRAME_CONTEXT_DISABLED;
if (assign_cur_frame_new_fb(cm) == NULL) {
-#if CONFIG_FRAME_PARALLEL_ENCODE
aom_internal_error(cpi->common.error, AOM_CODEC_ERROR,
"Failed to allocate new cur_frame");
-#else
- return AOM_CODEC_ERROR;
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
}
#if CONFIG_COLLECT_COMPONENT_TIMING
@@ -4501,19 +4647,13 @@ int av1_get_compressed_data(AV1_COMP *cpi, AV1_COMP_DATA *const cpi_data) {
#endif
if (result == -1) {
-#if CONFIG_FRAME_PARALLEL_ENCODE
cm->error->setjmp = 0;
-#endif
// Returning -1 indicates no frame encoded; more input is required
return -1;
}
if (result != AOM_CODEC_OK) {
-#if CONFIG_FRAME_PARALLEL_ENCODE
aom_internal_error(cpi->common.error, AOM_CODEC_ERROR,
"Failed to encode frame");
-#else
- return AOM_CODEC_ERROR;
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
}
#if CONFIG_INTERNAL_STATS
aom_usec_timer_mark(&cmptimer);
@@ -4527,13 +4667,10 @@ int av1_get_compressed_data(AV1_COMP *cpi, AV1_COMP_DATA *const cpi_data) {
}
#endif // CONFIG_SPEED_STATS
-#if CONFIG_FRAME_PARALLEL_ENCODE
cm->error->setjmp = 0;
-#endif
return AOM_CODEC_OK;
}
-#if CONFIG_FRAME_PARALLEL_ENCODE
// Populates cpi->scaled_ref_buf corresponding to frames in a parallel encode
// set. Also sets the bitmask 'ref_buffers_used_map'.
void av1_scale_references_fpmt(AV1_COMP *cpi, int *ref_buffers_used_map) {
@@ -4693,7 +4830,6 @@ int av1_init_parallel_frame_context(const AV1_COMP_DATA *const first_cpi_data,
memcpy(ref_frame_map_pairs, first_ref_frame_map_pairs,
sizeof(RefFrameMapPair) * REF_FRAMES);
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
// Store the reference refresh index of frame_parallel_level 1 frame in a
// parallel encode set of lower layer frames.
if (gf_group->update_type[gf_index_start] == INTNL_ARF_UPDATE) {
@@ -4708,7 +4844,6 @@ int av1_init_parallel_frame_context(const AV1_COMP_DATA *const first_cpi_data,
ref_frame_map_pairs[first_cpi->ref_refresh_index].pyr_level =
gf_group->layer_depth[gf_index_start];
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
// Set do_frame_data_update flag as false for frame_parallel_level 1 frame.
first_cpi->do_frame_data_update = false;
@@ -4717,11 +4852,8 @@ int av1_init_parallel_frame_context(const AV1_COMP_DATA *const first_cpi_data,
first_cpi->time_stamps.prev_ts_end = ppi->ts_end_last_show_frame;
}
- av1_get_ref_frames(first_ref_frame_map_pairs, cur_frame_disp,
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
- first_cpi, gf_index_start, 1,
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
- first_cpi->common.remapped_ref_idx);
+ av1_get_ref_frames(first_ref_frame_map_pairs, cur_frame_disp, first_cpi,
+ gf_index_start, 1, first_cpi->common.remapped_ref_idx);
av1_scale_references_fpmt(first_cpi, ref_buffers_used_map);
parallel_frame_count++;
@@ -4789,7 +4921,6 @@ int av1_init_parallel_frame_context(const AV1_COMP_DATA *const first_cpi_data,
cur_cpi_data->timestamp_ratio = first_cpi_data->timestamp_ratio;
cur_cpi_data->flush = first_cpi_data->flush;
cur_cpi_data->frame_size = 0;
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
if (gf_group->update_type[gf_index_start] == INTNL_ARF_UPDATE) {
// If the first frame in a parallel encode set is INTNL_ARF_UPDATE
// frame, initialize lib_flags of frame_parallel_level 2 frame in the
@@ -4809,14 +4940,10 @@ int av1_init_parallel_frame_context(const AV1_COMP_DATA *const first_cpi_data,
cur_cpi->ref_refresh_index = INVALID_IDX;
cur_cpi->refresh_idx_available = false;
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
cur_cpi->twopass_frame.stats_in = stats_in;
- av1_get_ref_frames(first_ref_frame_map_pairs, cur_frame_disp,
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
- cur_cpi, i, 1,
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
- cur_cpi->common.remapped_ref_idx);
+ av1_get_ref_frames(first_ref_frame_map_pairs, cur_frame_disp, cur_cpi, i,
+ 1, cur_cpi->common.remapped_ref_idx);
av1_scale_references_fpmt(cur_cpi, ref_buffers_used_map);
parallel_frame_count++;
}
@@ -4839,7 +4966,6 @@ int av1_init_parallel_frame_context(const AV1_COMP_DATA *const first_cpi_data,
// Return the number of frames in the parallel encode set.
return parallel_frame_count;
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
int av1_get_preview_raw_frame(AV1_COMP *cpi, YV12_BUFFER_CONFIG *dest) {
AV1_COMMON *cm = &cpi->common;
@@ -4847,7 +4973,7 @@ int av1_get_preview_raw_frame(AV1_COMP *cpi, YV12_BUFFER_CONFIG *dest) {
return -1;
} else {
int ret;
- if (cm->cur_frame != NULL) {
+ if (cm->cur_frame != NULL && !cpi->oxcf.algo_cfg.skip_postproc_filtering) {
*dest = cm->cur_frame->buf;
dest->y_width = cm->width;
dest->y_height = cm->height;
@@ -4862,7 +4988,9 @@ int av1_get_preview_raw_frame(AV1_COMP *cpi, YV12_BUFFER_CONFIG *dest) {
}
int av1_get_last_show_frame(AV1_COMP *cpi, YV12_BUFFER_CONFIG *frame) {
- if (cpi->last_show_frame_buf == NULL) return -1;
+ if (cpi->last_show_frame_buf == NULL ||
+ cpi->oxcf.algo_cfg.skip_postproc_filtering)
+ return -1;
*frame = cpi->last_show_frame_buf->buf;
return 0;
@@ -4883,10 +5011,11 @@ aom_codec_err_t av1_copy_new_frame_enc(AV1_COMMON *cm,
int av1_set_internal_size(AV1EncoderConfig *const oxcf,
ResizePendingParams *resize_pending_params,
- AOM_SCALING horiz_mode, AOM_SCALING vert_mode) {
+ AOM_SCALING_MODE horiz_mode,
+ AOM_SCALING_MODE vert_mode) {
int hr = 0, hs = 0, vr = 0, vs = 0;
- if (horiz_mode > ONETWO || vert_mode > ONETWO) return -1;
+ if (horiz_mode > AOME_ONETWO || vert_mode > AOME_ONETWO) return -1;
Scale2Ratio(horiz_mode, &hr, &hs);
Scale2Ratio(vert_mode, &vr, &vs);
@@ -4895,7 +5024,7 @@ int av1_set_internal_size(AV1EncoderConfig *const oxcf,
resize_pending_params->width = (hs - 1 + oxcf->frm_dim_cfg.width * hr) / hs;
resize_pending_params->height = (vs - 1 + oxcf->frm_dim_cfg.height * vr) / vs;
- if (horiz_mode != NORMAL || vert_mode != NORMAL) {
+ if (horiz_mode != AOME_NORMAL || vert_mode != AOME_NORMAL) {
oxcf->resize_cfg.resize_mode = RESIZE_FIXED;
oxcf->algo_cfg.enable_tpl_model = 0;
}
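/* A minimal sketch of the fixed internal-resize arithmetic above, assuming
 * the AOM_SCALING_MODE numbering of the enum removed from encoder.h below
 * (AOME_NORMAL = 0 ... AOME_ONETWO = 6); scale2ratio() is a hypothetical
 * stand-in for libaom's Scale2Ratio(). */
static void scale2ratio(int mode, int *num, int *den) {
  switch (mode) {
    case 1: *num = 4; *den = 5; break;  /* AOME_FOURFIVE */
    case 2: *num = 3; *den = 5; break;  /* AOME_THREEFIVE */
    case 3: *num = 3; *den = 4; break;  /* AOME_THREEFOUR */
    case 6: *num = 1; *den = 2; break;  /* AOME_ONETWO */
    default: *num = 1; *den = 1; break; /* AOME_NORMAL; others omitted */
  }
}

static int scaled_dim(int dim, int mode) {
  int num, den;
  scale2ratio(mode, &num, &den);
  return (den - 1 + dim * num) / den; /* rounds dim * num / den upward */
}

/* e.g. scaled_dim(1920, 6) == 960 and scaled_dim(1080, 1) == 864 */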
@@ -4963,29 +5092,33 @@ int av1_convert_sect5obus_to_annexb(uint8_t *buffer, size_t *frame_size) {
return AOM_CODEC_OK;
}
-static void svc_set_updates_ref_frame_config(
- ExtRefreshFrameFlagsInfo *const ext_refresh_frame_flags, SVC *const svc) {
+static void rtc_set_updates_ref_frame_config(
+ ExtRefreshFrameFlagsInfo *const ext_refresh_frame_flags,
+ RTC_REF *const rtc_ref) {
ext_refresh_frame_flags->update_pending = 1;
- ext_refresh_frame_flags->last_frame = svc->refresh[svc->ref_idx[0]];
- ext_refresh_frame_flags->golden_frame = svc->refresh[svc->ref_idx[3]];
- ext_refresh_frame_flags->bwd_ref_frame = svc->refresh[svc->ref_idx[4]];
- ext_refresh_frame_flags->alt2_ref_frame = svc->refresh[svc->ref_idx[5]];
- ext_refresh_frame_flags->alt_ref_frame = svc->refresh[svc->ref_idx[6]];
- svc->non_reference_frame = 1;
+ ext_refresh_frame_flags->last_frame = rtc_ref->refresh[rtc_ref->ref_idx[0]];
+ ext_refresh_frame_flags->golden_frame = rtc_ref->refresh[rtc_ref->ref_idx[3]];
+ ext_refresh_frame_flags->bwd_ref_frame =
+ rtc_ref->refresh[rtc_ref->ref_idx[4]];
+ ext_refresh_frame_flags->alt2_ref_frame =
+ rtc_ref->refresh[rtc_ref->ref_idx[5]];
+ ext_refresh_frame_flags->alt_ref_frame =
+ rtc_ref->refresh[rtc_ref->ref_idx[6]];
+ rtc_ref->non_reference_frame = 1;
for (int i = 0; i < REF_FRAMES; i++) {
- if (svc->refresh[i] == 1) {
- svc->non_reference_frame = 0;
+ if (rtc_ref->refresh[i] == 1) {
+ rtc_ref->non_reference_frame = 0;
break;
}
}
}
-static int svc_set_references_external_ref_frame_config(AV1_COMP *cpi) {
+static int rtc_set_references_external_ref_frame_config(AV1_COMP *cpi) {
// LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
// BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
int ref = AOM_REFFRAME_ALL;
for (int i = 0; i < INTER_REFS_PER_FRAME; i++) {
- if (!cpi->svc.reference[i]) ref ^= (1 << i);
+ if (!cpi->rtc_ref.reference[i]) ref ^= (1 << i);
}
return ref;
}
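/* A minimal sketch of the bitmask construction above: start from "all
 * references usable" and clear one bit per disabled slot. The XOR is safe
 * because bit i is known to be set on entry. REFFRAME_ALL below assumes
 * the low 7 bits are set, mirroring AOM_REFFRAME_ALL. */
#define NUM_INTER_REFS 7
#define REFFRAME_ALL ((1 << NUM_INTER_REFS) - 1) /* 0x7f */

static int ref_mask_from_flags(const int reference[NUM_INTER_REFS]) {
  int ref = REFFRAME_ALL;
  for (int i = 0; i < NUM_INTER_REFS; i++) {
    if (!reference[i]) ref ^= (1 << i); /* clear the bit for slot i */
  }
  return ref;
}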
@@ -5024,8 +5157,8 @@ void av1_apply_encoding_flags(AV1_COMP *cpi, aom_enc_frame_flags_t flags) {
av1_use_as_reference(&ext_flags->ref_frame_flags, ref);
} else {
- if (cpi->svc.set_ref_frame_config) {
- int ref = svc_set_references_external_ref_frame_config(cpi);
+ if (cpi->rtc_ref.set_ref_frame_config) {
+ int ref = rtc_set_references_external_ref_frame_config(cpi);
av1_use_as_reference(&ext_flags->ref_frame_flags, ref);
}
}
@@ -5052,8 +5185,8 @@ void av1_apply_encoding_flags(AV1_COMP *cpi, aom_enc_frame_flags_t flags) {
ext_refresh_frame_flags->alt2_ref_frame = (upd & AOM_ALT2_FLAG) != 0;
ext_refresh_frame_flags->update_pending = 1;
} else {
- if (cpi->svc.set_ref_frame_config)
- svc_set_updates_ref_frame_config(ext_refresh_frame_flags, &cpi->svc);
+ if (cpi->rtc_ref.set_ref_frame_config)
+ rtc_set_updates_ref_frame_config(ext_refresh_frame_flags, &cpi->rtc_ref);
else
ext_refresh_frame_flags->update_pending = 0;
}
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/encoder.h b/chromium/third_party/libaom/source/libaom/av1/encoder/encoder.h
index 57782a9bf4a..2cdf9530754 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/encoder.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/encoder.h
@@ -104,16 +104,6 @@ typedef struct aom_rational64 {
} aom_rational64_t; // alias for struct aom_rational
enum {
- NORMAL = 0,
- FOURFIVE = 1,
- THREEFIVE = 2,
- THREEFOUR = 3,
- ONEFOUR = 4,
- ONEEIGHT = 5,
- ONETWO = 6
-} UENUM1BYTE(AOM_SCALING);
-
-enum {
// Good Quality Fast Encoding. The encoder balances quality with the amount of
// time it takes to encode the output. Speed setting controls how fast.
GOOD,
@@ -135,13 +125,13 @@ enum {
FRAMEFLAGS_ERROR_RESILIENT = 1 << 6,
} UENUM1BYTE(FRAMETYPE_FLAGS);
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
enum {
PARALLEL_ENCODE = 0,
PARALLEL_SIMULATION_ENCODE,
NUM_FPMT_TEST_ENCODES
} UENUM1BYTE(FPMT_TEST_ENC_CFG);
-#endif // CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#endif // CONFIG_FPMT_TEST
// 0 level frames are sometimes used for rate control purposes, but for
// reference mapping purposes, the minimum level should be 1.
#define MIN_PYR_LEVEL 1
@@ -200,8 +190,6 @@ enum {
#define MAX_VBR_CORPUS_COMPLEXITY 10000
-/*!\cond */
-
typedef enum {
MOD_FP, // First pass
MOD_TF, // Temporal filtering
@@ -573,13 +561,13 @@ typedef struct {
int drop_frames_water_mark;
/*!
* under_shoot_pct indicates the tolerance of the VBR algorithm to
- * undershoot and is used as a trigger threshold for more agressive
+ * undershoot and is used as a trigger threshold for more aggressive
* adaptation of Q. Its value can range from 0-100.
*/
int under_shoot_pct;
/*!
* over_shoot_pct indicates the tolerance of the VBR algorithm to overshoot
- * and is used as a trigger threshold for more agressive adaptation of Q.
+ * and is used as a trigger threshold for more aggressive adaptation of Q.
* Its value can range from 0-1000.
*/
int over_shoot_pct;
@@ -861,6 +849,12 @@ typedef struct {
* 3: Loop filter is disabled for frames with low motion
*/
LOOPFILTER_CONTROL loopfilter_control;
+
+ /*!
+ * Indicates if the application of post-processing filters should be skipped
+ * on the reconstructed frame.
+ */
+ bool skip_postproc_filtering;
} AlgoCfg;
/*!\cond */
@@ -1067,6 +1061,9 @@ typedef struct AV1EncoderConfig {
// Exit the encoder when it fails to encode to a given level.
int strict_level_conformance;
+
+ // Max depth for the GOP after a key frame
+ int kf_max_pyr_height;
/*!\endcond */
} AV1EncoderConfig;
@@ -1363,17 +1360,25 @@ typedef struct {
#endif // CONFIG_MULTITHREAD
/*!
* Buffer to store the superblock whose encoding is complete.
- * cur_col[i] stores the number of superblocks which finished encoding in the
- * ith superblock row.
+ * num_finished_cols[i] stores the number of superblocks which finished
+ * encoding in the ith superblock row.
*/
int *num_finished_cols;
/*!
- * Number of extra superblocks of the top row to be complete for encoding
- * of the current superblock to start. A value of 1 indicates top-right
- * dependency.
+ * Denotes the superblock interval at which conditional signalling should
+ * happen. Also denotes the minimum number of extra superblocks of the top row
+ * to be complete to start encoding the current superblock. A value of 1
+ * indicates top-right dependency.
*/
int sync_range;
/*!
+ * Denotes the additional number of superblocks in the previous row to be
+ * complete to start encoding the current superblock when intraBC tool is
+ * enabled. This additional top-right delay is required to satisfy the
+ * hardware constraints for intraBC tool when row multithreading is enabled.
+ */
+ int intrabc_extra_top_right_sb_delay;
+ /*!
* Number of superblock rows.
*/
int rows;
@@ -1445,6 +1450,8 @@ typedef struct ThreadData {
// store source variance and log of source variance of each 4x4 sub-block
// for subsequent retrieval.
Block4x4VarInfo *src_var_info_of_4x4_sub_blocks;
+ // The pc tree root for RTC non-rd case.
+ PC_TREE *rt_pc_root;
} ThreadData;
struct EncWorkerData;
@@ -1505,17 +1512,17 @@ typedef struct {
/**@}*/
} AV1EncRowMultiThreadInfo;
-#if CONFIG_FRAME_PARALLEL_ENCODE
-/*!
- * \brief Max number of frames that can be encoded in a parallel encode set.
- */
-#define MAX_PARALLEL_FRAMES 4
/*!
* \brief Max number of recodes used to track the frame probabilities.
*/
#define NUM_RECODES_PER_FRAME 10
/*!
+ * \brief Max number of frames that can be encoded in a parallel encode set.
+ */
+#define MAX_PARALLEL_FRAMES 4
+
+/*!
* \brief Buffers to be backed up during parallel encode set to be restored
* later.
*/
@@ -1540,7 +1547,6 @@ typedef struct RestoreStateBuffers {
*/
RestorationLineBuffers *rlbs;
} RestoreStateBuffers;
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
/*!
* \brief Primary Encoder parameters related to multi-threading.
@@ -1572,7 +1578,6 @@ typedef struct PrimaryMultiThreadInfo {
*/
AV1CdefWorkerData *cdef_worker;
-#if CONFIG_FRAME_PARALLEL_ENCODE
/*!
* Primary(Level 1) Synchronization object used to launch job in the worker
* thread.
@@ -1583,7 +1588,6 @@ typedef struct PrimaryMultiThreadInfo {
* Number of primary workers created for multi-threading.
*/
int p_num_workers;
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
} PrimaryMultiThreadInfo;
/*!
@@ -1667,12 +1671,10 @@ typedef struct MultiThreadInfo {
*/
AV1CdefWorkerData *cdef_worker;
-#if CONFIG_FRAME_PARALLEL_ENCODE
/*!
* Buffers to be stored/restored before/after parallel encode.
*/
RestoreStateBuffers restore_state_buf;
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
} MultiThreadInfo;
/*!\cond */
@@ -2310,6 +2312,61 @@ typedef struct {
uint8_t *entropy_ctx;
} CoeffBufferPool;
+#if !CONFIG_REALTIME_ONLY
+/*!\cond */
+// DUCKY_ENCODE_FRAME_MODE is the C version of EncodeFrameMode
+enum {
+ DUCKY_ENCODE_FRAME_MODE_NONE, // Let native AV1 determine q index and rdmult
+ DUCKY_ENCODE_FRAME_MODE_QINDEX, // DuckyEncode determines q index and AV1
+ // determines rdmult
+ DUCKY_ENCODE_FRAME_MODE_QINDEX_RDMULT, // DuckyEncode determines q index and
+ // rdmult
+} UENUM1BYTE(DUCKY_ENCODE_FRAME_MODE);
+
+enum {
+ DUCKY_ENCODE_GOP_MODE_NONE, // native AV1 decides GOP
+ DUCKY_ENCODE_GOP_MODE_RCL, // rate control lib decides GOP
+} UENUM1BYTE(DUCKY_ENCODE_GOP_MODE);
+
+typedef struct DuckyEncodeFrameInfo {
+ DUCKY_ENCODE_FRAME_MODE qp_mode;
+ DUCKY_ENCODE_GOP_MODE gop_mode;
+ int q_index;
+ int rdmult;
+} DuckyEncodeFrameInfo;
+
+typedef struct DuckyEncodeFrameResult {
+ int global_order_idx;
+ int q_index;
+ int rdmult;
+ int rate;
+ int64_t dist;
+ double psnr;
+} DuckyEncodeFrameResult;
+
+typedef struct DuckyEncodeInfo {
+ DuckyEncodeFrameInfo frame_info;
+ DuckyEncodeFrameResult frame_result;
+} DuckyEncodeInfo;
+/*!\endcond */
+#endif
+
+/*!\cond */
+typedef struct RTC_REF {
+ /*!
+ * LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
+ * BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
+ */
+ int reference[INTER_REFS_PER_FRAME];
+ int ref_idx[INTER_REFS_PER_FRAME];
+ int refresh[REF_FRAMES];
+ int set_ref_frame_config;
+ int non_reference_frame;
+ int ref_frame_comp[3];
+ int gld_idx_1layer;
+} RTC_REF;
+/*!\endcond */
+
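/* A minimal sketch of how the RTC_REF fields interact: refresh[] is
 * indexed by buffer slot, ref_idx[] maps the named references (LAST = 0,
 * GOLDEN = 3, ALTREF = 6, ...) to slots, and a frame is a non-reference
 * frame only when no slot is refreshed, as in the helper below. */
static int is_non_reference_frame(const int refresh[8]) {
  for (int i = 0; i < 8; i++)
    if (refresh[i]) return 0; /* some later frame may reference this one */
  return 1; /* refreshes nothing, so it can be dropped by the transport */
}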
/*!
* \brief Structure to hold data corresponding to an encoded frame.
*/
@@ -2358,19 +2415,17 @@ typedef struct AV1_COMP_DATA {
* Decide to pop the source for this frame from input buffer queue.
*/
int pop_lookahead;
-#if CONFIG_FRAME_PARALLEL_ENCODE
+
/*!
* Display order hint of frame whose packed data is in cx_data buffer.
*/
int frame_display_order_hint;
-#endif
} AV1_COMP_DATA;
/*!
* \brief Top level primary encoder structure
*/
typedef struct AV1_PRIMARY {
-#if CONFIG_FRAME_PARALLEL_ENCODE
/*!
* Array of frame level encoder stage top level structures
*/
@@ -2381,7 +2436,6 @@ typedef struct AV1_PRIMARY {
* encode set.
*/
struct AV1_COMP_DATA parallel_frames_data[MAX_PARALLEL_FRAMES - 1];
-
#if CONFIG_FPMT_TEST
/*!
* Flag which enables/disables simulation path for fpmt unit test.
@@ -2407,7 +2461,13 @@ typedef struct AV1_PRIMARY {
* model across frames.
*/
int temp_valid_gm_model_found[FRAME_UPDATE_TYPES];
-#endif
+#endif // CONFIG_FPMT_TEST
+ /*!
+ * Copy of cm->ref_frame_map maintained to facilitate sequential update of
+ * ref_frame_map by lower layer depth frames encoded ahead of time in a
+ * parallel encode set.
+ */
+ RefCntBuffer *ref_frame_map_copy[REF_FRAMES];
/*!
* Start time stamp of the last encoded show frame
@@ -2418,15 +2478,7 @@ typedef struct AV1_PRIMARY {
* End time stamp of the last encoded show frame
*/
int64_t ts_end_last_show_frame;
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
- /*!
- * Copy of cm->ref_frame_map maintained to facilitate sequential update of
- * ref_frame_map by lower layer depth frames encoded ahead of time in a
- * parallel encode set.
- */
- RefCntBuffer *ref_frame_map_copy[REF_FRAMES];
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
+
/*!
* Number of frame level contexts(cpis)
*/
@@ -2449,8 +2501,7 @@ typedef struct AV1_PRIMARY {
/*!
* Encode stage top level structure
- * When CONFIG_FRAME_PARALLEL_ENCODE is enabled this is the same as
- * parallel_cpi[0]
+ * During frame parallel encode, this is the same as parallel_cpi[0]
*/
struct AV1_COMP *cpi;
@@ -2806,6 +2857,11 @@ typedef struct AV1_COMP {
RefreshFrameInfo refresh_frame;
/*!
+ * Flag to reduce the number of reference frame buffers used in real-time
+ * encoding.
+ */
+ int rt_reduce_num_ref_buffers;
+
+ /*!
* Flags signalled by the external interface at frame level.
*/
ExternalFlags ext_flags;
@@ -2985,7 +3041,6 @@ typedef struct AV1_COMP {
*/
VarBasedPartitionInfo vbp_info;
-#if CONFIG_FRAME_PARALLEL_ENCODE
/*!
* Number of recodes in the frame.
*/
@@ -3029,7 +3084,6 @@ typedef struct AV1_COMP {
* post encode updates for parallel frames.
*/
double new_framerate;
-#endif
/*!
* Retain condition for fast_extra_bits calculation.
@@ -3230,13 +3284,11 @@ typedef struct AV1_COMP {
*/
ExtPartController ext_part_controller;
-#if CONFIG_FRAME_PARALLEL_ENCODE
/*!
* Motion vector stats of the current encoded frame, used to update the
* ppi->mv_stats during postencode.
*/
MV_STATS mv_stats;
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
/*!
* Stores the reference refresh index for the current frame.
*/
@@ -3262,9 +3314,7 @@ typedef struct AV1_COMP {
*/
int wanted_fb;
-#endif
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
+#endif // CONFIG_FPMT_TEST
/*!
* A flag to indicate frames that will update their data to the primary
@@ -3338,6 +3388,34 @@ typedef struct AV1_COMP {
* Buffer to store 64x64 SAD
*/
uint64_t *src_sad_blk_64x64;
+
+ /*!
+ * A flag to indicate whether the encoder is controlled by DuckyEncode or not.
+ * 1:yes 0:no
+ */
+ int use_ducky_encode;
+
+#if !CONFIG_REALTIME_ONLY
+ /*! A structure that facilitates the communication between DuckyEncode and AV1
+ * encoder.
+ */
+ DuckyEncodeInfo ducky_encode_info;
+#endif // !CONFIG_REALTIME_ONLY
+ /*!
+ * Frames since last frame with cdf update.
+ */
+ int frames_since_last_update;
+
+ /*!
+ * Struct for the reference structure for RTC.
+ */
+ RTC_REF rtc_ref;
+
+ /*!
+ * Block level thresholds to force zeromv-skip at partition level.
+ */
+ unsigned int zeromv_skip_thresh_exit_part[BLOCK_SIZES_ALL];
} AV1_COMP;
/*!
@@ -3412,16 +3490,17 @@ typedef struct {
size_t size; // Size of resulting bitstream
} EncodeFrameResults;
-void av1_initialize_enc(void);
+void av1_initialize_enc(unsigned int usage, enum aom_rc_mode end_usage);
-struct AV1_COMP *av1_create_compressor(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf,
+struct AV1_COMP *av1_create_compressor(AV1_PRIMARY *ppi,
+ const AV1EncoderConfig *oxcf,
BufferPool *const pool,
COMPRESSOR_STAGE stage,
int lap_lag_in_frames);
struct AV1_PRIMARY *av1_create_primary_compressor(
struct aom_codec_pkt_list *pkt_list_head, int num_lap_buffers,
- AV1EncoderConfig *oxcf);
+ const AV1EncoderConfig *oxcf);
void av1_remove_compressor(AV1_COMP *cpi);
@@ -3449,7 +3528,6 @@ void av1_init_seq_coding_tools(AV1_PRIMARY *const ppi,
void av1_post_encode_updates(AV1_COMP *const cpi,
const AV1_COMP_DATA *const cpi_data);
-#if CONFIG_FRAME_PARALLEL_ENCODE
void av1_scale_references_fpmt(AV1_COMP *cpi, int *ref_buffers_used_map);
void av1_increment_scaled_ref_counts_fpmt(BufferPool *buffer_pool,
@@ -3468,8 +3546,6 @@ AV1_COMP *av1_get_parallel_frame_enc_data(AV1_PRIMARY *const ppi,
int av1_init_parallel_frame_context(const AV1_COMP_DATA *const first_cpi_data,
AV1_PRIMARY *const ppi,
int *ref_buffers_used_map);
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
-
/*!\endcond */
/*!\brief Obtain the raw frame data
@@ -3551,7 +3627,8 @@ int av1_get_active_map(AV1_COMP *cpi, unsigned char *map, int rows, int cols);
int av1_set_internal_size(AV1EncoderConfig *const oxcf,
ResizePendingParams *resize_pending_params,
- AOM_SCALING horiz_mode, AOM_SCALING vert_mode);
+ AOM_SCALING_MODE horiz_mode,
+ AOM_SCALING_MODE vert_mode);
int av1_get_quantizer(struct AV1_COMP *cpi);
@@ -3612,7 +3689,7 @@ static INLINE void init_ref_map_pair(
}
}
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
static AOM_INLINE void calc_frame_data_update_flag(
GF_GROUP *const gf_group, int gf_frame_index,
bool *const do_frame_data_update) {
@@ -3686,8 +3763,10 @@ static INLINE void alloc_frame_mvs(AV1_COMMON *const cm, RefCntBuffer *buf) {
// the frame token allocation.
static INLINE unsigned int allocated_tokens(const TileInfo *tile,
int sb_size_log2, int num_planes) {
- int tile_mb_rows = (tile->mi_row_end - tile->mi_row_start + 2) >> 2;
- int tile_mb_cols = (tile->mi_col_end - tile->mi_col_start + 2) >> 2;
+ int tile_mb_rows =
+ ROUND_POWER_OF_TWO(tile->mi_row_end - tile->mi_row_start, 2);
+ int tile_mb_cols =
+ ROUND_POWER_OF_TWO(tile->mi_col_end - tile->mi_col_start, 2);
return get_token_alloc(tile_mb_rows, tile_mb_cols, sb_size_log2, num_planes);
}
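/* A minimal sketch verifying the identity relied on above: for n == 2,
 * ROUND_POWER_OF_TWO(v, 2) adds half of 1 << 2 before shifting, i.e. it
 * is exactly the (v + 2) >> 2 expression it replaces. */
#include <assert.h>

#define ROUND_POWER_OF_TWO(value, n) (((value) + ((1 << (n)) >> 1)) >> (n))

static void check_round_identity(void) {
  for (int v = 0; v < 1024; v++)
    assert(ROUND_POWER_OF_TWO(v, 2) == (v + 2) >> 2);
}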
@@ -3772,6 +3851,13 @@ static INLINE int is_one_pass_rt_params(const AV1_COMP *cpi) {
cpi->oxcf.gf_cfg.lag_in_frames == 0;
}
+// Use default/internal reference structure for single-layer RTC.
+static INLINE int use_rtc_reference_structure_one_layer(const AV1_COMP *cpi) {
+ return is_one_pass_rt_params(cpi) && cpi->ppi->number_spatial_layers == 1 &&
+ cpi->ppi->number_temporal_layers == 1 &&
+ !cpi->rtc_ref.set_ref_frame_config;
+}
+
// Function return size of frame stats buffer
static INLINE int get_stats_buf_size(int num_lap_buffer, int num_lag_buffer) {
/* if lookahead is enabled return num_lap_buffers else num_lag_buffers */
@@ -3818,12 +3904,12 @@ void av1_setup_frame_size(AV1_COMP *cpi);
// Returns 1 if a frame is scaled and 0 otherwise.
static INLINE int av1_resize_scaled(const AV1_COMMON *cm) {
- return !(cm->superres_upscaled_width == cm->render_width &&
- cm->superres_upscaled_height == cm->render_height);
+ return cm->superres_upscaled_width != cm->render_width ||
+ cm->superres_upscaled_height != cm->render_height;
}
static INLINE int av1_frame_scaled(const AV1_COMMON *cm) {
- return !av1_superres_scaled(cm) && av1_resize_scaled(cm);
+ return av1_superres_scaled(cm) || av1_resize_scaled(cm);
}
// Don't allow a show_existing_frame to coincide with an error resilient
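/* A minimal sketch of the predicate rewrite above. For av1_resize_scaled()
 * the two forms are equivalent by De Morgan's law, !(a && b) == !a || !b.
 * For av1_frame_scaled() the change is behavioral rather than cosmetic: a
 * superres-scaled frame now also counts as scaled. */
#include <assert.h>

static void check_de_morgan(void) {
  for (int a = 0; a <= 1; a++)
    for (int b = 0; b <= 1; b++)
      assert(!(a && b) == (!a || !b));
}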
@@ -3980,20 +4066,38 @@ static AOM_INLINE int is_psnr_calc_enabled(const AV1_COMP *cpi) {
cm->show_frame;
}
-static INLINE int is_frame_resize_pending(AV1_COMP *const cpi) {
- ResizePendingParams *const resize_pending_params =
+static INLINE int is_frame_resize_pending(const AV1_COMP *const cpi) {
+ const ResizePendingParams *const resize_pending_params =
&cpi->resize_pending_params;
return (resize_pending_params->width && resize_pending_params->height &&
(cpi->common.width != resize_pending_params->width ||
cpi->common.height != resize_pending_params->height));
}
+// Check if CDEF is used.
+static INLINE int is_cdef_used(const AV1_COMMON *const cm) {
+ return cm->seq_params->enable_cdef && !cm->features.coded_lossless &&
+ !cm->tiles.large_scale;
+}
+
// Check if loop restoration filter is used.
static INLINE int is_restoration_used(const AV1_COMMON *const cm) {
return cm->seq_params->enable_restoration && !cm->features.all_lossless &&
!cm->tiles.large_scale;
}
+static INLINE int is_inter_tx_size_search_level_one(
+ const TX_SPEED_FEATURES *tx_sf) {
+ return (tx_sf->inter_tx_size_search_init_depth_rect >= 1 &&
+ tx_sf->inter_tx_size_search_init_depth_sqr >= 1);
+}
+
+// Enable switchable motion mode only if warp and OBMC tools are allowed
+static INLINE bool is_switchable_motion_mode_allowed(bool allow_warped_motion,
+ bool enable_obmc) {
+ return (allow_warped_motion || enable_obmc);
+}
+
#if CONFIG_AV1_TEMPORAL_DENOISING
static INLINE int denoise_svc(const struct AV1_COMP *const cpi) {
return (!cpi->ppi->use_svc ||
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/encoder_alloc.h b/chromium/third_party/libaom/source/libaom/av1/encoder/encoder_alloc.h
index ae99aeebc0b..fd350f0e14c 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/encoder_alloc.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/encoder_alloc.h
@@ -46,7 +46,7 @@ static AOM_INLINE void alloc_context_buffers_ext(
dealloc_context_buffers_ext(mbmi_ext_info);
CHECK_MEM_ERROR(
cm, mbmi_ext_info->frame_base,
- aom_calloc(new_ext_mi_size, sizeof(*mbmi_ext_info->frame_base)));
+ aom_malloc(new_ext_mi_size * sizeof(*mbmi_ext_info->frame_base)));
mbmi_ext_info->alloc_size = new_ext_mi_size;
}
// The stride needs to be updated regardless of whether new allocation
@@ -261,10 +261,8 @@ static AOM_INLINE void dealloc_compressor_data(AV1_COMP *cpi) {
#endif
if (!is_stat_generation_stage(cpi)) {
- int num_cdef_workers =
- av1_get_num_mod_workers_for_alloc(&cpi->ppi->p_mt_info, MOD_CDEF);
av1_free_cdef_buffers(cm, &cpi->ppi->p_mt_info.cdef_worker,
- &cpi->mt_info.cdef_sync, num_cdef_workers);
+ &cpi->mt_info.cdef_sync);
}
aom_free_frame_buffer(&cpi->trial_frame_rst);
@@ -296,6 +294,8 @@ static AOM_INLINE void dealloc_compressor_data(AV1_COMP *cpi) {
}
if (cpi->ppi->use_svc) av1_free_svc_cyclic_refresh(cpi);
+ aom_free(cpi->svc.layer_context);
+ cpi->svc.layer_context = NULL;
if (cpi->consec_zero_mv) {
aom_free(cpi->consec_zero_mv);
@@ -378,7 +378,7 @@ static AOM_INLINE YV12_BUFFER_CONFIG *realloc_and_scale_source(
cm->seq_params->subsampling_x, cm->seq_params->subsampling_y,
cm->seq_params->use_highbitdepth, AOM_BORDER_IN_PIXELS,
cm->features.byte_alignment, NULL, NULL, NULL,
- cpi->oxcf.tool_cfg.enable_global_motion))
+ cpi->oxcf.tool_cfg.enable_global_motion, 0))
aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to reallocate scaled source buffer");
assert(cpi->scaled_source.y_crop_width == scaled_width);
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/encoder_utils.c b/chromium/third_party/libaom/source/libaom/av1/encoder/encoder_utils.c
index cebea60eee3..0c69e4d4820 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/encoder_utils.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/encoder_utils.c
@@ -311,7 +311,7 @@ static void configure_static_seg_features(AV1_COMP *cpi) {
struct segmentation *const seg = &cm->seg;
double avg_q;
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
avg_q = ((cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) &&
(cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE))
? cpi->ppi->p_rc.temp_avg_q
@@ -636,7 +636,6 @@ void av1_update_film_grain_parameters_seq(struct AV1_PRIMARY *ppi,
void av1_update_film_grain_parameters(struct AV1_COMP *cpi,
const AV1EncoderConfig *oxcf) {
AV1_COMMON *const cm = &cpi->common;
- cpi->oxcf = *oxcf;
const TuneCfg *const tune_cfg = &oxcf->tune_cfg;
if (cpi->film_grain_table) {
@@ -658,8 +657,8 @@ void av1_update_film_grain_parameters(struct AV1_COMP *cpi,
}
}
} else if (tune_cfg->film_grain_table_filename) {
- cpi->film_grain_table = aom_malloc(sizeof(*cpi->film_grain_table));
- memset(cpi->film_grain_table, 0, sizeof(aom_film_grain_table_t));
+ CHECK_MEM_ERROR(cm, cpi->film_grain_table,
+ aom_calloc(1, sizeof(*cpi->film_grain_table)));
aom_film_grain_table_read(cpi->film_grain_table,
tune_cfg->film_grain_table_filename, cm->error);
@@ -725,7 +724,7 @@ void av1_scale_references(AV1_COMP *cpi, const InterpFilter filter,
&new_fb->buf, cm->width, cm->height,
cm->seq_params->subsampling_x, cm->seq_params->subsampling_y,
cm->seq_params->use_highbitdepth, AOM_BORDER_IN_PIXELS,
- cm->features.byte_alignment, NULL, NULL, NULL, 0)) {
+ cm->features.byte_alignment, NULL, NULL, NULL, 0, 0)) {
if (force_scaling) {
// Release the reference acquired in the get_free_fb() call above.
--new_fb->ref_count;
@@ -792,9 +791,14 @@ BLOCK_SIZE av1_select_sb_size(const AV1EncoderConfig *const oxcf, int width,
oxcf->resize_cfg.resize_mode != RESIZE_NONE) {
// Use the configured size (top resolution) for spatial layers or
// on resize.
- return AOMMIN(oxcf->frm_dim_cfg.width, oxcf->frm_dim_cfg.height) > 480
+ return AOMMIN(oxcf->frm_dim_cfg.width, oxcf->frm_dim_cfg.height) > 720
? BLOCK_128X128
: BLOCK_64X64;
+ } else if (oxcf->mode == REALTIME) {
+ if (oxcf->tune_cfg.content == AOM_CONTENT_SCREEN)
+ return AOMMIN(width, height) >= 720 ? BLOCK_128X128 : BLOCK_64X64;
+ else
+ return AOMMIN(width, height) > 720 ? BLOCK_128X128 : BLOCK_64X64;
}
// TODO(any): Possibly could improve this with a heuristic.
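/* A minimal sketch of the realtime superblock-size rule added above:
 * screen content upgrades to 128x128 from 720p upward (inclusive), camera
 * content only above 720p. The enum is an illustrative stand-in. */
typedef enum { SB_64X64, SB_128X128 } SbSizeSketch;

static SbSizeSketch rt_sb_size(int width, int height, int is_screen) {
  const int min_dim = width < height ? width : height;
  if (is_screen) return min_dim >= 720 ? SB_128X128 : SB_64X64;
  return min_dim > 720 ? SB_128X128 : SB_64X64;
}

/* e.g. 1280x720 screen content picks SB_128X128; 1280x720 camera content
 * picks SB_64X64. */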
@@ -806,8 +810,7 @@ BLOCK_SIZE av1_select_sb_size(const AV1EncoderConfig *const oxcf, int width,
if (oxcf->superres_cfg.superres_mode == AOM_SUPERRES_NONE &&
oxcf->resize_cfg.resize_mode == RESIZE_NONE) {
int is_480p_or_lesser = AOMMIN(width, height) <= 480;
- if ((oxcf->speed >= 1 || oxcf->mode == REALTIME) && is_480p_or_lesser)
- return BLOCK_64X64;
+ if (oxcf->speed >= 1 && is_480p_or_lesser) return BLOCK_64X64;
// For 1080p and lower resolutions, choose SB size adaptively based on
// resolution and speed level for multi-thread encode.
@@ -917,7 +920,7 @@ static void screen_content_tools_determination(
AV1_COMMON *const cm = &cpi->common;
FeatureFlags *const features = &cm->features;
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
projected_size_pass[pass] =
((cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) &&
(cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE))
@@ -1256,9 +1259,7 @@ int av1_is_integer_mv(const YV12_BUFFER_CONFIG *cur_picture,
void av1_set_mb_ssim_rdmult_scaling(AV1_COMP *cpi) {
const CommonModeInfoParams *const mi_params = &cpi->common.mi_params;
- ThreadData *td = &cpi->td;
- MACROBLOCK *x = &td->mb;
- MACROBLOCKD *xd = &x->e_mbd;
+ const MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
uint8_t *y_buffer = cpi->source->y_buffer;
const int y_stride = cpi->source->y_stride;
const int block_size = BLOCK_16X16;
@@ -1268,7 +1269,6 @@ void av1_set_mb_ssim_rdmult_scaling(AV1_COMP *cpi) {
const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h;
double log_sum = 0.0;
- const int use_hbd = cpi->source->flags & YV12_FLAG_HIGHBITDEPTH;
// Loop through each 16x16 block.
for (int row = 0; row < num_rows; ++row) {
@@ -1290,13 +1290,8 @@ void av1_set_mb_ssim_rdmult_scaling(AV1_COMP *cpi) {
buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y;
buf.stride = y_stride;
- if (use_hbd) {
- var += av1_high_get_sby_perpixel_variance(cpi, &buf, BLOCK_8X8,
- xd->bd);
- } else {
- var += av1_get_sby_perpixel_variance(cpi, &buf, BLOCK_8X8);
- }
-
+ var += av1_get_perpixel_variance_facade(cpi, xd, &buf, BLOCK_8X8,
+ AOM_PLANE_Y);
num_of_var += 1.0;
}
}
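/* A minimal sketch of the normalization this function feeds, assuming
 * strictly positive per-block variances: log values are accumulated per
 * 16x16 block and each block's factor is later divided by the geometric
 * mean, keeping the average rdmult unchanged. */
#include <math.h>

static void geometric_mean_normalize(double *factor, int n) {
  double log_sum = 0.0;
  for (int i = 0; i < n; i++) log_sum += log(factor[i]);
  const double geom_mean = exp(log_sum / n);
  for (int i = 0; i < n; i++) factor[i] /= geom_mean; /* geo. mean -> 1 */
}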
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/encoder_utils.h b/chromium/third_party/libaom/source/libaom/av1/encoder/encoder_utils.h
index 5ff9ca31064..dd91bb191af 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/encoder_utils.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/encoder_utils.h
@@ -49,22 +49,26 @@ static AOM_INLINE void suppress_active_map(AV1_COMP *cpi) {
seg_map[i] = AM_SEGMENT_ID_ACTIVE;
}
-static AOM_INLINE void set_mb_mi(CommonModeInfoParams *mi_params, int width,
- int height) {
+// Returns 'size' in the number of Mode Info (MI) units. 'size' is either the
+// width or height.
+static AOM_INLINE int size_in_mi(int size) {
// Ensure that the decoded width and height are both multiples of
// 8 luma pixels (note: this may only be a multiple of 4 chroma pixels if
// subsampling is used).
// This simplifies the implementation of various experiments,
// eg. cdef, which operates on units of 8x8 luma pixels.
- const int aligned_width = ALIGN_POWER_OF_TWO(width, 3);
- const int aligned_height = ALIGN_POWER_OF_TWO(height, 3);
+ const int aligned_size = ALIGN_POWER_OF_TWO(size, 3);
+ return aligned_size >> MI_SIZE_LOG2;
+}
- mi_params->mi_cols = aligned_width >> MI_SIZE_LOG2;
- mi_params->mi_rows = aligned_height >> MI_SIZE_LOG2;
+static AOM_INLINE void set_mb_mi(CommonModeInfoParams *mi_params, int width,
+ int height) {
+ mi_params->mi_cols = size_in_mi(width);
+ mi_params->mi_rows = size_in_mi(height);
mi_params->mi_stride = calc_mi_size(mi_params->mi_cols);
- mi_params->mb_cols = (mi_params->mi_cols + 2) >> 2;
- mi_params->mb_rows = (mi_params->mi_rows + 2) >> 2;
+ mi_params->mb_cols = ROUND_POWER_OF_TWO(mi_params->mi_cols, 2);
+ mi_params->mb_rows = ROUND_POWER_OF_TWO(mi_params->mi_rows, 2);
mi_params->MBs = mi_params->mb_rows * mi_params->mb_cols;
const int mi_alloc_size_1d = mi_size_wide[mi_params->mi_alloc_bsize];
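/* A minimal sketch of the MI-unit arithmetic factored out above, assuming
 * MI_SIZE_LOG2 == 2 (4x4 luma units): a dimension is aligned up to a
 * multiple of 8 pixels and converted to MI units, and macroblock counts
 * round the MI counts to multiples of 4. */
#include <assert.h>

#define MI_SIZE_LOG2 2
#define ALIGN_POWER_OF_TWO(v, n) (((v) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
#define ROUND_POWER_OF_TWO(v, n) (((v) + ((1 << (n)) >> 1)) >> (n))

static void check_mi_example(void) {
  /* 1918 pixels align to 1920, giving 480 MI columns ... */
  assert((ALIGN_POWER_OF_TWO(1918, 3) >> MI_SIZE_LOG2) == 480);
  /* ... and 480 MI columns give 120 macroblock columns. */
  assert(ROUND_POWER_OF_TWO(480, 2) == 120);
}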
@@ -872,7 +876,7 @@ static AOM_INLINE void copy_frame_prob_info(AV1_COMP *cpi) {
default_switchable_interp_probs);
}
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) {
FrameProbInfo *const temp_frame_probs = &cpi->ppi->temp_frame_probs;
if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats) {
@@ -970,6 +974,7 @@ static AOM_INLINE int combine_prior_with_tpl_boost(double min_factor,
static AOM_INLINE void set_size_independent_vars(AV1_COMP *cpi) {
int i;
AV1_COMMON *const cm = &cpi->common;
+ FeatureFlags *const features = &cm->features;
for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
cm->global_motion[i] = default_warp_params;
}
@@ -977,8 +982,9 @@ static AOM_INLINE void set_size_independent_vars(AV1_COMP *cpi) {
av1_set_speed_features_framesize_independent(cpi, cpi->speed);
av1_set_rd_speed_thresholds(cpi);
- cm->features.interp_filter = SWITCHABLE;
- cm->features.switchable_motion_mode = 1;
+ features->interp_filter = SWITCHABLE;
+ features->switchable_motion_mode = is_switchable_motion_mode_allowed(
+ features->allow_warped_motion, cpi->oxcf.motion_mode_cfg.enable_obmc);
}
static AOM_INLINE void release_scaled_references(AV1_COMP *cpi) {
@@ -997,12 +1003,34 @@ static AOM_INLINE void restore_all_coding_context(AV1_COMP *cpi) {
if (!frame_is_intra_only(&cpi->common)) release_scaled_references(cpi);
}
+static AOM_INLINE int reduce_num_ref_buffers(const AV1_COMP *cpi) {
+ const SequenceHeader *const seq_params = cpi->common.seq_params;
+ return is_one_pass_rt_params(cpi) &&
+ use_rtc_reference_structure_one_layer(cpi) &&
+ (seq_params->order_hint_info.enable_order_hint == 0) &&
+ cpi->rt_reduce_num_ref_buffers;
+}
+
// Refresh reference frame buffers according to refresh_frame_flags.
static AOM_INLINE void refresh_reference_frames(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
// All buffers are refreshed for shown keyframes and S-frames.
+ // In the case of RT, the golden frame refreshes the 6th slot and the
+ // other reference frames refresh slots 0 to 5. Slot 7 is not refreshed
+ // by any reference frame. Thus, only 7 buffers are refreshed for
+ // keyframes and S-frames instead of 8.
+ int num_ref_buffers = REF_FRAMES;
+ if (reduce_num_ref_buffers(cpi)) {
+ const int refresh_all_bufs =
+ (cpi->ppi->gf_group.refbuf_state[cpi->gf_frame_index] == REFBUF_RESET ||
+ frame_is_sframe(cm));
+ assert(IMPLIES(((cm->current_frame.refresh_frame_flags >> 7) & 1) == 1,
+ refresh_all_bufs));
+ (void)refresh_all_bufs;
+ num_ref_buffers--;
+ }
- for (int ref_frame = 0; ref_frame < REF_FRAMES; ref_frame++) {
+ for (int ref_frame = 0; ref_frame < num_ref_buffers; ref_frame++) {
if (((cm->current_frame.refresh_frame_flags >> ref_frame) & 1) == 1) {
assign_frame_buffer_p(&cm->ref_frame_map[ref_frame], cm->cur_frame);
}
@@ -1047,6 +1075,26 @@ void av1_save_all_coding_context(AV1_COMP *cpi);
void av1_dump_filtered_recon_frames(AV1_COMP *cpi);
#endif
+static AOM_INLINE int av1_get_enc_border_size(bool resize, bool all_intra,
+ BLOCK_SIZE sb_size) {
+ // For allintra encoding mode, inter-frame motion search is not applicable and
+ // the intraBC motion vectors are restricted within the tile boundaries. Hence
+ // a smaller frame border size (AOM_ENC_ALLINTRA_BORDER) is used in this case.
+ if (resize) {
+ return AOM_BORDER_IN_PIXELS;
+ }
+ if (all_intra) {
+ return AOM_ENC_ALLINTRA_BORDER;
+ }
+ return block_size_wide[sb_size] + 32;
+}
+
+static AOM_INLINE bool av1_is_resize_needed(const AV1EncoderConfig *oxcf) {
+ const ResizeCfg *resize_cfg = &oxcf->resize_cfg;
+ const SuperResCfg *superres_cfg = &oxcf->superres_cfg;
+ return resize_cfg->resize_mode || superres_cfg->superres_mode;
+}
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/encodetxb.c b/chromium/third_party/libaom/source/libaom/av1/encoder/encodetxb.c
index c0593663817..4ea4f4c80d2 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/encodetxb.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/encodetxb.c
@@ -44,14 +44,17 @@ void av1_alloc_txb_buf(AV1_COMP *cpi) {
av1_free_txb_buf(cpi);
// TODO(jingning): This should be further reduced.
- cpi->coeff_buffer_base = aom_malloc(sizeof(*cpi->coeff_buffer_base) * size);
+ CHECK_MEM_ERROR(cm, cpi->coeff_buffer_base,
+ aom_malloc(sizeof(*cpi->coeff_buffer_base) * size));
CHECK_MEM_ERROR(
cm, coeff_buf_pool->tcoeff,
aom_memalign(32, sizeof(*coeff_buf_pool->tcoeff) * num_tcoeffs));
- coeff_buf_pool->eobs =
- aom_malloc(sizeof(*coeff_buf_pool->eobs) * num_tcoeffs / txb_unit_size);
- coeff_buf_pool->entropy_ctx = aom_malloc(
- sizeof(*coeff_buf_pool->entropy_ctx) * num_tcoeffs / txb_unit_size);
+ CHECK_MEM_ERROR(
+ cm, coeff_buf_pool->eobs,
+ aom_malloc(sizeof(*coeff_buf_pool->eobs) * num_tcoeffs / txb_unit_size));
+ CHECK_MEM_ERROR(cm, coeff_buf_pool->entropy_ctx,
+ aom_malloc(sizeof(*coeff_buf_pool->entropy_ctx) *
+ num_tcoeffs / txb_unit_size));
tran_low_t *tcoeff_ptr = coeff_buf_pool->tcoeff;
uint16_t *eob_ptr = coeff_buf_pool->eobs;
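/* A minimal sketch of the allocation-checking pattern adopted above: each
 * aom_malloc() result is routed through a check that raises an error
 * instead of leaving a NULL pointer to be dereferenced later. The macro
 * below is a simplified stand-in; the real CHECK_MEM_ERROR reports via
 * aom_internal_error() rather than exiting. */
#include <stdio.h>
#include <stdlib.h>

#define DEMO_CHECK_MEM_ERROR(lval, expr)                 \
  do {                                                   \
    (lval) = (expr);                                     \
    if (!(lval)) {                                       \
      fprintf(stderr, "allocation failed: %s\n", #expr); \
      exit(EXIT_FAILURE);                                \
    }                                                    \
  } while (0)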
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/ethread.c b/chromium/third_party/libaom/source/libaom/av1/encoder/ethread.c
index 7eb63baac04..1701a91f4c0 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/ethread.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/ethread.c
@@ -118,7 +118,8 @@ void av1_row_mt_sync_read(AV1EncRowMultiThreadSync *row_mt_sync, int r, int c) {
pthread_mutex_t *const mutex = &row_mt_sync->mutex_[r - 1];
pthread_mutex_lock(mutex);
- while (c > row_mt_sync->num_finished_cols[r - 1] - nsync) {
+ while (c > row_mt_sync->num_finished_cols[r - 1] - nsync -
+ row_mt_sync->intrabc_extra_top_right_sb_delay) {
pthread_cond_wait(&row_mt_sync->cond_[r - 1], mutex);
}
pthread_mutex_unlock(mutex);
@@ -142,7 +143,7 @@ void av1_row_mt_sync_write(AV1EncRowMultiThreadSync *row_mt_sync, int r, int c,
cur = c;
if (c % nsync) sig = 0;
} else {
- cur = cols + nsync;
+ cur = cols + nsync + row_mt_sync->intrabc_extra_top_right_sb_delay;
}
if (sig) {
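/* A minimal sketch of the row-synchronization predicate changed above:
 * encoding superblock (r, c) may start once the row above has finished
 * c + nsync + intrabc_delay columns, where intrabc_delay is the extra
 * top-right margin required when intraBC is enabled. */
static int may_start_sb(int c, int finished_cols_above, int nsync,
                        int intrabc_delay) {
  /* negation of: c > num_finished_cols[r - 1] - nsync - intrabc_delay */
  return c <= finished_cols_above - nsync - intrabc_delay;
}

/* e.g. with nsync == 1 and intrabc_delay == 0, column c waits until the
 * row above completes column c + 1 (the classic top-right dependency). */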
@@ -443,6 +444,12 @@ static int enc_row_mt_worker_hook(void *arg1, void *unused) {
pthread_mutex_t *enc_row_mt_mutex_ = enc_row_mt->mutex_;
#endif
(void)unused;
+ // Preallocate the pc_tree for realtime coding to reduce the cost of memory
+ // allocation.
+ thread_data->td->rt_pc_root =
+ cpi->sf.rt_sf.use_nonrd_pick_mode
+ ? av1_alloc_pc_tree_node(cm->seq_params->sb_size)
+ : NULL;
assert(cur_tile_id != -1);
@@ -514,6 +521,8 @@ static int enc_row_mt_worker_hook(void *arg1, void *unused) {
#endif
}
+ av1_free_pc_tree_recursive(thread_data->td->rt_pc_root, av1_num_planes(cm), 0,
+ 0);
return 1;
}
@@ -526,6 +535,12 @@ static int enc_worker_hook(void *arg1, void *unused) {
int t;
(void)unused;
+ // Preallocate the pc_tree for realtime coding to reduce the cost of memory
+ // allocation.
+ thread_data->td->rt_pc_root =
+ cpi->sf.rt_sf.use_nonrd_pick_mode
+ ? av1_alloc_pc_tree_node(cm->seq_params->sb_size)
+ : NULL;
for (t = thread_data->start; t < tile_rows * tile_cols;
t += cpi->mt_info.num_workers) {
@@ -539,6 +554,9 @@ static int enc_worker_hook(void *arg1, void *unused) {
av1_encode_tile(cpi, thread_data->td, tile_row, tile_col);
}
+ av1_free_pc_tree_recursive(thread_data->td->rt_pc_root, av1_num_planes(cm), 0,
+ 0);
+
return 1;
}
@@ -554,12 +572,10 @@ void av1_init_frame_mt(AV1_PRIMARY *ppi, AV1_COMP *cpi) {
}
void av1_init_cdef_worker(AV1_COMP *cpi) {
-#if CONFIG_FRAME_PARALLEL_ENCODE
// The allocation is done only for level 0 parallel frames. No change
// in config is supported in the middle of a parallel encode set, since the
// rest of the MT modules also do not support dynamic change of config.
if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) return;
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
PrimaryMultiThreadInfo *const p_mt_info = &cpi->ppi->p_mt_info;
int num_cdef_workers = av1_get_num_mod_workers_for_alloc(p_mt_info, MOD_CDEF);
@@ -575,10 +591,8 @@ void av1_init_lr_mt_buffers(AV1_COMP *cpi) {
if (lr_sync->sync_range) {
int num_lr_workers =
av1_get_num_mod_workers_for_alloc(&cpi->ppi->p_mt_info, MOD_LR);
-#if CONFIG_FRAME_PARALLEL_ENCODE
if (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0)
return;
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
lr_sync->lrworkerdata[num_lr_workers - 1].rst_tmpbuf = cm->rst_tmpbuf;
lr_sync->lrworkerdata[num_lr_workers - 1].rlbs = cm->rlbs;
}
@@ -629,8 +643,7 @@ void av1_init_mt_sync(AV1_COMP *cpi, int is_first_pass) {
AV1LfSync *lf_sync = &mt_info->lf_row_sync;
// Number of superblock rows
const int sb_rows =
- ALIGN_POWER_OF_TWO(cm->height >> MI_SIZE_LOG2, MAX_MIB_SIZE_LOG2) >>
- MAX_MIB_SIZE_LOG2;
+ CEIL_POWER_OF_TWO(cm->height >> MI_SIZE_LOG2, MAX_MIB_SIZE_LOG2);
PrimaryMultiThreadInfo *const p_mt_info = &cpi->ppi->p_mt_info;
int num_lf_workers = av1_get_num_mod_workers_for_alloc(p_mt_info, MOD_LPF);
@@ -703,9 +716,7 @@ void av1_init_tile_thread_data(AV1_PRIMARY *ppi, int is_first_pass) {
AOM_CHECK_MEM_ERROR(&ppi->error, thread_data->td,
aom_memalign(32, sizeof(*thread_data->td)));
av1_zero(*thread_data->td);
-#if CONFIG_FRAME_PARALLEL_ENCODE
thread_data->original_td = thread_data->td;
-#endif
// Set up shared coeff buffers.
av1_setup_shared_coeff_buffer(
@@ -794,17 +805,11 @@ void av1_init_tile_thread_data(AV1_PRIMARY *ppi, int is_first_pass) {
if (!is_first_pass && ppi->cpi->oxcf.row_mt == 1 && i < num_enc_workers) {
if (i == 0) {
-#if CONFIG_FRAME_PARALLEL_ENCODE
for (int j = 0; j < ppi->num_fp_contexts; j++) {
AOM_CHECK_MEM_ERROR(&ppi->error, ppi->parallel_cpi[j]->td.tctx,
(FRAME_CONTEXT *)aom_memalign(
16, sizeof(*ppi->parallel_cpi[j]->td.tctx)));
}
-#else
- AOM_CHECK_MEM_ERROR(
- &ppi->error, ppi->cpi->td.tctx,
- (FRAME_CONTEXT *)aom_memalign(16, sizeof(*ppi->cpi->td.tctx)));
-#endif
} else {
AOM_CHECK_MEM_ERROR(
&ppi->error, thread_data->td->tctx,
@@ -848,7 +853,6 @@ void av1_create_workers(AV1_PRIMARY *ppi, int num_workers) {
}
}
-#if CONFIG_FRAME_PARALLEL_ENCODE
// This function returns 1 if frame parallel encode is supported for
// the current configuration. Returns 0 otherwise.
static AOM_INLINE int is_fpmt_config(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf) {
@@ -907,13 +911,11 @@ int av1_check_fpmt_config(AV1_PRIMARY *const ppi,
reset_size);
av1_zero_array(&ppi->gf_group.is_frame_non_ref[cur_gf_index], reset_size);
av1_zero_array(&ppi->gf_group.src_offset[cur_gf_index], reset_size);
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
memset(&ppi->gf_group.skip_frame_refresh[cur_gf_index][0], INVALID_IDX,
sizeof(ppi->gf_group.skip_frame_refresh[cur_gf_index][0]) *
reset_size * REF_FRAMES);
memset(&ppi->gf_group.skip_frame_as_ref[cur_gf_index], INVALID_IDX,
sizeof(ppi->gf_group.skip_frame_as_ref[cur_gf_index]) * reset_size);
-#endif
ppi->num_fp_contexts = 1;
}
return 0;
@@ -926,10 +928,8 @@ int av1_check_fpmt_config(AV1_PRIMARY *const ppi,
// Computes the max number of enc workers possible for each resolution.
static AOM_INLINE int compute_max_num_enc_workers(
CommonModeInfoParams *const mi_params, int mib_size_log2) {
- int num_sb_rows =
- ALIGN_POWER_OF_TWO(mi_params->mi_rows, mib_size_log2) >> mib_size_log2;
- int num_sb_cols =
- ALIGN_POWER_OF_TWO(mi_params->mi_cols, mib_size_log2) >> mib_size_log2;
+ int num_sb_rows = CEIL_POWER_OF_TWO(mi_params->mi_rows, mib_size_log2);
+ int num_sb_cols = CEIL_POWER_OF_TWO(mi_params->mi_cols, mib_size_log2);
return AOMMIN((num_sb_cols + 1) >> 1, num_sb_rows);
}
@@ -1176,7 +1176,6 @@ int av1_compress_parallel_frames(AV1_PRIMARY *const ppi,
ref_buffers_used_map);
return AOM_CODEC_OK;
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
static AOM_INLINE void launch_workers(MultiThreadInfo *const mt_info,
int num_workers) {
@@ -1213,17 +1212,16 @@ static AOM_INLINE void accumulate_counters_enc_workers(AV1_COMP *cpi,
EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
cpi->intrabc_used |= thread_data->td->intrabc_used;
cpi->deltaq_used |= thread_data->td->deltaq_used;
- // Accumulate cyclic refresh params.
- if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ &&
- !frame_is_intra_only(&cpi->common))
- av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh,
- &thread_data->td->mb);
+ // Accumulate rtc counters.
+ if (!frame_is_intra_only(&cpi->common))
+ av1_accumulate_rtc_counters(cpi, &thread_data->td->mb);
if (thread_data->td != &cpi->td) {
+ // Keep these conditional expressions in sync with the corresponding ones
+ // in prepare_enc_workers().
if (cpi->sf.inter_sf.mv_cost_upd_level != INTERNAL_COST_UPD_OFF) {
aom_free(thread_data->td->mb.mv_costs);
}
- if (cpi->sf.intra_sf.dv_cost_upd_level != INTERNAL_COST_UPD_OFF &&
- av1_need_dv_costs(cpi)) {
+ if (cpi->sf.intra_sf.dv_cost_upd_level != INTERNAL_COST_UPD_OFF) {
aom_free(thread_data->td->mb.dv_costs);
}
}
@@ -1262,13 +1260,9 @@ static AOM_INLINE void prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
thread_data->cpi = cpi;
if (i == 0) {
thread_data->td = &cpi->td;
-#if !CONFIG_FRAME_PARALLEL_ENCODE
- }
-#else
} else {
thread_data->td = thread_data->original_td;
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
thread_data->td->intrabc_used = 0;
thread_data->td->deltaq_used = 0;
@@ -1292,24 +1286,31 @@ static AOM_INLINE void prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
thread_data->td->hash_value_buffer[x][y];
}
}
+ // Keep these conditional expressions in sync with the corresponding ones
+ // in accumulate_counters_enc_workers().
if (cpi->sf.inter_sf.mv_cost_upd_level != INTERNAL_COST_UPD_OFF) {
CHECK_MEM_ERROR(cm, thread_data->td->mb.mv_costs,
(MvCosts *)aom_malloc(sizeof(MvCosts)));
memcpy(thread_data->td->mb.mv_costs, cpi->td.mb.mv_costs,
sizeof(MvCosts));
}
- if ((cpi->sf.intra_sf.dv_cost_upd_level != INTERNAL_COST_UPD_OFF) &&
- av1_need_dv_costs(cpi)) {
- CHECK_MEM_ERROR(cm, thread_data->td->mb.dv_costs,
- (IntraBCMVCosts *)aom_malloc(sizeof(IntraBCMVCosts)));
- memcpy(thread_data->td->mb.dv_costs, cpi->td.mb.dv_costs,
- sizeof(IntraBCMVCosts));
+ if (cpi->sf.intra_sf.dv_cost_upd_level != INTERNAL_COST_UPD_OFF) {
+ // Reset dv_costs to NULL for worker threads when dv cost update is
+ // enabled so that only dv_cost_upd_level needs to be checked before the
+ // aom_free() call for the same.
+ thread_data->td->mb.dv_costs = NULL;
+ if (av1_need_dv_costs(cpi)) {
+ CHECK_MEM_ERROR(cm, thread_data->td->mb.dv_costs,
+ (IntraBCMVCosts *)aom_malloc(sizeof(IntraBCMVCosts)));
+ memcpy(thread_data->td->mb.dv_costs, cpi->td.mb.dv_costs,
+ sizeof(IntraBCMVCosts));
+ }
}
}
av1_alloc_mb_data(cpi, &thread_data->td->mb);
- // Reset cyclic refresh counters.
- av1_init_cyclic_refresh_counters(&thread_data->td->mb);
+ // Reset rtc counters.
+ av1_init_rtc_counters(&thread_data->td->mb);
if (thread_data->td->counts != &cpi->counts) {
memcpy(thread_data->td->counts, &cpi->counts, sizeof(cpi->counts));
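/* A minimal sketch of the alloc/free symmetry established above: the
 * pointer is reset to NULL before the conditional allocation, so teardown
 * can free it whenever the update level is enabled, with free(NULL) as a
 * harmless no-op. Plain malloc/free stand in for the aom calls. */
#include <stdlib.h>

typedef struct { int *dv_costs; } WorkerMbSketch;

static void worker_setup(WorkerMbSketch *mb, int upd_enabled, int needed) {
  if (upd_enabled) {
    mb->dv_costs = NULL; /* makes the matching free unconditional */
    if (needed) mb->dv_costs = malloc(128 * sizeof(*mb->dv_costs));
  }
}

static void worker_teardown(WorkerMbSketch *mb, int upd_enabled) {
  if (upd_enabled) free(mb->dv_costs); /* free(NULL) does nothing */
}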
@@ -1358,17 +1359,15 @@ static AOM_INLINE void fp_prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
thread_data->cpi = cpi;
if (i == 0) {
thread_data->td = &cpi->td;
-#if !CONFIG_FRAME_PARALLEL_ENCODE
- }
-#else
} else {
thread_data->td = thread_data->original_td;
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
// Before encoding a frame, copy the thread data from cpi.
if (thread_data->td != &cpi->td) {
thread_data->td->mb = cpi->td.mb;
+ // Keep this conditional expression in sync with the corresponding one
+ // in av1_fp_encode_tiles_row_mt().
if (cpi->sf.inter_sf.mv_cost_upd_level != INTERNAL_COST_UPD_OFF) {
CHECK_MEM_ERROR(cm, thread_data->td->mb.mv_costs,
(MvCosts *)aom_malloc(sizeof(MvCosts)));
@@ -1410,7 +1409,7 @@ static AOM_INLINE int compute_num_enc_tile_mt_workers(AV1_COMMON *const cm,
}
// Find max worker of all MT stages
-int av1_get_max_num_workers(AV1_COMP *cpi) {
+int av1_get_max_num_workers(const AV1_COMP *cpi) {
int max_num_workers = 0;
for (int i = MOD_FP; i < NUM_MT_MODULES; i++)
max_num_workers =
@@ -1569,6 +1568,8 @@ void av1_encode_tiles_row_mt(AV1_COMP *cpi) {
sizeof(*row_mt_sync->num_finished_cols) * max_sb_rows);
row_mt_sync->next_mi_row = this_tile->tile_info.mi_row_start;
row_mt_sync->num_threads_working = 0;
+ row_mt_sync->intrabc_extra_top_right_sb_delay =
+ av1_get_intrabc_extra_top_right_sb_delay(cm);
av1_inter_mode_data_init(this_tile);
av1_zero_above_context(cm, &cpi->td.mb.e_mbd,
@@ -1639,6 +1640,10 @@ void av1_fp_encode_tiles_row_mt(AV1_COMP *cpi) {
sizeof(*row_mt_sync->num_finished_cols) * max_mb_rows);
row_mt_sync->next_mi_row = this_tile->tile_info.mi_row_start;
row_mt_sync->num_threads_working = 0;
+
+ // intraBC mode is not evaluated during first-pass encoding. Hence, no
+ // additional top-right delay is required.
+ row_mt_sync->intrabc_extra_top_right_sb_delay = 0;
}
}
@@ -1651,6 +1656,8 @@ void av1_fp_encode_tiles_row_mt(AV1_COMP *cpi) {
for (int i = num_workers - 1; i >= 0; i--) {
EncWorkerData *const thread_data = &cpi->mt_info.tile_thr_data[i];
if (thread_data->td != &cpi->td) {
+ // Keep this conditional expression in sync with the corresponding one
+ // in fp_prepare_enc_workers().
if (cpi->sf.inter_sf.mv_cost_upd_level != INTERNAL_COST_UPD_OFF) {
aom_free(thread_data->td->mb.mv_costs);
}
@@ -1829,13 +1836,9 @@ static AOM_INLINE void prepare_tpl_workers(AV1_COMP *cpi, AVxWorkerHook hook,
thread_data->cpi = cpi;
if (i == 0) {
thread_data->td = &cpi->td;
-#if !CONFIG_FRAME_PARALLEL_ENCODE
- }
-#else
} else {
thread_data->td = thread_data->original_td;
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
// Before encoding a frame, copy the thread data from cpi.
if (thread_data->td != &cpi->td) {
@@ -1972,13 +1975,9 @@ static void prepare_tf_workers(AV1_COMP *cpi, AVxWorkerHook hook,
thread_data->cpi = cpi;
if (i == 0) {
thread_data->td = &cpi->td;
-#if !CONFIG_FRAME_PARALLEL_ENCODE
- }
-#else
} else {
thread_data->td = thread_data->original_td;
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
// Before encoding a frame, copy the thread data from cpi.
if (thread_data->td != &cpi->td) {
@@ -1986,8 +1985,11 @@ static void prepare_tf_workers(AV1_COMP *cpi, AVxWorkerHook hook,
// OBMC buffers are used only to init MS params and remain unused when
// called from tf, hence set the buffers to defaults.
av1_init_obmc_buffer(&thread_data->td->mb.obmc_buffer);
- tf_alloc_and_reset_data(&thread_data->td->tf_data, cpi->tf_ctx.num_pels,
- is_highbitdepth);
+ if (!tf_alloc_and_reset_data(&thread_data->td->tf_data,
+ cpi->tf_ctx.num_pels, is_highbitdepth)) {
+ aom_internal_error(cpi->common.error, AOM_CODEC_MEM_ERROR,
+ "Error allocating temporal filter data");
+ }
}
}
}
@@ -2168,13 +2170,9 @@ static AOM_INLINE void prepare_gm_workers(AV1_COMP *cpi, AVxWorkerHook hook,
thread_data->cpi = cpi;
if (i == 0) {
thread_data->td = &cpi->td;
-#if !CONFIG_FRAME_PARALLEL_ENCODE
- }
-#else
} else {
thread_data->td = thread_data->original_td;
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
}
}
@@ -2467,13 +2465,9 @@ static void prepare_pack_bs_workers(AV1_COMP *const cpi,
EncWorkerData *const thread_data = &mt_info->tile_thr_data[i];
if (i == 0) {
thread_data->td = &cpi->td;
-#if !CONFIG_FRAME_PARALLEL_ENCODE
- }
-#else
} else {
thread_data->td = thread_data->original_td;
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
if (thread_data->td != &cpi->td) thread_data->td->mb = cpi->td.mb;
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/ethread.h b/chromium/third_party/libaom/source/libaom/av1/encoder/ethread.h
index f56dcc02da5..a1de988b085 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/ethread.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/ethread.h
@@ -22,9 +22,7 @@ struct ThreadData;
typedef struct EncWorkerData {
struct AV1_COMP *cpi;
struct ThreadData *td;
-#if CONFIG_FRAME_PARALLEL_ENCODE
struct ThreadData *original_td;
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
int start;
int thread_id;
} EncWorkerData;
@@ -79,7 +77,7 @@ void av1_tf_mt_dealloc(AV1TemporalFilterSync *tf_sync);
void av1_compute_num_workers_for_mt(AV1_COMP *cpi);
-int av1_get_max_num_workers(AV1_COMP *cpi);
+int av1_get_max_num_workers(const AV1_COMP *cpi);
void av1_create_workers(AV1_PRIMARY *ppi, int num_workers);
@@ -114,15 +112,12 @@ void av1_write_tile_obu_mt(
int av1_compute_num_enc_workers(AV1_COMP *cpi, int max_workers);
-#if CONFIG_FRAME_PARALLEL_ENCODE
int av1_compute_num_fp_contexts(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf);
int av1_check_fpmt_config(AV1_PRIMARY *const ppi, AV1EncoderConfig *const oxcf);
int av1_compress_parallel_frames(AV1_PRIMARY *const ppi,
AV1_COMP_DATA *const first_cpi_data);
-#endif
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/firstpass.c b/chromium/third_party/libaom/source/libaom/av1/encoder/firstpass.c
index ffdf418a3eb..8434208ad33 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/firstpass.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/firstpass.c
@@ -36,6 +36,7 @@
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encoder.h"
+#include "av1/encoder/encoder_utils.h"
#include "av1/encoder/encode_strategy.h"
#include "av1/encoder/ethread.h"
#include "av1/encoder/extend.h"
@@ -473,8 +474,10 @@ static int firstpass_intra_prediction(
set_mi_offsets(mi_params, xd, unit_row * unit_scale, unit_col * unit_scale);
xd->plane[0].dst.buf = this_frame->y_buffer + y_offset;
- xd->plane[1].dst.buf = this_frame->u_buffer + uv_offset;
- xd->plane[2].dst.buf = this_frame->v_buffer + uv_offset;
+ if (num_planes > 1) {
+ xd->plane[1].dst.buf = this_frame->u_buffer + uv_offset;
+ xd->plane[2].dst.buf = this_frame->v_buffer + uv_offset;
+ }
xd->left_available = (unit_col != 0);
xd->mi[0]->bsize = bsize;
xd->mi[0]->ref_frame[0] = INTRA_FRAME;
@@ -761,8 +764,10 @@ static int firstpass_inter_prediction(
// Reset to last frame as reference buffer.
xd->plane[0].pre[0].buf = last_frame->y_buffer + recon_yoffset;
- xd->plane[1].pre[0].buf = last_frame->u_buffer + recon_uvoffset;
- xd->plane[2].pre[0].buf = last_frame->v_buffer + recon_uvoffset;
+ if (av1_num_planes(&cpi->common) > 1) {
+ xd->plane[1].pre[0].buf = last_frame->u_buffer + recon_uvoffset;
+ xd->plane[2].pre[0].buf = last_frame->v_buffer + recon_uvoffset;
+ }
} else {
stats->sr_coded_error += motion_error;
}
@@ -934,14 +939,18 @@ static void update_firstpass_stats(AV1_COMP *cpi,
if (cpi->ppi->twopass.stats_buf_ctx->total_stats != NULL) {
av1_accumulate_stats(cpi->ppi->twopass.stats_buf_ctx->total_stats, &fps);
}
- /*In the case of two pass, first pass uses it as a circular buffer,
- * when LAP is enabled it is used as a linear buffer*/
twopass->stats_buf_ctx->stats_in_end++;
- if ((cpi->oxcf.pass == AOM_RC_FIRST_PASS) &&
- (twopass->stats_buf_ctx->stats_in_end >=
- twopass->stats_buf_ctx->stats_in_buf_end)) {
- twopass->stats_buf_ctx->stats_in_end =
- twopass->stats_buf_ctx->stats_in_start;
+ // When ducky encode is on, we always use a linear buffer for stats_buf_ctx.
+ if (cpi->use_ducky_encode == 0) {
+ // TODO(angiebird): Figure out why first pass uses circular buffer.
+ /* In the case of two pass, first pass uses it as a circular buffer,
+ * when LAP is enabled it is used as a linear buffer. */
+ if ((cpi->oxcf.pass == AOM_RC_FIRST_PASS) &&
+ (twopass->stats_buf_ctx->stats_in_end >=
+ twopass->stats_buf_ctx->stats_in_buf_end)) {
+ twopass->stats_buf_ctx->stats_in_end =
+ twopass->stats_buf_ctx->stats_in_start;
+ }
}
}
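/* A minimal sketch of the two buffer disciplines above: the first pass
 * wraps its write pointer (circular buffer) unless DuckyEncode drives the
 * encoder, in which case the pointer only advances (linear buffer). The
 * struct is a simplified stand-in for stats_buf_ctx. */
typedef struct {
  int *stats_in_start, *stats_in_end, *stats_in_buf_end;
} StatsBufSketch;

static void advance_stats(StatsBufSketch *s, int is_first_pass,
                          int use_ducky_encode) {
  s->stats_in_end++;
  if (!use_ducky_encode && is_first_pass &&
      s->stats_in_end >= s->stats_in_buf_end)
    s->stats_in_end = s->stats_in_start; /* circular: wrap to the start */
}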
@@ -1026,10 +1035,8 @@ static void free_firstpass_data(FirstPassData *firstpass_data) {
int av1_get_unit_rows_in_tile(const TileInfo *tile,
const BLOCK_SIZE fp_block_size) {
const int unit_height_log2 = mi_size_high_log2[fp_block_size];
- const int unit_height = 1 << unit_height_log2;
const int mi_rows = tile->mi_row_end - tile->mi_row_start;
- // Calculate (int)ceil((double)mi_rows / unit_height).
- const int unit_rows = (mi_rows + unit_height - 1) >> unit_height_log2;
+ const int unit_rows = CEIL_POWER_OF_TWO(mi_rows, unit_height_log2);
return unit_rows;
}
@@ -1037,10 +1044,8 @@ int av1_get_unit_rows_in_tile(const TileInfo *tile,
int av1_get_unit_cols_in_tile(const TileInfo *tile,
const BLOCK_SIZE fp_block_size) {
const int unit_width_log2 = mi_size_wide_log2[fp_block_size];
- const int unit_width = 1 << unit_width_log2;
const int mi_cols = tile->mi_col_end - tile->mi_col_start;
- // Calculate (int)ceil((double)mi_cols / unit_width).
- const int unit_cols = (mi_cols + unit_width - 1) >> unit_width_log2;
+ const int unit_cols = CEIL_POWER_OF_TWO(mi_cols, unit_width_log2);
return unit_cols;
}
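Both hunks replace the open-coded ceiling division with CEIL_POWER_OF_TWO, which the new encoder_utils.h include supplies. By construction it is equivalent to the expression it replaces:

/* Equivalent definition, derived from the replaced expression
 * (unit_height == 1 << unit_height_log2): */
#define CEIL_POWER_OF_TWO(value, n) (((value) + (1 << (n)) - 1) >> (n))

/* Worked example: a tile spanning 70 mi rows with 16x16 first-pass
 * units (unit_height_log2 == 2):
 *   (70 + (1 << 2) - 1) >> 2 == 73 >> 2 == 18 == ceil(70 / 4) */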
@@ -1196,8 +1201,10 @@ void av1_first_pass_row(AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data,
// Adjust to the next column of MBs.
x->plane[0].src.buf += fp_block_size_width;
- x->plane[1].src.buf += uv_mb_height;
- x->plane[2].src.buf += uv_mb_height;
+ if (num_planes > 1) {
+ x->plane[1].src.buf += uv_mb_height;
+ x->plane[2].src.buf += uv_mb_height;
+ }
recon_yoffset += fp_block_size_width;
src_yoffset += fp_block_size_width;
@@ -1213,8 +1220,18 @@ void av1_noop_first_pass_frame(AV1_COMP *cpi, const int64_t ts_duration) {
AV1_COMMON *const cm = &cpi->common;
CurrentFrame *const current_frame = &cm->current_frame;
const CommonModeInfoParams *const mi_params = &cm->mi_params;
- const int unit_rows = get_unit_rows(BLOCK_16X16, mi_params->mb_rows);
- const int unit_cols = get_unit_cols(BLOCK_16X16, mi_params->mb_cols);
+ int max_mb_rows = mi_params->mb_rows;
+ int max_mb_cols = mi_params->mb_cols;
+ if (cpi->oxcf.frm_dim_cfg.forced_max_frame_width) {
+ int max_mi_cols = size_in_mi(cpi->oxcf.frm_dim_cfg.forced_max_frame_width);
+ max_mb_cols = ROUND_POWER_OF_TWO(max_mi_cols, 2);
+ }
+ if (cpi->oxcf.frm_dim_cfg.forced_max_frame_height) {
+ int max_mi_rows = size_in_mi(cpi->oxcf.frm_dim_cfg.forced_max_frame_height);
+ max_mb_rows = ROUND_POWER_OF_TWO(max_mi_rows, 2);
+ }
+ const int unit_rows = get_unit_rows(BLOCK_16X16, max_mb_rows);
+ const int unit_cols = get_unit_cols(BLOCK_16X16, max_mb_cols);
setup_firstpass_data(cm, &cpi->firstpass_data, unit_rows, unit_cols);
FRAME_STATS *mb_stats = cpi->firstpass_data.mb_stats;
FRAME_STATS stats = accumulate_frame_stats(mb_stats, unit_rows, unit_cols);
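The clamping above converts the forced maximum frame dimensions from mode-info (mi) units to 16x16 macroblock units. A worked example, assuming size_in_mi() maps pixels to 4x4 mi units and the usual rounding definition ROUND_POWER_OF_TWO(v, n) == (v + (1 << (n - 1))) >> n:

/* For a forced max width of 1920 pixels:
 *   size_in_mi(1920)           == 1920 >> 2      == 480 mi columns
 *   ROUND_POWER_OF_TWO(480, 2) == (480 + 2) >> 2 == 120 MB columns
 * and 120 * 16 == 1920, so the MB count rounds to the nearest unit. */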
@@ -1236,6 +1253,8 @@ void av1_first_pass(AV1_COMP *cpi, const int64_t ts_duration) {
// Detect if the key frame is screen content type.
if (frame_is_intra_only(cm)) {
FeatureFlags *const features = &cm->features;
+ assert(cpi->source != NULL);
+ xd->cur_buf = cpi->source;
av1_set_screen_content_options(cpi, features);
}
@@ -1246,10 +1265,21 @@ void av1_first_pass(AV1_COMP *cpi, const int64_t ts_duration) {
const BLOCK_SIZE fp_block_size =
get_fp_block_size(cpi->is_screen_content_type);
+ int max_mb_rows = mi_params->mb_rows;
+ int max_mb_cols = mi_params->mb_cols;
+ if (cpi->oxcf.frm_dim_cfg.forced_max_frame_width) {
+ int max_mi_cols = size_in_mi(cpi->oxcf.frm_dim_cfg.forced_max_frame_width);
+ max_mb_cols = ROUND_POWER_OF_TWO(max_mi_cols, 2);
+ }
+ if (cpi->oxcf.frm_dim_cfg.forced_max_frame_height) {
+ int max_mi_rows = size_in_mi(cpi->oxcf.frm_dim_cfg.forced_max_frame_height);
+ max_mb_rows = ROUND_POWER_OF_TWO(max_mi_rows, 2);
+ }
+
// Number of rows in the unit size.
- // Note mi_params->mb_rows and mi_params->mb_cols are in the unit of 16x16.
- const int unit_rows = get_unit_rows(fp_block_size, mi_params->mb_rows);
- const int unit_cols = get_unit_cols(fp_block_size, mi_params->mb_cols);
+ // Note max_mb_rows and max_mb_cols are in the unit of 16x16.
+ const int unit_rows = get_unit_rows(fp_block_size, max_mb_rows);
+ const int unit_cols = get_unit_cols(fp_block_size, max_mb_cols);
// Set fp_block_size, for the convenience of multi-thread usage.
cpi->fp_block_size = fp_block_size;
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/firstpass.h b/chromium/third_party/libaom/source/libaom/av1/encoder/firstpass.h
index e9afdf507ea..be52e8e73ec 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/firstpass.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/firstpass.h
@@ -38,7 +38,7 @@ struct ThreadData;
* normalized to each MB. MV related stats (MVc, MVr, etc.) are normalized to
* the frame width and height. See function normalize_firstpass_stats.
*/
-typedef struct {
+typedef struct FIRSTPASS_STATS {
/*!
* Frame number in display order, if stats are for a single frame.
* No real meaning for a collection of frames.
@@ -362,6 +362,23 @@ typedef struct GF_GROUP {
REFBUF_STATE refbuf_state[MAX_STATIC_GF_GROUP_LENGTH];
int arf_index; // the index in the gf group of ARF, if no arf, then -1
int size; // The total length of a GOP
+
+ // The offset into lookahead_ctx for choosing the source of
+ // frame parallel encodes.
+ int src_offset[MAX_STATIC_GF_GROUP_LENGTH];
+ // Stores the display order hint of each frame in the current GF_GROUP.
+ int display_idx[MAX_STATIC_GF_GROUP_LENGTH];
+
+ // The reference frame list maps each reference frame index to its
+ // buffer index in the decoded buffer. A value of -1 means the
+ // corresponding reference frame index doesn't point to any
+ // previously decoded frame.
+ int8_t ref_frame_list[MAX_STATIC_GF_GROUP_LENGTH][REF_FRAMES];
+ // Update frame index
+ int update_ref_idx[MAX_STATIC_GF_GROUP_LENGTH];
+ // The map_idx of the primary reference frame.
+ int primary_ref_idx[MAX_STATIC_GF_GROUP_LENGTH];
+
// Indicates the level of parallelism in frame parallel encodes.
// 0 : frame is independently encoded (not part of parallel encodes).
// 1 : frame is the first in encode order in a given parallel encode set.
@@ -372,21 +389,12 @@ typedef struct GF_GROUP {
// 1 : frame is a non-reference frame.
int is_frame_non_ref[MAX_STATIC_GF_GROUP_LENGTH];
- // The offset into lookahead_ctx for choosing
- // source of frame parallel encodes.
- int src_offset[MAX_STATIC_GF_GROUP_LENGTH];
-#if CONFIG_FRAME_PARALLEL_ENCODE
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
- // Stores the display order hint of each frame in the current GF_GROUP.
- int display_idx[MAX_STATIC_GF_GROUP_LENGTH];
// Stores the display order hint of the frames not to be
// refreshed by the current frame.
int skip_frame_refresh[MAX_STATIC_GF_GROUP_LENGTH][REF_FRAMES];
// Stores the display order hint of the frame to be excluded during reference
// assignment.
int skip_frame_as_ref[MAX_STATIC_GF_GROUP_LENGTH];
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
/*!\endcond */
} GF_GROUP;
/*!\cond */
@@ -570,7 +578,7 @@ void av1_accumulate_stats(FIRSTPASS_STATS *section,
* \param[in] cpi Top-level encoder structure
* \param[in] ts_duration Duration of the frame / collection of frames
*
- * \return Nothing is returned. Instead, the "TWO_PASS" structure inside "cpi"
+ * \remark Nothing is returned. Instead, the "TWO_PASS" structure inside "cpi"
* is modified to store information computed in this function.
*/
void av1_first_pass(struct AV1_COMP *cpi, const int64_t ts_duration);
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/global_motion.c b/chromium/third_party/libaom/source/libaom/av1/encoder/global_motion.c
index 499d597c9bd..5e03d79aeaf 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/global_motion.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/global_motion.c
@@ -11,6 +11,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
#include <memory.h>
#include <math.h>
#include <assert.h>
@@ -374,9 +375,10 @@ unsigned char *av1_downconvert_frame(YV12_BUFFER_CONFIG *frm, int bit_depth) {
return buf_8bit;
}
-static void get_inliers_from_indices(MotionModel *params,
+static bool get_inliers_from_indices(MotionModel *params,
int *correspondences) {
int *inliers_tmp = (int *)aom_malloc(2 * MAX_CORNERS * sizeof(*inliers_tmp));
+ if (!inliers_tmp) return false;
memset(inliers_tmp, 0, 2 * MAX_CORNERS * sizeof(*inliers_tmp));
for (int i = 0; i < params->num_inliers; i++) {
@@ -386,6 +388,7 @@ static void get_inliers_from_indices(MotionModel *params,
}
memcpy(params->inliers, inliers_tmp, sizeof(*inliers_tmp) * 2 * MAX_CORNERS);
aom_free(inliers_tmp);
+ return true;
}
#define FEAT_COUNT_TR 3
@@ -442,6 +445,7 @@ static int compute_global_motion_feature_based(
// find correspondences between the two images
correspondences =
(int *)malloc(num_src_corners * 4 * sizeof(*correspondences));
+ if (!correspondences) return 0;
num_correspondences = av1_determine_correspondence(
src_buffer, (int *)src_corners, num_src_corners, ref_buffer,
(int *)ref_corners, num_ref_corners, src_width, src_height, src_stride,
@@ -455,8 +459,10 @@ static int compute_global_motion_feature_based(
if (num_inliers_by_motion[i] < MIN_INLIER_PROB * num_correspondences ||
num_correspondences == 0) {
num_inliers_by_motion[i] = 0;
- } else {
- get_inliers_from_indices(&params_by_motion[i], correspondences);
+ } else if (!get_inliers_from_indices(&params_by_motion[i],
+ correspondences)) {
+ free(correspondences);
+ return 0;
}
}
@@ -707,38 +713,45 @@ static INLINE void sobel_xy_image_gradient(const uint8_t *src, int src_stride,
}
}
+static void free_pyramid(ImagePyramid *pyr) {
+ aom_free(pyr->level_buffer);
+ if (pyr->has_gradient) {
+ aom_free(pyr->level_dx_buffer);
+ aom_free(pyr->level_dy_buffer);
+ }
+ aom_free(pyr);
+}
+
static ImagePyramid *alloc_pyramid(int width, int height, int pad_size,
int compute_gradient) {
- ImagePyramid *pyr = aom_malloc(sizeof(*pyr));
+ ImagePyramid *pyr = aom_calloc(1, sizeof(*pyr));
+ if (!pyr) return NULL;
pyr->has_gradient = compute_gradient;
// 2 * width * height is the upper bound for a buffer that fits
// all pyramid levels + padding for each level
const int buffer_size = sizeof(*pyr->level_buffer) * 2 * width * height +
(width + 2 * pad_size) * 2 * pad_size * N_LEVELS;
pyr->level_buffer = aom_malloc(buffer_size);
+ if (!pyr->level_buffer) {
+ free_pyramid(pyr);
+ return NULL;
+ }
memset(pyr->level_buffer, 0, buffer_size);
if (compute_gradient) {
const int gradient_size =
sizeof(*pyr->level_dx_buffer) * 2 * width * height +
(width + 2 * pad_size) * 2 * pad_size * N_LEVELS;
- pyr->level_dx_buffer = aom_malloc(gradient_size);
- pyr->level_dy_buffer = aom_malloc(gradient_size);
- memset(pyr->level_dx_buffer, 0, gradient_size);
- memset(pyr->level_dy_buffer, 0, gradient_size);
+ pyr->level_dx_buffer = aom_calloc(1, gradient_size);
+ pyr->level_dy_buffer = aom_calloc(1, gradient_size);
+ if (!(pyr->level_dx_buffer && pyr->level_dy_buffer)) {
+ free_pyramid(pyr);
+ return NULL;
+ }
}
return pyr;
}
-static void free_pyramid(ImagePyramid *pyr) {
- aom_free(pyr->level_buffer);
- if (pyr->has_gradient) {
- aom_free(pyr->level_dx_buffer);
- aom_free(pyr->level_dy_buffer);
- }
- aom_free(pyr);
-}
-
static INLINE void update_level_dims(ImagePyramid *frm_pyr, int level) {
frm_pyr->widths[level] = frm_pyr->widths[level - 1] >> 1;
frm_pyr->heights[level] = frm_pyr->heights[level - 1] >> 1;
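alloc_pyramid now zero-initializes with aom_calloc, and free_pyramid is hoisted above it so the error paths can call it after a partial failure. A standalone sketch of the idiom with hypothetical names (Pair, pair_alloc, pair_free), relying on free(NULL) being a no-op:

#include <stdlib.h>

typedef struct { int *a, *b; } Pair;

static void pair_free(Pair *p) {
  if (!p) return;
  free(p->a); /* free(NULL) is harmless, so partially built objects are fine */
  free(p->b);
  free(p);
}

static Pair *pair_alloc(size_t n) {
  Pair *p = calloc(1, sizeof(*p)); /* members start out NULL */
  if (!p) return NULL;
  p->a = calloc(n, sizeof(*p->a));
  p->b = calloc(n, sizeof(*p->b));
  if (!p->a || !p->b) {
    pair_free(p); /* safe even though one member may be NULL */
    return NULL;
  }
  return p;
}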
@@ -853,13 +866,18 @@ static INLINE void compute_flow_at_point(unsigned char *frm, unsigned char *ref,
}
// make sure flow_u and flow_v start at 0
-static void compute_flow_field(ImagePyramid *frm_pyr, ImagePyramid *ref_pyr,
+static bool compute_flow_field(ImagePyramid *frm_pyr, ImagePyramid *ref_pyr,
double *flow_u, double *flow_v) {
int cur_width, cur_height, cur_stride, cur_loc, patch_loc, patch_center;
double *u_upscale =
aom_malloc(frm_pyr->strides[0] * frm_pyr->heights[0] * sizeof(*flow_u));
double *v_upscale =
aom_malloc(frm_pyr->strides[0] * frm_pyr->heights[0] * sizeof(*flow_v));
+ if (!(u_upscale && v_upscale)) {
+ aom_free(u_upscale);
+ aom_free(v_upscale);
+ return false;
+ }
assert(frm_pyr->n_levels == ref_pyr->n_levels);
@@ -903,6 +921,7 @@ static void compute_flow_field(ImagePyramid *frm_pyr, ImagePyramid *ref_pyr,
}
aom_free(u_upscale);
aom_free(v_upscale);
+ return true;
}
static int compute_global_motion_disflow_based(
@@ -939,40 +958,43 @@ static int compute_global_motion_disflow_based(
int compute_gradient = 1;
ImagePyramid *frm_pyr =
alloc_pyramid(frm_width, frm_height, pad_size, compute_gradient);
+ if (!frm_pyr) return 0;
compute_flow_pyramids(frm_buffer, frm_width, frm_height, frm_stride, n_levels,
pad_size, compute_gradient, frm_pyr);
// Allocate ref image pyramids
compute_gradient = 0;
ImagePyramid *ref_pyr =
alloc_pyramid(ref_width, ref_height, pad_size, compute_gradient);
+ if (!ref_pyr) {
+ free_pyramid(frm_pyr);
+ return 0;
+ }
compute_flow_pyramids(ref_buffer, ref_width, ref_height, ref->y_stride,
n_levels, pad_size, compute_gradient, ref_pyr);
+ int ret = 0;
double *flow_u =
aom_malloc(frm_pyr->strides[0] * frm_pyr->heights[0] * sizeof(*flow_u));
double *flow_v =
aom_malloc(frm_pyr->strides[0] * frm_pyr->heights[0] * sizeof(*flow_v));
+ if (!(flow_u && flow_v)) goto Error;
memset(flow_u, 0,
frm_pyr->strides[0] * frm_pyr->heights[0] * sizeof(*flow_u));
memset(flow_v, 0,
frm_pyr->strides[0] * frm_pyr->heights[0] * sizeof(*flow_v));
- compute_flow_field(frm_pyr, ref_pyr, flow_u, flow_v);
+ if (!compute_flow_field(frm_pyr, ref_pyr, flow_u, flow_v)) goto Error;
// find correspondences between the two images using the flow field
correspondences = aom_malloc(num_frm_corners * 4 * sizeof(*correspondences));
+ if (!correspondences) goto Error;
num_correspondences = determine_disflow_correspondence(
frm_corners, num_frm_corners, flow_u, flow_v, frm_width, frm_height,
frm_pyr->strides[0], correspondences);
ransac(correspondences, num_correspondences, num_inliers_by_motion,
params_by_motion, num_motions);
- free_pyramid(frm_pyr);
- free_pyramid(ref_pyr);
- aom_free(correspondences);
- aom_free(flow_u);
- aom_free(flow_v);
// Set num_inliers = 0 for motions with too few inliers so they are ignored.
for (int i = 0; i < num_motions; ++i) {
if (num_inliers_by_motion[i] < MIN_INLIER_PROB * num_correspondences) {
@@ -982,9 +1004,19 @@ static int compute_global_motion_disflow_based(
// Return true if any one of the motions has inliers.
for (int i = 0; i < num_motions; ++i) {
- if (num_inliers_by_motion[i] > 0) return 1;
+ if (num_inliers_by_motion[i] > 0) {
+ ret = 1;
+ break;
+ }
}
- return 0;
+
+ aom_free(correspondences);
+Error:
+ free_pyramid(frm_pyr);
+ free_pyramid(ref_pyr);
+ aom_free(flow_u);
+ aom_free(flow_v);
+ return ret;
}
int av1_compute_global_motion(TransformationType type,
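compute_global_motion_disflow_based is restructured around a single exit: ret holds the failure value until the work succeeds, and every allocation funnels through one cleanup label. The shape of the idiom, as a self-contained sketch:

#include <stdlib.h>

static int compute_sketch(size_t n) {
  int ret = 0; /* assume failure until the computation completes */
  double *u = malloc(n * sizeof(*u));
  double *v = malloc(n * sizeof(*v));
  if (!u || !v) goto Error;

  /* ... the actual computation would run here ... */
  ret = 1;

Error:
  free(u); /* free(NULL) is a no-op, so unconditional frees are safe */
  free(v);
  return ret;
}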
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/global_motion_facade.c b/chromium/third_party/libaom/source/libaom/av1/encoder/global_motion_facade.c
index 8237b466d37..5cddc7e63e9 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/global_motion_facade.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/global_motion_facade.c
@@ -133,6 +133,16 @@ static AOM_INLINE void compute_global_motion_for_ref_frame(
params_this_motion = params_by_motion[i].params;
av1_convert_model_to_params(params_this_motion, &tmp_wm_params);
+ // Work around a bug in the AV1 specification
+ //
+ // For TRANSLATION type global motion models, gm_get_motion_vector() gives
+ // the wrong motion vector (see comments in that function for details).
+ // As translation-type models do not give much gain, we can avoid this bug
+ // by never choosing a TRANSLATION-type model.
+ if (tmp_wm_params.wmtype == TRANSLATION) {
+ continue;
+ }
+
if (tmp_wm_params.wmtype != IDENTITY) {
av1_compute_feature_segmentation_map(
segment_map, segment_map_w, segment_map_h,
@@ -154,6 +164,12 @@ static AOM_INLINE void compute_global_motion_for_ref_frame(
GM_REFINEMENT_COUNT, best_warp_error, segment_map, segment_map_w,
erroradv_threshold);
+ // av1_refine_integerized_param() can return a TRANSLATION type model
+ // even if its input is some other type, so we have to skip those too
+ if (tmp_wm_params.wmtype == TRANSLATION) {
+ continue;
+ }
+
if (warp_error < best_warp_error) {
best_warp_error = warp_error;
// Save the wm_params modified by
@@ -168,6 +184,8 @@ static AOM_INLINE void compute_global_motion_for_ref_frame(
if (!av1_get_shear_params(&cm->global_motion[frame]))
cm->global_motion[frame] = default_warp_params;
+#if 0
+ // We never choose translational models, so this code is disabled
if (cm->global_motion[frame].wmtype == TRANSLATION) {
cm->global_motion[frame].wmmat[0] =
convert_to_trans_prec(cm->features.allow_high_precision_mv,
@@ -178,6 +196,7 @@ static AOM_INLINE void compute_global_motion_for_ref_frame(
cm->global_motion[frame].wmmat[1]) *
GM_TRANS_ONLY_DECODE_FACTOR;
}
+#endif
if (cm->global_motion[frame].wmtype == IDENTITY) continue;
@@ -354,30 +373,38 @@ static AOM_INLINE void update_valid_ref_frames_for_gm(
}
}
+// Deallocates segment_map and inliers.
+static AOM_INLINE void dealloc_global_motion_data(MotionModel *params_by_motion,
+ uint8_t *segment_map) {
+ aom_free(segment_map);
+
+ for (int m = 0; m < RANSAC_NUM_MOTIONS; m++) {
+ aom_free(params_by_motion[m].inliers);
+ }
+}
+
// Allocates and initializes memory for segment_map and MotionModel.
-static AOM_INLINE void alloc_global_motion_data(MotionModel *params_by_motion,
+static AOM_INLINE bool alloc_global_motion_data(MotionModel *params_by_motion,
uint8_t **segment_map,
const int segment_map_w,
const int segment_map_h) {
+ av1_zero_array(params_by_motion, RANSAC_NUM_MOTIONS);
for (int m = 0; m < RANSAC_NUM_MOTIONS; m++) {
- av1_zero(params_by_motion[m]);
params_by_motion[m].inliers =
aom_malloc(sizeof(*(params_by_motion[m].inliers)) * 2 * MAX_CORNERS);
+ if (!params_by_motion[m].inliers) {
+ dealloc_global_motion_data(params_by_motion, NULL);
+ return false;
+ }
}
- *segment_map = (uint8_t *)aom_malloc(sizeof(*segment_map) * segment_map_w *
- segment_map_h);
- av1_zero_array(*segment_map, segment_map_w * segment_map_h);
-}
-
-// Deallocates segment_map and inliers.
-static AOM_INLINE void dealloc_global_motion_data(MotionModel *params_by_motion,
- uint8_t *segment_map) {
- aom_free(segment_map);
-
- for (int m = 0; m < RANSAC_NUM_MOTIONS; m++) {
- aom_free(params_by_motion[m].inliers);
+ *segment_map = (uint8_t *)aom_calloc(segment_map_w * segment_map_h,
+ sizeof(*segment_map));
+ if (!*segment_map) {
+ dealloc_global_motion_data(params_by_motion, NULL);
+ return false;
}
+ return true;
}
// Initializes parameters used for computing global motion.
@@ -460,7 +487,7 @@ void av1_compute_global_motion_facade(AV1_COMP *cpi) {
if (cpi->gf_frame_index == 0) {
for (int i = 0; i < FRAME_UPDATE_TYPES; i++) {
cpi->ppi->valid_gm_model_found[i] = INT32_MAX;
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
if (cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE)
cpi->ppi->temp_valid_gm_model_found[i] = INT32_MAX;
#endif
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/gop_structure.c b/chromium/third_party/libaom/source/libaom/av1/encoder/gop_structure.c
index 7b1380d8adc..f1689dec608 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/gop_structure.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/gop_structure.c
@@ -58,8 +58,6 @@ static void set_src_offset(GF_GROUP *const gf_group, int *first_frame_index,
}
}
-#if CONFIG_FRAME_PARALLEL_ENCODE
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
// Sets the GF_GROUP params for LF_UPDATE frames.
static AOM_INLINE void set_params_for_leaf_frames(
const TWO_PASS *twopass, const TWO_PASS_FRAME *twopass_frame,
@@ -431,8 +429,6 @@ static AOM_INLINE void set_multi_layer_params_for_gf14(
}
}
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
// Set parameters for frames between 'start' and 'end' (excluding both).
static void set_multi_layer_params(
@@ -539,14 +535,10 @@ static int construct_multi_layer_gf_structure(
int frame_index = 0;
int cur_frame_index = 0;
-#if CONFIG_FRAME_PARALLEL_ENCODE
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
// Set the display order hint for the first frame in the GF_GROUP.
int cur_disp_index = (first_frame_update_type == KF_UPDATE)
? 0
: cpi->common.current_frame.frame_number;
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
// Initialize gf_group->frame_parallel_level and gf_group->is_frame_non_ref to
// 0.
@@ -557,8 +549,6 @@ static int construct_multi_layer_gf_structure(
sizeof(gf_group->is_frame_non_ref[0]) * MAX_STATIC_GF_GROUP_LENGTH);
memset(gf_group->src_offset, 0,
sizeof(gf_group->src_offset[0]) * MAX_STATIC_GF_GROUP_LENGTH);
-#if CONFIG_FRAME_PARALLEL_ENCODE
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
// Initialize gf_group->skip_frame_refresh and gf_group->skip_frame_as_ref
// with INVALID_IDX.
memset(gf_group->skip_frame_refresh, INVALID_IDX,
@@ -566,8 +556,6 @@ static int construct_multi_layer_gf_structure(
MAX_STATIC_GF_GROUP_LENGTH * REF_FRAMES);
memset(gf_group->skip_frame_as_ref, INVALID_IDX,
sizeof(gf_group->skip_frame_as_ref[0]) * MAX_STATIC_GF_GROUP_LENGTH);
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
int kf_decomp = cpi->oxcf.kf_cfg.enable_keyframe_filtering > 1;
// This is a patch that fixes https://crbug.com/aomedia/3163
@@ -589,12 +577,8 @@ static int construct_multi_layer_gf_structure(
gf_group->frame_type[frame_index] = KEY_FRAME;
gf_group->refbuf_state[frame_index] = REFBUF_RESET;
gf_group->max_layer_depth = 0;
-#if CONFIG_FRAME_PARALLEL_ENCODE
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
gf_group->display_idx[frame_index] = cur_disp_index;
if (!kf_decomp) cur_disp_index++;
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
++frame_index;
if (kf_decomp) {
@@ -605,12 +589,8 @@ static int construct_multi_layer_gf_structure(
gf_group->frame_type[frame_index] = INTER_FRAME;
gf_group->refbuf_state[frame_index] = REFBUF_UPDATE;
gf_group->max_layer_depth = 0;
-#if CONFIG_FRAME_PARALLEL_ENCODE
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
gf_group->display_idx[frame_index] = cur_disp_index;
cur_disp_index++;
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
++frame_index;
}
cur_frame_index++;
@@ -624,12 +604,8 @@ static int construct_multi_layer_gf_structure(
gf_group->frame_type[frame_index] = INTER_FRAME;
gf_group->refbuf_state[frame_index] = REFBUF_UPDATE;
gf_group->max_layer_depth = 0;
-#if CONFIG_FRAME_PARALLEL_ENCODE
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
gf_group->display_idx[frame_index] = cur_disp_index;
cur_disp_index++;
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
++frame_index;
++cur_frame_index;
}
@@ -648,12 +624,8 @@ static int construct_multi_layer_gf_structure(
gf_group->refbuf_state[frame_index] = REFBUF_UPDATE;
gf_group->max_layer_depth = 1;
gf_group->arf_index = frame_index;
-#if CONFIG_FRAME_PARALLEL_ENCODE
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
gf_group->display_idx[frame_index] =
cur_disp_index + gf_group->arf_src_offset[frame_index];
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
++frame_index;
} else {
gf_group->arf_index = -1;
@@ -671,8 +643,6 @@ static int construct_multi_layer_gf_structure(
gf_group->max_layer_depth_allowed >= 4);
int first_frame_index = cur_frame_index;
-#if CONFIG_FRAME_PARALLEL_ENCODE
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
if (do_frame_parallel_encode) {
// construct_multi_layer_gf_structure() takes the input parameter
// 'gf_interval' as p_rc->baseline_gf_interval - 1 . Below code computes the
@@ -735,8 +705,9 @@ static int construct_multi_layer_gf_structure(
}
} else {
// Set layer depth threshold for reordering as per the gf length.
- int depth_thr =
- (actual_gf_length == 16) ? 3 : (actual_gf_length == 32) ? 4 : INT_MAX;
+ int depth_thr = (actual_gf_length == 16) ? 3
+ : (actual_gf_length == 32) ? 4
+ : INT_MAX;
set_multi_layer_params_for_fp(
twopass, &cpi->twopass_frame, gf_group, p_rc, rc, frame_info,
@@ -747,8 +718,6 @@ static int construct_multi_layer_gf_structure(
}
is_multi_layer_configured = 1;
}
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
// Rest of the frames.
if (!is_multi_layer_configured)
@@ -767,11 +736,7 @@ static int construct_multi_layer_gf_structure(
gf_group->frame_type[frame_index] = INTER_FRAME;
gf_group->refbuf_state[frame_index] =
is_fwd_kf ? REFBUF_RESET : REFBUF_UPDATE;
-#if CONFIG_FRAME_PARALLEL_ENCODE
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
gf_group->display_idx[frame_index] = cur_disp_index;
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
++frame_index;
} else {
for (; cur_frame_index <= gf_interval; ++cur_frame_index) {
@@ -785,12 +750,8 @@ static int construct_multi_layer_gf_structure(
gf_group->max_layer_depth = AOMMAX(gf_group->max_layer_depth, 2);
set_src_offset(gf_group, &first_frame_index, cur_frame_index,
frame_index);
-#if CONFIG_FRAME_PARALLEL_ENCODE
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
gf_group->display_idx[frame_index] = cur_disp_index;
cur_disp_index++;
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
++frame_index;
}
}
@@ -864,10 +825,15 @@ void av1_gop_setup_structure(AV1_COMP *cpi) {
const int key_frame = rc->frames_since_key == 0;
FRAME_UPDATE_TYPE first_frame_update_type = ARF_UPDATE;
- if (key_frame)
+ if (key_frame) {
first_frame_update_type = KF_UPDATE;
- else if (!cpi->ppi->gf_state.arf_gf_boost_lst)
+ if (cpi->oxcf.kf_max_pyr_height != -1) {
+ gf_group->max_layer_depth_allowed = AOMMIN(
+ cpi->oxcf.kf_max_pyr_height, gf_group->max_layer_depth_allowed);
+ }
+ } else if (!cpi->ppi->gf_state.arf_gf_boost_lst) {
first_frame_update_type = GF_UPDATE;
+ }
gf_group->size = construct_multi_layer_gf_structure(
cpi, twopass, gf_group, rc, frame_info, p_rc->baseline_gf_interval,
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/gop_structure.h b/chromium/third_party/libaom/source/libaom/av1/encoder/gop_structure.h
index eb20c846161..ff22f541364 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/gop_structure.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/gop_structure.h
@@ -38,7 +38,7 @@ struct EncodeFrameParams;
*
* \param[in] cpi Top - level encoder instance structure
*
- * \return No return value but this function updates group data structures.
+ * \remark No return value but this function updates group data structures.
*/
void av1_gop_setup_structure(struct AV1_COMP *cpi);
@@ -58,7 +58,7 @@ void av1_gop_setup_structure(struct AV1_COMP *cpi);
* uni-directional group.
* \param[in] gf_group_bits Bits available to be allocated.
*
- * \return No return but updates the rate control and group data structures
+ * \remark No return but updates the rate control and group data structures
* to reflect the allocation of bits.
*/
void av1_gop_bit_allocation(const AV1_COMP *cpi, RATE_CONTROL *const rc,
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/hash_motion.c b/chromium/third_party/libaom/source/libaom/av1/encoder/hash_motion.c
index 310cde886bf..164aa09783a 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/hash_motion.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/hash_motion.c
@@ -10,6 +10,7 @@
*/
#include <assert.h>
+#include <stdbool.h>
#include "config/av1_rtcd.h"
@@ -120,23 +121,26 @@ void av1_hash_table_destroy(hash_table *p_hash_table) {
p_hash_table->p_lookup_table = NULL;
}
-void av1_hash_table_create(hash_table *p_hash_table) {
+bool av1_hash_table_create(hash_table *p_hash_table) {
if (p_hash_table->p_lookup_table != NULL) {
av1_hash_table_clear_all(p_hash_table);
- return;
+ return true;
}
p_hash_table->p_lookup_table =
- (Vector **)aom_malloc(sizeof(p_hash_table->p_lookup_table[0]) * kMaxAddr);
- memset(p_hash_table->p_lookup_table, 0,
- sizeof(p_hash_table->p_lookup_table[0]) * kMaxAddr);
+ (Vector **)aom_calloc(kMaxAddr, sizeof(p_hash_table->p_lookup_table[0]));
+ if (!p_hash_table->p_lookup_table) return false;
+ return true;
}
-static void hash_table_add_to_table(hash_table *p_hash_table,
+static bool hash_table_add_to_table(hash_table *p_hash_table,
uint32_t hash_value,
block_hash *curr_block_hash) {
if (p_hash_table->p_lookup_table[hash_value] == NULL) {
p_hash_table->p_lookup_table[hash_value] =
aom_malloc(sizeof(p_hash_table->p_lookup_table[0][0]));
+ if (p_hash_table->p_lookup_table[hash_value] == NULL) {
+ return false;
+ }
aom_vector_setup(p_hash_table->p_lookup_table[hash_value], 10,
sizeof(curr_block_hash[0]));
aom_vector_push_back(p_hash_table->p_lookup_table[hash_value],
@@ -145,6 +149,7 @@ static void hash_table_add_to_table(hash_table *p_hash_table,
aom_vector_push_back(p_hash_table->p_lookup_table[hash_value],
curr_block_hash);
}
+ return true;
}
int32_t av1_hash_table_count(const hash_table *p_hash_table,
@@ -307,7 +312,7 @@ void av1_generate_block_hash_value(IntraBCHashInfo *intrabc_hash_info,
}
}
-void av1_add_to_hash_map_by_row_with_precal_data(hash_table *p_hash_table,
+bool av1_add_to_hash_map_by_row_with_precal_data(hash_table *p_hash_table,
uint32_t *pic_hash[2],
int8_t *pic_is_same,
int pic_width, int pic_height,
@@ -335,10 +340,14 @@ void av1_add_to_hash_map_by_row_with_precal_data(hash_table *p_hash_table,
const uint32_t hash_value1 = (src_hash[0][pos] & crc_mask) + add_value;
curr_block_hash.hash_value2 = src_hash[1][pos];
- hash_table_add_to_table(p_hash_table, hash_value1, &curr_block_hash);
+ if (!hash_table_add_to_table(p_hash_table, hash_value1,
+ &curr_block_hash)) {
+ return false;
+ }
}
}
}
+ return true;
}
int av1_hash_is_horizontal_perfect(const YV12_BUFFER_CONFIG *picture,
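The hash-table entry points change from void to bool so allocation failures propagate to callers. A hypothetical miniature of the two-level pattern (table_insert and table_bulk_add are illustrative names, not libaom API):

#include <stdbool.h>
#include <stdlib.h>

/* The inner insert can fail, so the outer bulk-add loop must stop and
 * report failure instead of continuing with a NULL slot. */
static bool table_insert(void **slot, size_t size) {
  if (*slot == NULL) {
    *slot = malloc(size);
    if (*slot == NULL) return false; /* propagate OOM upward */
  }
  return true;
}

static bool table_bulk_add(void **slots, size_t n, size_t size) {
  for (size_t i = 0; i < n; ++i)
    if (!table_insert(&slots[i], size)) return false;
  return true;
}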
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/hash_motion.h b/chromium/third_party/libaom/source/libaom/av1/encoder/hash_motion.h
index e4ea1f3948c..8974ba27cbd 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/hash_motion.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/hash_motion.h
@@ -12,6 +12,8 @@
#ifndef AOM_AV1_ENCODER_HASH_MOTION_H_
#define AOM_AV1_ENCODER_HASH_MOTION_H_
+#include <stdbool.h>
+
#include "config/aom_config.h"
#include "aom/aom_integer.h"
@@ -56,7 +58,7 @@ typedef struct intrabc_hash_info {
void av1_hash_table_init(IntraBCHashInfo *intra_bc_hash_info);
void av1_hash_table_clear_all(hash_table *p_hash_table);
void av1_hash_table_destroy(hash_table *p_hash_table);
-void av1_hash_table_create(hash_table *p_hash_table);
+bool av1_hash_table_create(hash_table *p_hash_table);
int32_t av1_hash_table_count(const hash_table *p_hash_table,
uint32_t hash_value);
Iterator av1_hash_get_first_iterator(hash_table *p_hash_table,
@@ -74,7 +76,7 @@ void av1_generate_block_hash_value(IntraBCHashInfo *intra_bc_hash_info,
uint32_t *dst_pic_block_hash[2],
int8_t *src_pic_block_same_info[3],
int8_t *dst_pic_block_same_info[3]);
-void av1_add_to_hash_map_by_row_with_precal_data(hash_table *p_hash_table,
+bool av1_add_to_hash_map_by_row_with_precal_data(hash_table *p_hash_table,
uint32_t *pic_hash[2],
int8_t *pic_is_same,
int pic_width, int pic_height,
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/interp_search.c b/chromium/third_party/libaom/source/libaom/av1/encoder/interp_search.c
index c3133723055..2b7eb916f3a 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/interp_search.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/interp_search.c
@@ -448,7 +448,7 @@ static INLINE void find_best_non_dual_interp_filter(
int use_actual_frame_probs = 1;
const int *switchable_interp_p0;
const int *switchable_interp_p1;
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
use_actual_frame_probs =
(cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
if (!use_actual_frame_probs) {
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/intra_mode_search.c b/chromium/third_party/libaom/source/libaom/av1/encoder/intra_mode_search.c
index 0f5660240e3..d8639100faf 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/intra_mode_search.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/intra_mode_search.c
@@ -18,6 +18,19 @@
#include "av1/encoder/speed_features.h"
#include "av1/encoder/tx_search.h"
+// Even though there are only 7 delta angles (-3 .. 3), this macro is set to 9
+// so that the rd threshold check can prune the -3 and 3 delta angles against
+// sentinel entries on either side of the array.
+#define SIZE_OF_ANGLE_DELTA_RD_COST_ARRAY (2 * MAX_ANGLE_DELTA + 3)
+
+// The order for evaluating delta angles while processing the luma directional
+// intra modes. Currently, this order of evaluation is applicable only when
+// speed feature prune_luma_odd_delta_angles_in_intra is enabled. In this case,
+// even angles are evaluated first in order to facilitate the pruning of odd
+// delta angles based on the rd costs of the neighboring delta angles.
+static const int8_t luma_delta_angles_order[2 * MAX_ANGLE_DELTA] = {
+ -2, 2, -3, -1, 1, 3,
+};
+
/*!\cond */
static const PREDICTION_MODE intra_rd_search_mode_order[INTRA_MODES] = {
DC_PRED, H_PRED, V_PRED, SMOOTH_PRED, PAETH_PRED,
@@ -215,7 +228,7 @@ static double intra_rd_variance_factor(const AV1_COMP *cpi, MACROBLOCK *x,
*/
static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
int *rate, int *rate_tokenonly,
- int64_t *distortion, int *skippable,
+ int64_t *distortion, uint8_t *skippable,
BLOCK_SIZE bsize, int mode_cost,
PREDICTION_MODE best_mode_so_far,
int64_t *best_rd, int64_t *best_model_rd,
@@ -365,15 +378,23 @@ void av1_count_colors_highbd(const uint8_t *src8, int stride, int rows,
}
}
-void set_y_mode_and_delta_angle(const int mode_idx, MB_MODE_INFO *const mbmi) {
+void set_y_mode_and_delta_angle(const int mode_idx, MB_MODE_INFO *const mbmi,
+ int reorder_delta_angle_eval) {
if (mode_idx < INTRA_MODE_END) {
mbmi->mode = intra_rd_search_mode_order[mode_idx];
mbmi->angle_delta[PLANE_TYPE_Y] = 0;
} else {
mbmi->mode = (mode_idx - INTRA_MODE_END) / (MAX_ANGLE_DELTA * 2) + V_PRED;
- int angle_delta = (mode_idx - INTRA_MODE_END) % (MAX_ANGLE_DELTA * 2);
- mbmi->angle_delta[PLANE_TYPE_Y] =
- (angle_delta < 3 ? (angle_delta - 3) : (angle_delta - 2));
+ int delta_angle_eval_idx =
+ (mode_idx - INTRA_MODE_END) % (MAX_ANGLE_DELTA * 2);
+ if (reorder_delta_angle_eval) {
+ mbmi->angle_delta[PLANE_TYPE_Y] =
+ luma_delta_angles_order[delta_angle_eval_idx];
+ } else {
+ mbmi->angle_delta[PLANE_TYPE_Y] =
+ (delta_angle_eval_idx < 3 ? (delta_angle_eval_idx - 3)
+ : (delta_angle_eval_idx - 2));
+ }
}
}
@@ -578,45 +599,58 @@ static int64_t cfl_compute_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
return cfl_cost;
}
-static void cfl_pick_plane_parameter(const AV1_COMP *const cpi, MACROBLOCK *x,
- int plane, TX_SIZE tx_size,
- int cfl_search_range,
- RD_STATS cfl_rd_arr[CFL_MAGS_SIZE]) {
+static const int cfl_dir_ls[2] = { 1, -1 };
+
+// If cfl_search_range is CFL_MAGS_SIZE, return CFL_INDEX_ZERO. Otherwise
+// return the index of the best alpha found using intra_model_rd().
+static int cfl_pick_plane_parameter(const AV1_COMP *const cpi, MACROBLOCK *x,
+ int plane, TX_SIZE tx_size,
+ int cfl_search_range) {
assert(cfl_search_range >= 1 && cfl_search_range <= CFL_MAGS_SIZE);
- MACROBLOCKD *const xd = &x->e_mbd;
- xd->cfl.use_dc_pred_cache = 1;
+ if (cfl_search_range == CFL_MAGS_SIZE) return CFL_INDEX_ZERO;
- MB_MODE_INFO *const mbmi = xd->mi[0];
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const MB_MODE_INFO *const mbmi = xd->mi[0];
assert(mbmi->uv_mode == UV_CFL_PRED);
const MACROBLOCKD_PLANE *pd = &xd->plane[plane];
const BLOCK_SIZE plane_bsize =
get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
- const int dir_ls[2] = { 1, -1 };
-
int est_best_cfl_idx = CFL_INDEX_ZERO;
- if (cfl_search_range < CFL_MAGS_SIZE) {
- int fast_mode = 1;
- int start_cfl_idx = CFL_INDEX_ZERO;
- int64_t best_cfl_cost = cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize,
- start_cfl_idx, fast_mode, NULL);
- for (int si = 0; si < 2; ++si) {
- const int dir = dir_ls[si];
- for (int i = 1; i < CFL_MAGS_SIZE; ++i) {
- int cfl_idx = start_cfl_idx + dir * i;
- if (cfl_idx < 0 || cfl_idx >= CFL_MAGS_SIZE) break;
- int64_t cfl_cost = cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize,
- cfl_idx, fast_mode, NULL);
- if (cfl_cost < best_cfl_cost) {
- best_cfl_cost = cfl_cost;
- est_best_cfl_idx = cfl_idx;
- } else {
- break;
- }
+ int fast_mode = 1;
+ int start_cfl_idx = CFL_INDEX_ZERO;
+ int64_t best_cfl_cost = cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize,
+ start_cfl_idx, fast_mode, NULL);
+ for (int si = 0; si < 2; ++si) {
+ const int dir = cfl_dir_ls[si];
+ for (int i = 1; i < CFL_MAGS_SIZE; ++i) {
+ int cfl_idx = start_cfl_idx + dir * i;
+ if (cfl_idx < 0 || cfl_idx >= CFL_MAGS_SIZE) break;
+ int64_t cfl_cost = cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize,
+ cfl_idx, fast_mode, NULL);
+ if (cfl_cost < best_cfl_cost) {
+ best_cfl_cost = cfl_cost;
+ est_best_cfl_idx = cfl_idx;
+ } else {
+ break;
}
}
}
+ return est_best_cfl_idx;
+}
+
+static void cfl_pick_plane_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
+ int plane, TX_SIZE tx_size, int cfl_search_range,
+ RD_STATS cfl_rd_arr[CFL_MAGS_SIZE],
+ int est_best_cfl_idx) {
+ assert(cfl_search_range >= 1 && cfl_search_range <= CFL_MAGS_SIZE);
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const MB_MODE_INFO *const mbmi = xd->mi[0];
+ assert(mbmi->uv_mode == UV_CFL_PRED);
+ const MACROBLOCKD_PLANE *pd = &xd->plane[plane];
+ const BLOCK_SIZE plane_bsize =
+ get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
for (int cfl_idx = 0; cfl_idx < CFL_MAGS_SIZE; ++cfl_idx) {
av1_invalid_rd_stats(&cfl_rd_arr[cfl_idx]);
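The alpha search factored out above walks outward from the zero index in both directions and abandons a direction as soon as the cost stops improving, i.e. a 1-D hill climb that assumes the cost is roughly unimodal. A self-contained sketch with a hypothetical cost callback:

/* Returns the index with the lowest cost reachable from `center` by
 * monotone descent in either direction. */
static int hill_climb_1d(long long (*cost)(int), int center, int size) {
  int best_idx = center;
  long long best_cost = cost(center);
  for (int dir = -1; dir <= 1; dir += 2) {
    for (int i = 1;; ++i) {
      const int idx = center + dir * i;
      if (idx < 0 || idx >= size) break;
      const long long c = cost(idx);
      if (c >= best_cost) break; /* stop once this direction worsens */
      best_cost = c;
      best_idx = idx;
    }
  }
  return best_idx;
}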
@@ -626,8 +660,11 @@ static void cfl_pick_plane_parameter(const AV1_COMP *const cpi, MACROBLOCK *x,
int start_cfl_idx = est_best_cfl_idx;
cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize, start_cfl_idx, fast_mode,
&cfl_rd_arr[start_cfl_idx]);
+
+ if (cfl_search_range == 1) return;
+
for (int si = 0; si < 2; ++si) {
- const int dir = dir_ls[si];
+ const int dir = cfl_dir_ls[si];
for (int i = 1; i < cfl_search_range; ++i) {
int cfl_idx = start_cfl_idx + dir * i;
if (cfl_idx < 0 || cfl_idx >= CFL_MAGS_SIZE) break;
@@ -635,9 +672,6 @@ static void cfl_pick_plane_parameter(const AV1_COMP *const cpi, MACROBLOCK *x,
&cfl_rd_arr[cfl_idx]);
}
}
- xd->cfl.use_dc_pred_cache = 0;
- xd->cfl.dc_pred_is_cached[0] = 0;
- xd->cfl.dc_pred_is_cached[1] = 0;
}
/*!\brief Pick the optimal parameters for Chroma to Luma (CFL) component
@@ -677,11 +711,48 @@ static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi,
const ModeCosts *mode_costs = &x->mode_costs;
RD_STATS cfl_rd_arr_u[CFL_MAGS_SIZE];
RD_STATS cfl_rd_arr_v[CFL_MAGS_SIZE];
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int est_best_cfl_idx_u, est_best_cfl_idx_v;
av1_invalid_rd_stats(best_rd_stats);
- cfl_pick_plane_parameter(cpi, x, 1, tx_size, cfl_search_range, cfl_rd_arr_u);
- cfl_pick_plane_parameter(cpi, x, 2, tx_size, cfl_search_range, cfl_rd_arr_v);
+ // As the dc pred data is the same for different values of alpha, enable
+ // caching of the dc pred data.
+ xd->cfl.use_dc_pred_cache = 1;
+ // Evaluate alpha parameter of each chroma plane.
+ est_best_cfl_idx_u =
+ cfl_pick_plane_parameter(cpi, x, 1, tx_size, cfl_search_range);
+ est_best_cfl_idx_v =
+ cfl_pick_plane_parameter(cpi, x, 2, tx_size, cfl_search_range);
+
+ // For cfl_search_range = 1, further refinement of alpha is not enabled.
+ // Hence a CfL index of 0 for both chroma planes implies an invalid CfL mode.
+ if (cfl_search_range == 1 && est_best_cfl_idx_u == CFL_INDEX_ZERO &&
+ est_best_cfl_idx_v == CFL_INDEX_ZERO) {
+ // Set invalid CfL parameters here as CfL mode is invalid.
+ *best_cfl_alpha_idx = 0;
+ *best_cfl_alpha_signs = 0;
+
+ // Clear the following flags to avoid the unintentional usage of cached dc
+ // pred data.
+ xd->cfl.use_dc_pred_cache = 0;
+ xd->cfl.dc_pred_is_cached[0] = 0;
+ xd->cfl.dc_pred_is_cached[1] = 0;
+ return 0;
+ }
+
+ // Compute the rd cost of each chroma plane using the alpha parameters which
+ // were already evaluated.
+ cfl_pick_plane_rd(cpi, x, 1, tx_size, cfl_search_range, cfl_rd_arr_u,
+ est_best_cfl_idx_u);
+ cfl_pick_plane_rd(cpi, x, 2, tx_size, cfl_search_range, cfl_rd_arr_v,
+ est_best_cfl_idx_v);
+
+ // Clear the following flags to avoid the unintentional usage of cached dc
+ // pred data.
+ xd->cfl.use_dc_pred_cache = 0;
+ xd->cfl.dc_pred_is_cached[0] = 0;
+ xd->cfl.dc_pred_is_cached[1] = 0;
for (int ui = 0; ui < CFL_MAGS_SIZE; ++ui) {
if (cfl_rd_arr_u[ui].rate == INT_MAX) continue;
@@ -725,9 +796,23 @@ static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi,
return 1;
}
+static bool should_prune_chroma_smooth_pred_based_on_source_variance(
+ const AV1_COMP *cpi, const MACROBLOCK *x, BLOCK_SIZE bsize) {
+ if (!cpi->sf.intra_sf.prune_smooth_intra_mode_for_chroma) return false;
+
+ // If the source variance of both chroma planes is less than 20 (empirically
+ // derived), prune UV_SMOOTH_PRED.
+ for (int i = AOM_PLANE_U; i < av1_num_planes(&cpi->common); i++) {
+ const unsigned int variance = av1_get_perpixel_variance_facade(
+ cpi, &x->e_mbd, &x->plane[i].src, bsize, i);
+ if (variance >= 20) return false;
+ }
+ return true;
+}
+
int64_t av1_rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
int *rate, int *rate_tokenonly,
- int64_t *distortion, int *skippable,
+ int64_t *distortion, uint8_t *skippable,
BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
@@ -836,6 +921,11 @@ int64_t av1_rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
&this_rate, &tokenonly_rd_stats))
continue;
} else {
+ if (mode == UV_SMOOTH_PRED &&
+ should_prune_chroma_smooth_pred_based_on_source_variance(cpi, x,
+ bsize))
+ continue;
+
// Predict directly if we don't need to search for angle delta.
if (!av1_txfm_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd)) {
continue;
@@ -1047,7 +1137,8 @@ static AOM_INLINE int intra_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, const int *bmode_costs,
int64_t *best_rd, int *rate,
int *rate_tokenonly, int64_t *distortion,
- int *skippable, MB_MODE_INFO *best_mbmi,
+ uint8_t *skippable,
+ MB_MODE_INFO *best_mbmi,
PICK_MODE_CONTEXT *ctx) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = xd->mi[0];
@@ -1089,7 +1180,7 @@ static AOM_INLINE int intra_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
* \callergraph
* This function loops through all filter_intra modes to find the best one.
*
- * \return Returns nothing, but updates the mbmi and rd_stats.
+ * \remark Returns nothing, but updates the mbmi and rd_stats.
*/
static INLINE void handle_filter_intra_mode(const AV1_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize,
@@ -1312,10 +1403,36 @@ int av1_search_intra_uv_modes_in_interframe(
return 1;
}
+// Checks if odd delta angles can be pruned based on rdcosts of even delta
+// angles of the corresponding directional mode.
+static AOM_INLINE int prune_luma_odd_delta_angles_using_rd_cost(
+ const MB_MODE_INFO *const mbmi, const int64_t *const intra_modes_rd_cost,
+ int64_t best_rd, int prune_luma_odd_delta_angles_in_intra) {
+ const int luma_delta_angle = mbmi->angle_delta[PLANE_TYPE_Y];
+ if (!prune_luma_odd_delta_angles_in_intra ||
+ !av1_is_directional_mode(mbmi->mode) || !(abs(luma_delta_angle) & 1) ||
+ best_rd == INT64_MAX)
+ return 0;
+
+ const int64_t rd_thresh = best_rd + (best_rd >> 3);
+
+ // Neighbour rdcosts are considered for pruning of odd delta angles as
+ // mentioned below:
+ // Delta angle Delta angle rdcost
+ // to be pruned to be considered
+ // -3 -2
+ // -1 -2, 0
+ // 1 0, 2
+ // 3 2
+ return intra_modes_rd_cost[luma_delta_angle + MAX_ANGLE_DELTA] > rd_thresh &&
+ intra_modes_rd_cost[luma_delta_angle + MAX_ANGLE_DELTA + 2] >
+ rd_thresh;
+}
+
// Finds the best non-intrabc mode on an intra frame.
int64_t av1_rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
int *rate, int *rate_tokenonly,
- int64_t *distortion, int *skippable,
+ int64_t *distortion, uint8_t *skippable,
BLOCK_SIZE bsize, int64_t best_rd,
PICK_MODE_CONTEXT *ctx) {
MACROBLOCKD *const xd = &x->e_mbd;
@@ -1373,13 +1490,32 @@ int64_t av1_rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
top_intra_model_rd[i] = INT64_MAX;
}
+
+ // Initialize the rdcost corresponding to all the directional and
+ // non-directional intra modes.
+ // 1. For directional modes, it stores the rdcost values for delta angles -4,
+ // -3, ..., 3, 4.
+ // 2. The rdcost value for luma_delta_angle is stored at index
+ // luma_delta_angle + MAX_ANGLE_DELTA + 1.
+ // 3. The rdcost values for fictitious/nonexistent luma_delta_angle -4 and 4
+ // (array indices 0 and 8) are always set to INT64_MAX (the initial value).
+ int64_t intra_modes_rd_cost[INTRA_MODE_END]
+ [SIZE_OF_ANGLE_DELTA_RD_COST_ARRAY];
+ for (int i = 0; i < INTRA_MODE_END; i++) {
+ for (int j = 0; j < SIZE_OF_ANGLE_DELTA_RD_COST_ARRAY; j++) {
+ intra_modes_rd_cost[i][j] = INT64_MAX;
+ }
+ }
+
for (int mode_idx = INTRA_MODE_START; mode_idx < LUMA_MODE_COUNT;
++mode_idx) {
- set_y_mode_and_delta_angle(mode_idx, mbmi);
+ set_y_mode_and_delta_angle(mode_idx, mbmi,
+ intra_sf->prune_luma_odd_delta_angles_in_intra);
RD_STATS this_rd_stats;
int this_rate, this_rate_tokenonly, s;
int is_diagonal_mode;
int64_t this_distortion, this_rd;
+ const int luma_delta_angle = mbmi->angle_delta[PLANE_TYPE_Y];
is_diagonal_mode = av1_is_diagonal_mode(mbmi->mode);
if (is_diagonal_mode && !intra_mode_cfg->enable_diagonal_intra) continue;
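Worked index arithmetic behind the odd-delta pruning, for MAX_ANGLE_DELTA == 3 (derived from the comments and code above):

/* rd costs are recorded at slot (delta + MAX_ANGLE_DELTA + 1): slots
 * 0..8 cover deltas -4..4, with slots 0 and 8 fixed at INT64_MAX.
 * For an odd delta d, the neighbours d - 1 and d + 1 live at slots
 * (d + MAX_ANGLE_DELTA) and (d + MAX_ANGLE_DELTA + 2):
 *   d == -3 -> slots 0 (sentinel) and 2 (delta -2)
 *   d == -1 -> slots 2 (delta -2) and 4 (delta  0)
 *   d ==  1 -> slots 4 (delta  0) and 6 (delta  2)
 *   d ==  3 -> slots 6 (delta  2) and 8 (sentinel)
 * The odd delta is skipped only when both slots exceed
 * best_rd + (best_rd >> 3), matching rd_thresh above. */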
@@ -1414,7 +1550,7 @@ int64_t av1_rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue;
if (is_directional_mode &&
!(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) &&
- mbmi->angle_delta[PLANE_TYPE_Y] != 0)
+ luma_delta_angle != 0)
continue;
// Use intra_y_mode_mask speed feature to skip intra mode evaluation.
@@ -1422,6 +1558,11 @@ int64_t av1_rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
(1 << mbmi->mode)))
continue;
+ if (prune_luma_odd_delta_angles_using_rd_cost(
+ mbmi, intra_modes_rd_cost[mbmi->mode], best_rd,
+ intra_sf->prune_luma_odd_delta_angles_in_intra))
+ continue;
+
const TX_SIZE tx_size = AOMMIN(TX_32X32, max_txsize_lookup[bsize]);
const int64_t this_model_rd =
intra_model_rd(&cpi->common, x, 0, bsize, tx_size, /*use_hadamard=*/1);
@@ -1462,6 +1603,9 @@ int64_t av1_rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
this_rd = (int64_t)(this_rd * intra_rd_variance_factor(cpi, x, bsize));
}
+ intra_modes_rd_cost[mbmi->mode][luma_delta_angle + MAX_ANGLE_DELTA + 1] =
+ this_rd;
+
// Collect mode stats for multiwinner mode processing
const int txfm_search_done = 1;
store_winner_mode_stats(
@@ -1518,7 +1662,7 @@ int64_t av1_rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
for (int mode_idx = 0; mode_idx < x->winner_mode_count; mode_idx++) {
*mbmi = x->winner_mode_stats[mode_idx].mbmi;
- if (is_winner_mode_processing_enabled(cpi, x, mbmi, mbmi->mode)) {
+ if (is_winner_mode_processing_enabled(cpi, x, mbmi, 0)) {
// Restore color_map of palette mode before winner mode processing
if (mbmi->palette_mode_info.palette_size[0] > 0) {
uint8_t *color_map_src =
@@ -1550,7 +1694,7 @@ int64_t av1_rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
// If previous searches use only the default tx type/no R-D optimization of
// quantized coeffs, do an extra search for the best tx type/better R-D
// optimization of quantized coeffs
- if (is_winner_mode_processing_enabled(cpi, x, mbmi, best_mbmi.mode)) {
+ if (is_winner_mode_processing_enabled(cpi, x, mbmi, 0)) {
// Set params for winner mode evaluation
set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);
*mbmi = best_mbmi;
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/intra_mode_search.h b/chromium/third_party/libaom/source/libaom/av1/encoder/intra_mode_search.h
index 0968558e7d9..75289c4e3c4 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/intra_mode_search.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/intra_mode_search.h
@@ -62,7 +62,7 @@ typedef struct IntraModeSearchState {
int rate_uv_intra; /*!< \brief Total rate to transmit uv_mode */
int rate_uv_tokenonly; /*!< \brief Rate transmit txfm tokens */
int64_t dist_uvs; /*!< \brief Distortion of the uv_mode's recon */
- int skip_uvs; /*!< \brief Whether the uv txfm is skippable */
+ uint8_t skip_uvs; /*!< \brief Whether the uv txfm is skippable */
UV_PREDICTION_MODE mode_uv; /*!< \brief The best uv mode */
PALETTE_MODE_INFO pmi_uv; /*!< \brief Color map if mode_uv is palette */
int8_t uv_angle_delta; /*!< \brief Angle delta if mode_uv directional */
@@ -196,8 +196,6 @@ int av1_search_palette_mode(IntraModeSearchState *intra_search_state,
* \param[in] this_rd_cost Struct to keep track of palette mode's
* rd_stats.
* \param[in] best_rd Best RD seen for this block so far.
- *
- * \return Returns nothing.
*/
void av1_search_palette_mode_luma(const AV1_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, unsigned int ref_frame_cost,
@@ -236,7 +234,7 @@ void av1_search_palette_mode_luma(const AV1_COMP *cpi, MACROBLOCK *x,
*/
int64_t av1_rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
int *rate, int *rate_tokenonly,
- int64_t *distortion, int *skippable,
+ int64_t *distortion, uint8_t *skippable,
BLOCK_SIZE bsize, int64_t best_rd,
PICK_MODE_CONTEXT *ctx);
@@ -271,7 +269,7 @@ int64_t av1_rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
*/
int64_t av1_rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
int *rate, int *rate_tokenonly,
- int64_t *distortion, int *skippable,
+ int64_t *distortion, uint8_t *skippable,
BLOCK_SIZE bsize, TX_SIZE max_tx_size);
/*! \brief Return the number of colors in src. Used by palette mode.
@@ -299,12 +297,15 @@ static AOM_INLINE void init_intra_mode_search_state(
* The first 13 modes are from DC_PRED to PAETH_PRED, followed by directional
* modes. Each of the main 8 directional modes have 6 = MAX_ANGLE_DELTA * 2
* delta angles.
- * \param[in] mode_idx mode index in intra mode decision
- * process.
- * \param[in] mbmi Pointer to structure holding
- * the mode info for the current macroblock.
+ * \param[in] mode_idx mode index in intra mode decision
+ * process.
+ * \param[in] mbmi Pointer to structure holding the mode
+ * info for the current macroblock.
+ * \param[in] reorder_delta_angle_eval Indicates whether to reorder the
+ * evaluation of delta angle modes.
*/
-void set_y_mode_and_delta_angle(const int mode_idx, MB_MODE_INFO *const mbmi);
+void set_y_mode_and_delta_angle(const int mode_idx, MB_MODE_INFO *const mbmi,
+ int reorder_delta_angle_eval);
/*! \brief prune luma intra mode based on the model rd.
* \param[in] this_model_rd model rd for current mode.
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/level.c b/chromium/third_party/libaom/source/libaom/av1/encoder/level.c
index e3abe35dd18..eab47286576 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/level.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/level.c
@@ -1223,7 +1223,7 @@ void av1_update_level_info(AV1_COMP *cpi, size_t size, int64_t ts_start,
// Check whether target level is met.
const AV1_LEVEL target_level = level_params->target_seq_level_idx[i];
- if (target_level < SEQ_LEVELS && cpi->oxcf.strict_level_conformance == 1) {
+ if (target_level < SEQ_LEVELS && cpi->oxcf.strict_level_conformance) {
assert(is_valid_seq_level_idx(target_level));
const int tier = seq_params->tier[i];
const TARGET_LEVEL_FAIL_ID fail_id = check_level_constraints(
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/lookahead.c b/chromium/third_party/libaom/source/libaom/av1/encoder/lookahead.c
index e6d72131800..10fbb77cb49 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/lookahead.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/lookahead.c
@@ -82,8 +82,9 @@ struct lookahead_ctx *av1_lookahead_init(
if (aom_realloc_frame_buffer(
&ctx->buf[i].img, width, height, subsampling_x, subsampling_y,
use_highbitdepth, border_in_pixels, byte_alignment, NULL, NULL,
- NULL, enable_global_motion))
+ NULL, enable_global_motion, 0)) {
goto fail;
+ }
}
}
return ctx;
@@ -92,6 +93,11 @@ fail:
return NULL;
}
+int av1_lookahead_full(const struct lookahead_ctx *ctx) {
+ // TODO(angiebird): Test this function.
+ return ctx->read_ctxs[ENCODE_STAGE].sz >= ctx->read_ctxs[ENCODE_STAGE].pop_sz;
+}
+
int av1_lookahead_push(struct lookahead_ctx *ctx, const YV12_BUFFER_CONFIG *src,
int64_t ts_start, int64_t ts_end, int use_highbitdepth,
aom_enc_frame_flags_t flags) {
@@ -104,12 +110,14 @@ int av1_lookahead_push(struct lookahead_ctx *ctx, const YV12_BUFFER_CONFIG *src,
int larger_dimensions, new_dimensions;
assert(ctx->read_ctxs[ENCODE_STAGE].valid == 1);
- if (ctx->read_ctxs[ENCODE_STAGE].sz + 1 + ctx->max_pre_frames > ctx->max_sz)
+ if (ctx->read_ctxs[ENCODE_STAGE].sz + ctx->max_pre_frames > ctx->max_sz)
return 1;
+
ctx->read_ctxs[ENCODE_STAGE].sz++;
if (ctx->read_ctxs[LAP_STAGE].valid) {
ctx->read_ctxs[LAP_STAGE].sz++;
}
+
struct lookahead_entry *buf = pop(ctx, &ctx->write_idx);
new_dimensions = width != buf->img.y_crop_width ||
@@ -126,7 +134,7 @@ int av1_lookahead_push(struct lookahead_ctx *ctx, const YV12_BUFFER_CONFIG *src,
memset(&new_img, 0, sizeof(new_img));
if (aom_alloc_frame_buffer(&new_img, width, height, subsampling_x,
subsampling_y, use_highbitdepth,
- AOM_BORDER_IN_PIXELS, 0))
+ AOM_BORDER_IN_PIXELS, 0, 0))
return 1;
aom_free_frame_buffer(&buf->img);
buf->img = new_img;
@@ -147,7 +155,10 @@ int av1_lookahead_push(struct lookahead_ctx *ctx, const YV12_BUFFER_CONFIG *src,
buf->flags = flags;
++ctx->push_frame_count;
aom_remove_metadata_from_frame_buffer(&buf->img);
- aom_copy_metadata_to_frame_buffer(&buf->img, src->metadata);
+ if (src->metadata &&
+ aom_copy_metadata_to_frame_buffer(&buf->img, src->metadata)) {
+ return 1;
+ }
return 0;
}
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/lookahead.h b/chromium/third_party/libaom/source/libaom/av1/encoder/lookahead.h
index c9e1c9a52b4..bd7cae4c465 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/lookahead.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/lookahead.h
@@ -76,6 +76,10 @@ struct lookahead_ctx *av1_lookahead_init(
*/
void av1_lookahead_destroy(struct lookahead_ctx *ctx);
+/**\brief Check if lookahead buffer is full
+ */
+int av1_lookahead_full(const struct lookahead_ctx *ctx);
+
/**\brief Enqueue a source buffer
*
* This function will copy the source image into a new framebuffer with
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/mcomp.c b/chromium/third_party/libaom/source/libaom/av1/encoder/mcomp.c
index de0d811a2bc..ff48fc36d5f 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/mcomp.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/mcomp.c
@@ -1685,6 +1685,8 @@ int av1_full_pixel_search(const FULLPEL_MV start_mv,
cost_list[4] = INT_MAX;
}
+ assert(ms_params->ms_buffers.ref->stride == ms_params->search_sites->stride);
+
switch (search_method) {
case FAST_BIGDIA:
var = fast_bigdia_search(start_mv, ms_params, step_param, 0, cost_list,
@@ -1944,7 +1946,8 @@ unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x,
uint8_t const *ref_buf, *src_buf;
int_mv *best_int_mv = &xd->mi[0]->mv[0];
unsigned int best_sad, tmp_sad, this_sad[4];
- const int norm_factor = 3 + (bw >> 5);
+ const int row_norm_factor = mi_size_high_log2[bsize] + 1;
+ const int col_norm_factor = 3 + (bw >> 5);
const YV12_BUFFER_CONFIG *scaled_ref_frame =
av1_get_scaled_ref_frame(cpi, mi->ref_frame[0]);
static const MV search_pos[4] = {
@@ -1979,28 +1982,16 @@ unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x,
// Set up prediction 1-D reference set
ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
- for (idx = 0; idx < search_width; idx += 16) {
- aom_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
- ref_buf += 16;
- }
+ aom_int_pro_row(hbuf, ref_buf, ref_stride, search_width, bh, row_norm_factor);
ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
- for (idx = 0; idx < search_height; ++idx) {
- vbuf[idx] = aom_int_pro_col(ref_buf, bw) >> norm_factor;
- ref_buf += ref_stride;
- }
+ aom_int_pro_col(vbuf, ref_buf, ref_stride, bw, search_height,
+ col_norm_factor);
// Set up src 1-D reference set
- for (idx = 0; idx < bw; idx += 16) {
- src_buf = x->plane[0].src.buf + idx;
- aom_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
- }
-
src_buf = x->plane[0].src.buf;
- for (idx = 0; idx < bh; ++idx) {
- src_vbuf[idx] = aom_int_pro_col(src_buf, bw) >> norm_factor;
- src_buf += src_stride;
- }
+ aom_int_pro_row(src_hbuf, src_buf, src_stride, bw, bh, row_norm_factor);
+ aom_int_pro_col(src_vbuf, src_buf, src_stride, bw, bh, col_norm_factor);
// Find the best match per 1-D search
best_int_mv->as_fullmv.col =
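The rewrite folds the per-16-column strips and the explicit per-row loop into single aom_int_pro_row()/aom_int_pro_col() calls that now take the width/height and a normalization shift. A plausible scalar model of the row-projection kernel, inferred from the call sites; the real routines are SIMD kernels and the exact normalization here is an assumption:

#include <stdint.h>

/* Hypothetical scalar equivalent: for each of `width` columns, sum
 * `height` pixels and scale down by `norm_factor` bits, producing one
 * projection value per column. */
static void int_pro_row_sketch(int16_t *hbuf, const uint8_t *ref,
                               int ref_stride, int width, int height,
                               int norm_factor) {
  for (int c = 0; c < width; ++c) {
    int sum = 0;
    for (int r = 0; r < height; ++r) sum += ref[r * ref_stride + c];
    hbuf[c] = (int16_t)(sum >> norm_factor);
  }
}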
@@ -2142,16 +2133,13 @@ static int obmc_diamond_search_sad(
const int32_t *wsrc = ms_buffers->wsrc;
const int32_t *mask = ms_buffers->obmc_mask;
const struct buf_2d *const ref_buf = ms_buffers->ref;
- // search_step determines the length of the initial step and hence the number
- // of iterations
- // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 =
- // (MAX_FIRST_STEP/4) pel... etc.
- const int tot_steps = MAX_MVSEARCH_STEPS - 1 - search_step;
+ // search_step determines the length of the initial step and hence the number
+ // of iterations.
+ const int tot_steps = cfg->num_search_steps - search_step;
const uint8_t *best_address, *init_ref;
int best_sad = INT_MAX;
int best_site = 0;
- int step;
clamp_fullmv(&start_mv, &ms_params->mv_limits);
best_address = init_ref = get_buf_from_fullmv(ref_buf, &start_mv);
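
For example, with cfg->num_search_steps = 11 and search_step = 2, tot_steps is 9 and the loop below visits steps 8 down to 0, so larger search_step values skip the coarsest (largest-radius) stages.
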
@@ -2162,7 +2150,7 @@ static int obmc_diamond_search_sad(
best_sad = fn_ptr->osdf(best_address, ref_buf->stride, wsrc, mask) +
mvsad_err_cost_(best_mv, mv_cost_params);
- for (step = tot_steps; step >= 0; --step) {
+ for (int step = tot_steps - 1; step >= 0; --step) {
const search_site *const site = cfg->site[step];
best_site = 0;
for (int idx = 1; idx <= cfg->searches_per_step[step]; ++idx) {
@@ -2195,7 +2183,7 @@ static int obmc_diamond_search_sad(
static int obmc_full_pixel_diamond(
const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const FULLPEL_MV start_mv,
- int step_param, int do_refine, FULLPEL_MV *best_mv) {
+ int step_param, FULLPEL_MV *best_mv) {
const search_site_config *cfg = ms_params->search_sites;
FULLPEL_MV tmp_mv;
int thissme, n, num00 = 0;
@@ -2207,7 +2195,6 @@ static int obmc_full_pixel_diamond(
// If there won't be more n-step search, check to see if refining search is
// needed.
const int further_steps = cfg->num_search_steps - 1 - step_param;
- if (n > further_steps) do_refine = 0;
while (n < further_steps) {
++n;
@@ -2219,9 +2206,6 @@ static int obmc_full_pixel_diamond(
step_param + n, &num00);
if (thissme < INT_MAX) thissme = get_obmc_mvpred_var(ms_params, &tmp_mv);
- // check to see if refining search is needed.
- if (num00 > further_steps - n) do_refine = 0;
-
if (thissme < bestsme) {
bestsme = thissme;
*best_mv = tmp_mv;
@@ -2229,16 +2213,6 @@ static int obmc_full_pixel_diamond(
}
}
- // final 1-away diamond refining search
- if (do_refine) {
- tmp_mv = *best_mv;
- thissme = obmc_refining_search_sad(ms_params, &tmp_mv);
- if (thissme < INT_MAX) thissme = get_obmc_mvpred_var(ms_params, &tmp_mv);
- if (thissme < bestsme) {
- bestsme = thissme;
- *best_mv = tmp_mv;
- }
- }
return bestsme;
}
@@ -2246,9 +2220,8 @@ int av1_obmc_full_pixel_search(const FULLPEL_MV start_mv,
const FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
const int step_param, FULLPEL_MV *best_mv) {
if (!ms_params->fast_obmc_search) {
- const int do_refine = 1;
- const int bestsme = obmc_full_pixel_diamond(ms_params, start_mv, step_param,
- do_refine, best_mv);
+ const int bestsme =
+ obmc_full_pixel_diamond(ms_params, start_mv, step_param, best_mv);
return bestsme;
} else {
*best_mv = start_mv;
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/mcomp.h b/chromium/third_party/libaom/source/libaom/av1/encoder/mcomp.h
index 67f2328b10b..3917d84358b 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/mcomp.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/mcomp.h
@@ -22,40 +22,6 @@
extern "C" {
#endif
-// The maximum number of steps in a step search given the largest
-// allowed initial step
-#define MAX_MVSEARCH_STEPS 11
-// Max full pel mv specified in the unit of full pixel
-// Enable the use of motion vector in range [-1023, 1023].
-#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS - 1)) - 1)
-// Maximum size of the first step in full pel units
-#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS - 1))
-
-#define SEARCH_RANGE_8P 3
-#define SEARCH_GRID_STRIDE_8P (2 * SEARCH_RANGE_8P + 1)
-#define SEARCH_GRID_CENTER_8P \
- (SEARCH_RANGE_8P * SEARCH_GRID_STRIDE_8P + SEARCH_RANGE_8P)
-
-// motion search site
-typedef struct search_site {
- FULLPEL_MV mv;
- int offset;
-} search_site;
-
-typedef struct search_site_config {
- search_site site[MAX_MVSEARCH_STEPS * 2][16 + 1];
- // Number of search steps.
- int num_search_steps;
- int searches_per_step[MAX_MVSEARCH_STEPS * 2];
- int radius[MAX_MVSEARCH_STEPS * 2];
- int stride;
-} search_site_config;
-
-typedef struct {
- FULLPEL_MV coord;
- int coord_offset;
-} search_neighbors;
-
struct AV1_COMP;
struct SPEED_FEATURES;
@@ -130,39 +96,6 @@ static INLINE void av1_set_ms_compound_refs(MSBuffers *ms_buffers,
// =============================================================================
// Fullpixel Motion Search
// =============================================================================
-enum {
- // Search 8-points in the radius grid around center, up to 11 search stages.
- DIAMOND = 0,
- // Search 12-points in the radius/tan_radius grid around center,
- // up to 15 search stages.
- NSTEP = 1,
- // Search 8-points in the radius grid around center, up to 16 search stages.
- NSTEP_8PT = 2,
- // Search 8-points in the radius grid around center, upto 11 search stages
- // with clamping of search radius.
- CLAMPED_DIAMOND = 3,
- // Search maximum 8-points in the radius grid around center,
- // up to 11 search stages. First stage consists of 8 search points
- // and the rest with 6 search points each in hex shape.
- HEX = 4,
- // Search maximum 8-points in the radius grid around center,
- // up to 11 search stages. First stage consists of 4 search
- // points and the rest with 8 search points each.
- BIGDIA = 5,
- // Search 8-points in the square grid around center, up to 11 search stages.
- SQUARE = 6,
- // HEX search with up to 2 stages.
- FAST_HEX = 7,
- // BIGDIA search with up to 2 stages.
- FAST_DIAMOND = 8,
- // BIGDIA search with up to 3 stages.
- FAST_BIGDIA = 9,
- // Total number of search methods.
- NUM_SEARCH_METHODS,
- // Number of distinct search methods.
- NUM_DISTINCT_SEARCH_METHODS = SQUARE + 1,
-} UENUM1BYTE(SEARCH_METHODS);
-
// This struct holds fullpixel motion search parameters that should be constant
// during the search
typedef struct {
@@ -237,26 +170,40 @@ void av1_init_motion_compensation_hex(search_site_config *cfg, int stride,
void av1_init_motion_compensation_square(search_site_config *cfg, int stride,
int level);
+/*! Function pointer type for the search site config initialization routines
+ * of the different search methods. */
+typedef void (*av1_init_search_site_config)(search_site_config *cfg, int stride,
+ int level);
+
+/*! Array of function pointers used to set up the motion search config. */
+static const av1_init_search_site_config
+ av1_init_motion_compensation[NUM_DISTINCT_SEARCH_METHODS] = {
+ av1_init_dsmotion_compensation, av1_init_motion_compensation_nstep,
+ av1_init_motion_compensation_nstep, av1_init_dsmotion_compensation,
+ av1_init_motion_compensation_hex, av1_init_motion_compensation_bigdia,
+ av1_init_motion_compensation_square
+ };
+
+// Array mapping each search method to the method that shares its candidates
+// but may differ in the number of search steps.
+static const SEARCH_METHODS search_method_lookup[NUM_SEARCH_METHODS] = {
+ DIAMOND, // DIAMOND
+ NSTEP, // NSTEP
+ NSTEP_8PT, // NSTEP_8PT
+ CLAMPED_DIAMOND, // CLAMPED_DIAMOND
+ HEX, // HEX
+ BIGDIA, // BIGDIA
+ SQUARE, // SQUARE
+ HEX, // FAST_HEX
+ BIGDIA, // FAST_DIAMOND
+ BIGDIA // FAST_BIGDIA
+};
+
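
Together, search_method_lookup and av1_init_motion_compensation let a caller lazily rebuild a site config for any search method and reference stride. A hedged usage sketch, mirroring the logic of av1_get_search_site_config() added in motion_search_facade.h further below (cfg_buf and ref_stride are illustrative locals):

    /* Map a fast variant to its distinct base method, then (re)initialize
     * the site config for the actual reference stride. */
    const int level =
        (search_method == NSTEP_8PT || search_method == CLAMPED_DIAMOND);
    const SEARCH_METHODS base = search_method_lookup[search_method];
    av1_init_motion_compensation[base](&cfg_buf[base], ref_stride, level);
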
// Mv beyond the range do not produce new/different prediction block.
static INLINE void av1_set_mv_search_method(
FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
const search_site_config search_sites[NUM_DISTINCT_SEARCH_METHODS],
SEARCH_METHODS search_method) {
- // Array to inform which all search methods are having
- // same candidates and different in number of search steps.
- static const SEARCH_METHODS search_method_lookup[NUM_SEARCH_METHODS] = {
- DIAMOND, // DIAMOND
- NSTEP, // NSTEP
- NSTEP_8PT, // NSTEP_8PT
- CLAMPED_DIAMOND, // CLAMPED_DIAMOND
- HEX, // HEX
- BIGDIA, // BIGDIA
- SQUARE, // SQUARE
- HEX, // FAST_HEX
- BIGDIA, // FAST_DIAMOND
- BIGDIA // FAST_BIGDIA
- };
-
ms_params->search_method = search_method;
ms_params->search_sites =
&search_sites[search_method_lookup[ms_params->search_method]];
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/mcomp_structs.h b/chromium/third_party/libaom/source/libaom/av1/encoder/mcomp_structs.h
new file mode 100644
index 00000000000..322021841de
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/mcomp_structs.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_AV1_ENCODER_MCOMP_STRUCTS_H_
+#define AOM_AV1_ENCODER_MCOMP_STRUCTS_H_
+
+#include "av1/common/mv.h"
+
+// The maximum number of steps in a step search given the largest
+// allowed initial step
+#define MAX_MVSEARCH_STEPS 11
+// Max full pel mv specified in the unit of full pixel
+// Enable the use of motion vector in range [-1023, 1023].
+#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS - 1)) - 1)
+// Maximum size of the first step in full pel units
+#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS - 1))
+
+#define SEARCH_RANGE_8P 3
+#define SEARCH_GRID_STRIDE_8P (2 * SEARCH_RANGE_8P + 1)
+#define SEARCH_GRID_CENTER_8P \
+ (SEARCH_RANGE_8P * SEARCH_GRID_STRIDE_8P + SEARCH_RANGE_8P)
+
+typedef struct {
+ FULLPEL_MV coord;
+ int coord_offset;
+} search_neighbors;
+// motion search site
+typedef struct search_site {
+ FULLPEL_MV mv;
+ int offset;
+} search_site;
+
+typedef struct search_site_config {
+ search_site site[MAX_MVSEARCH_STEPS * 2][16 + 1];
+ // Number of search steps.
+ int num_search_steps;
+ int searches_per_step[MAX_MVSEARCH_STEPS * 2];
+ int radius[MAX_MVSEARCH_STEPS * 2];
+ int stride;
+} search_site_config;
+
+enum {
+ // Search 8-points in the radius grid around center, up to 11 search stages.
+ DIAMOND = 0,
+ // Search 12-points in the radius/tan_radius grid around center,
+ // up to 15 search stages.
+ NSTEP = 1,
+ // Search 8-points in the radius grid around center, up to 16 search stages.
+ NSTEP_8PT = 2,
+ // Search 8-points in the radius grid around center, up to 11 search stages
+ // with clamping of search radius.
+ CLAMPED_DIAMOND = 3,
+ // Search maximum 8-points in the radius grid around center,
+ // up to 11 search stages. First stage consists of 8 search points
+ // and the rest with 6 search points each in hex shape.
+ HEX = 4,
+ // Search maximum 8-points in the radius grid around center,
+ // up to 11 search stages. First stage consists of 4 search
+ // points and the rest with 8 search points each.
+ BIGDIA = 5,
+ // Search 8-points in the square grid around center, up to 11 search stages.
+ SQUARE = 6,
+ // HEX search with up to 2 stages.
+ FAST_HEX = 7,
+ // BIGDIA search with up to 2 stages.
+ FAST_DIAMOND = 8,
+ // BIGDIA search with up to 3 stages.
+ FAST_BIGDIA = 9,
+ // Total number of search methods.
+ NUM_SEARCH_METHODS,
+ // Number of distinct search methods.
+ NUM_DISTINCT_SEARCH_METHODS = SQUARE + 1,
+} UENUM1BYTE(SEARCH_METHODS);
+
+#endif // AOM_AV1_ENCODER_MCOMP_STRUCTS_H_
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/mips/msa/error_msa.c b/chromium/third_party/libaom/source/libaom/av1/encoder/mips/msa/error_msa.c
deleted file mode 100644
index 2e86dee4309..00000000000
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/mips/msa/error_msa.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/mips/macros_msa.h"
-
-#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \
- static int64_t block_error_##BSize##size_msa( \
- const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) { \
- int64_t err = 0; \
- uint32_t loop_cnt; \
- v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \
- v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \
- v2i64 sq_coeff_r, sq_coeff_l; \
- v2i64 err0, err_dup0, err1, err_dup1; \
- \
- coeff = LD_SH(coeff_ptr); \
- dq_coeff = LD_SH(dq_coeff_ptr); \
- UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
- ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
- HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
- DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, sq_coeff_r, \
- sq_coeff_l); \
- DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \
- \
- coeff = LD_SH(coeff_ptr + 8); \
- dq_coeff = LD_SH(dq_coeff_ptr + 8); \
- UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
- ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
- HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
- DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
- DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
- \
- coeff_ptr += 16; \
- dq_coeff_ptr += 16; \
- \
- for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \
- coeff = LD_SH(coeff_ptr); \
- dq_coeff = LD_SH(dq_coeff_ptr); \
- UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
- ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
- HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
- DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
- DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
- \
- coeff = LD_SH(coeff_ptr + 8); \
- dq_coeff = LD_SH(dq_coeff_ptr + 8); \
- UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
- ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
- HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
- DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
- DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
- \
- coeff_ptr += 16; \
- dq_coeff_ptr += 16; \
- } \
- \
- err_dup0 = __msa_splati_d(sq_coeff_r, 1); \
- err_dup1 = __msa_splati_d(sq_coeff_l, 1); \
- sq_coeff_r += err_dup0; \
- sq_coeff_l += err_dup1; \
- *ssz = __msa_copy_s_d(sq_coeff_r, 0); \
- *ssz += __msa_copy_s_d(sq_coeff_l, 0); \
- \
- err_dup0 = __msa_splati_d(err0, 1); \
- err_dup1 = __msa_splati_d(err1, 1); \
- err0 += err_dup0; \
- err1 += err_dup1; \
- err = __msa_copy_s_d(err0, 0); \
- err += __msa_copy_s_d(err1, 0); \
- \
- return err; \
- }
-
-/* clang-format off */
-BLOCK_ERROR_BLOCKSIZE_MSA(16)
-BLOCK_ERROR_BLOCKSIZE_MSA(64)
-BLOCK_ERROR_BLOCKSIZE_MSA(256)
-BLOCK_ERROR_BLOCKSIZE_MSA(1024)
-/* clang-format on */
-
-int64_t av1_block_error_msa(const tran_low_t *coeff_ptr,
- const tran_low_t *dq_coeff_ptr, intptr_t blk_size,
- int64_t *ssz) {
- int64_t err;
- const int16_t *coeff = (const int16_t *)coeff_ptr;
- const int16_t *dq_coeff = (const int16_t *)dq_coeff_ptr;
-
- switch (blk_size) {
- case 16: err = block_error_16size_msa(coeff, dq_coeff, ssz); break;
- case 64: err = block_error_64size_msa(coeff, dq_coeff, ssz); break;
- case 256: err = block_error_256size_msa(coeff, dq_coeff, ssz); break;
- case 1024: err = block_error_1024size_msa(coeff, dq_coeff, ssz); break;
- default:
- err = av1_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz);
- break;
- }
-
- return err;
-}
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/mips/msa/fdct4x4_msa.c b/chromium/third_party/libaom/source/libaom/av1/encoder/mips/msa/fdct4x4_msa.c
deleted file mode 100644
index 085c08bfb81..00000000000
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/mips/msa/fdct4x4_msa.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include <assert.h>
-
-#include "av1/common/enums.h"
-
-void av1_fwht4x4_msa(const int16_t *input, int16_t *output,
- int32_t src_stride) {
- v8i16 in0, in1, in2, in3, in4;
-
- LD_SH4(input, src_stride, in0, in1, in2, in3);
-
- in0 += in1;
- in3 -= in2;
- in4 = (in0 - in3) >> 1;
- SUB2(in4, in1, in4, in2, in1, in2);
- in0 -= in2;
- in3 += in1;
-
- TRANSPOSE4x4_SH_SH(in0, in2, in3, in1, in0, in2, in3, in1);
-
- in0 += in2;
- in1 -= in3;
- in4 = (in0 - in1) >> 1;
- SUB2(in4, in2, in4, in3, in2, in3);
- in0 -= in3;
- in1 += in2;
-
- SLLI_4V(in0, in1, in2, in3, 2);
-
- TRANSPOSE4x4_SH_SH(in0, in3, in1, in2, in0, in3, in1, in2);
-
- ST4x2_UB(in0, output, 4);
- ST4x2_UB(in3, output + 4, 4);
- ST4x2_UB(in1, output + 8, 4);
- ST4x2_UB(in2, output + 12, 4);
-}
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/mips/msa/temporal_filter_msa.c b/chromium/third_party/libaom/source/libaom/av1/encoder/mips/msa/temporal_filter_msa.c
deleted file mode 100644
index effa75b83f3..00000000000
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/mips/msa/temporal_filter_msa.c
+++ /dev/null
@@ -1,286 +0,0 @@
-/*
- * Copyright (c) 2016, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "config/av1_rtcd.h"
-
-#include "aom_dsp/mips/macros_msa.h"
-
-static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr, uint32_t stride,
- uint8_t *frm2_ptr, int32_t filt_sth,
- int32_t filt_wgt, uint32_t *acc,
- uint16_t *cnt) {
- uint32_t row;
- uint64_t f0, f1, f2, f3;
- v16i8 frm2, frm1 = { 0 };
- v16i8 frm4, frm3 = { 0 };
- v16u8 frm_r, frm_l;
- v8i16 frm2_r, frm2_l;
- v8i16 diff0, diff1, mod0_h, mod1_h;
- v4i32 cnst3, cnst16, filt_wt, strength;
- v4i32 mod0_w, mod1_w, mod2_w, mod3_w;
- v4i32 diff0_r, diff0_l, diff1_r, diff1_l;
- v4i32 frm2_rr, frm2_rl, frm2_lr, frm2_ll;
- v4i32 acc0, acc1, acc2, acc3;
- v8i16 cnt0, cnt1;
-
- filt_wt = __msa_fill_w(filt_wgt);
- strength = __msa_fill_w(filt_sth);
- cnst3 = __msa_ldi_w(3);
- cnst16 = __msa_ldi_w(16);
-
- for (row = 2; row--;) {
- LD4(frm1_ptr, stride, f0, f1, f2, f3);
- frm1_ptr += (4 * stride);
-
- LD_SB2(frm2_ptr, 16, frm2, frm4);
- frm2_ptr += 32;
-
- LD_SW2(acc, 4, acc0, acc1);
- LD_SW2(acc + 8, 4, acc2, acc3);
- LD_SH2(cnt, 8, cnt0, cnt1);
-
- INSERT_D2_SB(f0, f1, frm1);
- INSERT_D2_SB(f2, f3, frm3);
- ILVRL_B2_UB(frm1, frm2, frm_r, frm_l);
- HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
- UNPCK_SH_SW(diff0, diff0_r, diff0_l);
- UNPCK_SH_SW(diff1, diff1_r, diff1_l);
- MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
- mod0_w, mod1_w, mod2_w, mod3_w);
- MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
- mod1_w, mod2_w, mod3_w);
- SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
-
- diff0_r = (mod0_w < cnst16);
- diff0_l = (mod1_w < cnst16);
- diff1_r = (mod2_w < cnst16);
- diff1_l = (mod3_w < cnst16);
-
- SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
- mod1_w, mod2_w, mod3_w);
-
- mod0_w = diff0_r & mod0_w;
- mod1_w = diff0_l & mod1_w;
- mod2_w = diff1_r & mod2_w;
- mod3_w = diff1_l & mod3_w;
-
- MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
- mod0_w, mod1_w, mod2_w, mod3_w);
- PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
- ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
- ST_SH2(mod0_h, mod1_h, cnt, 8);
- cnt += 16;
-
- UNPCK_UB_SH(frm2, frm2_r, frm2_l);
- UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
- UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
- MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
- mod0_w, mod1_w, mod2_w, mod3_w);
- ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
- mod2_w, mod3_w);
-
- ST_SW2(mod0_w, mod1_w, acc, 4);
- acc += 8;
- ST_SW2(mod2_w, mod3_w, acc, 4);
- acc += 8;
-
- LD_SW2(acc, 4, acc0, acc1);
- LD_SW2(acc + 8, 4, acc2, acc3);
- LD_SH2(cnt, 8, cnt0, cnt1);
-
- ILVRL_B2_UB(frm3, frm4, frm_r, frm_l);
- HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
- UNPCK_SH_SW(diff0, diff0_r, diff0_l);
- UNPCK_SH_SW(diff1, diff1_r, diff1_l);
- MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
- mod0_w, mod1_w, mod2_w, mod3_w);
- MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
- mod1_w, mod2_w, mod3_w);
- SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
-
- diff0_r = (mod0_w < cnst16);
- diff0_l = (mod1_w < cnst16);
- diff1_r = (mod2_w < cnst16);
- diff1_l = (mod3_w < cnst16);
-
- SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
- mod1_w, mod2_w, mod3_w);
-
- mod0_w = diff0_r & mod0_w;
- mod1_w = diff0_l & mod1_w;
- mod2_w = diff1_r & mod2_w;
- mod3_w = diff1_l & mod3_w;
-
- MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
- mod0_w, mod1_w, mod2_w, mod3_w);
- PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
- ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
- ST_SH2(mod0_h, mod1_h, cnt, 8);
- cnt += 16;
- UNPCK_UB_SH(frm4, frm2_r, frm2_l);
- UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
- UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
- MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
- mod0_w, mod1_w, mod2_w, mod3_w);
- ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
- mod2_w, mod3_w);
-
- ST_SW2(mod0_w, mod1_w, acc, 4);
- acc += 8;
- ST_SW2(mod2_w, mod3_w, acc, 4);
- acc += 8;
- }
-}
-
-static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr, uint32_t stride,
- uint8_t *frm2_ptr,
- int32_t filt_sth, int32_t filt_wgt,
- uint32_t *acc, uint16_t *cnt) {
- uint32_t row;
- v16i8 frm1, frm2, frm3, frm4;
- v16u8 frm_r, frm_l;
- v16i8 zero = { 0 };
- v8u16 frm2_r, frm2_l;
- v8i16 diff0, diff1, mod0_h, mod1_h;
- v4i32 cnst3, cnst16, filt_wt, strength;
- v4i32 mod0_w, mod1_w, mod2_w, mod3_w;
- v4i32 diff0_r, diff0_l, diff1_r, diff1_l;
- v4i32 frm2_rr, frm2_rl, frm2_lr, frm2_ll;
- v4i32 acc0, acc1, acc2, acc3;
- v8i16 cnt0, cnt1;
-
- filt_wt = __msa_fill_w(filt_wgt);
- strength = __msa_fill_w(filt_sth);
- cnst3 = __msa_ldi_w(3);
- cnst16 = __msa_ldi_w(16);
-
- for (row = 8; row--;) {
- LD_SB2(frm1_ptr, stride, frm1, frm3);
- frm1_ptr += stride;
-
- LD_SB2(frm2_ptr, 16, frm2, frm4);
- frm2_ptr += 16;
-
- LD_SW2(acc, 4, acc0, acc1);
- LD_SW2(acc, 4, acc2, acc3);
- LD_SH2(cnt, 8, cnt0, cnt1);
-
- ILVRL_B2_UB(frm1, frm2, frm_r, frm_l);
- HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
- UNPCK_SH_SW(diff0, diff0_r, diff0_l);
- UNPCK_SH_SW(diff1, diff1_r, diff1_l);
- MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
- mod0_w, mod1_w, mod2_w, mod3_w);
- MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
- mod1_w, mod2_w, mod3_w);
- SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
-
- diff0_r = (mod0_w < cnst16);
- diff0_l = (mod1_w < cnst16);
- diff1_r = (mod2_w < cnst16);
- diff1_l = (mod3_w < cnst16);
-
- SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
- mod1_w, mod2_w, mod3_w);
-
- mod0_w = diff0_r & mod0_w;
- mod1_w = diff0_l & mod1_w;
- mod2_w = diff1_r & mod2_w;
- mod3_w = diff1_l & mod3_w;
-
- MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
- mod0_w, mod1_w, mod2_w, mod3_w);
- PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
- ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
- ST_SH2(mod0_h, mod1_h, cnt, 8);
- cnt += 16;
-
- ILVRL_B2_UH(zero, frm2, frm2_r, frm2_l);
- UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
- UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
- MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
- mod0_w, mod1_w, mod2_w, mod3_w);
- ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
- mod2_w, mod3_w);
-
- ST_SW2(mod0_w, mod1_w, acc, 4);
- acc += 8;
- ST_SW2(mod2_w, mod3_w, acc, 4);
- acc += 8;
-
- LD_SW2(acc, 4, acc0, acc1);
- LD_SW2(acc + 8, 4, acc2, acc3);
- LD_SH2(cnt, 8, cnt0, cnt1);
-
- ILVRL_B2_UB(frm3, frm4, frm_r, frm_l);
- HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
- UNPCK_SH_SW(diff0, diff0_r, diff0_l);
- UNPCK_SH_SW(diff1, diff1_r, diff1_l);
- MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
- mod0_w, mod1_w, mod2_w, mod3_w);
- MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w,
- mod1_w, mod2_w, mod3_w);
- SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
-
- diff0_r = (mod0_w < cnst16);
- diff0_l = (mod1_w < cnst16);
- diff1_r = (mod2_w < cnst16);
- diff1_l = (mod3_w < cnst16);
-
- SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w,
- mod1_w, mod2_w, mod3_w);
-
- mod0_w = diff0_r & mod0_w;
- mod1_w = diff0_l & mod1_w;
- mod2_w = diff1_r & mod2_w;
- mod3_w = diff1_l & mod3_w;
-
- MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
- mod0_w, mod1_w, mod2_w, mod3_w);
- PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
- ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
- ST_SH2(mod0_h, mod1_h, cnt, 8);
- cnt += 16;
-
- ILVRL_B2_UH(zero, frm4, frm2_r, frm2_l);
- UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
- UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
- MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
- mod0_w, mod1_w, mod2_w, mod3_w);
- ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w,
- mod2_w, mod3_w);
- ST_SW2(mod0_w, mod1_w, acc, 4);
- acc += 8;
- ST_SW2(mod2_w, mod3_w, acc, 4);
- acc += 8;
-
- frm1_ptr += stride;
- frm2_ptr += 16;
- }
-}
-
-// TODO(yunqing) The following optimization is not used since c code changes.
-void av1_temporal_filter_apply_msa(uint8_t *frame1_ptr, uint32_t stride,
- uint8_t *frame2_ptr, uint32_t blk_w,
- uint32_t blk_h, int32_t strength,
- int32_t filt_wgt, uint32_t *accu,
- uint16_t *cnt) {
- if (8 == (blk_w * blk_h)) {
- temporal_filter_apply_8size_msa(frame1_ptr, stride, frame2_ptr, strength,
- filt_wgt, accu, cnt);
- } else if (16 == (blk_w * blk_h)) {
- temporal_filter_apply_16size_msa(frame1_ptr, stride, frame2_ptr, strength,
- filt_wgt, accu, cnt);
- } else {
- av1_temporal_filter_apply_c(frame1_ptr, stride, frame2_ptr, blk_w, blk_h,
- strength, filt_wgt, accu, cnt);
- }
-}
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/model_rd.h b/chromium/third_party/libaom/source/libaom/av1/encoder/model_rd.h
index db5ede4948a..f7e8b96b5ba 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/model_rd.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/model_rd.h
@@ -35,13 +35,11 @@ extern "C" {
#define MODELRD_TYPE_INTRA 1
#define MODELRD_TYPE_MOTION_MODE_RD 1
-typedef void (*model_rd_for_sb_type)(const AV1_COMP *const cpi,
- BLOCK_SIZE bsize, MACROBLOCK *x,
- MACROBLOCKD *xd, int plane_from,
- int plane_to, int *out_rate_sum,
- int64_t *out_dist_sum, int *skip_txfm_sb,
- int64_t *skip_sse_sb, int *plane_rate,
- int64_t *plane_sse, int64_t *plane_dist);
+typedef void (*model_rd_for_sb_type)(
+ const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
+ int plane_from, int plane_to, int *out_rate_sum, int64_t *out_dist_sum,
+ uint8_t *skip_txfm_sb, int64_t *skip_sse_sb, int *plane_rate,
+ int64_t *plane_sse, int64_t *plane_dist);
typedef void (*model_rd_from_sse_type)(const AV1_COMP *const cpi,
const MACROBLOCK *const x,
BLOCK_SIZE plane_bsize, int plane,
@@ -160,7 +158,7 @@ static AOM_INLINE void model_rd_with_curvfit(const AV1_COMP *const cpi,
static AOM_INLINE void model_rd_for_sb(
const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
int plane_from, int plane_to, int *out_rate_sum, int64_t *out_dist_sum,
- int *skip_txfm_sb, int64_t *skip_sse_sb, int *plane_rate,
+ uint8_t *skip_txfm_sb, int64_t *skip_sse_sb, int *plane_rate,
int64_t *plane_sse, int64_t *plane_dist) {
// Note our transform coeffs are 8 times an orthogonal transform.
// Hence quantizer step is also 8 times. To get effective quantizer
@@ -212,7 +210,7 @@ static AOM_INLINE void model_rd_for_sb(
static AOM_INLINE void model_rd_for_sb_with_curvfit(
const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd,
int plane_from, int plane_to, int *out_rate_sum, int64_t *out_dist_sum,
- int *skip_txfm_sb, int64_t *skip_sse_sb, int *plane_rate,
+ uint8_t *skip_txfm_sb, int64_t *skip_sse_sb, int *plane_rate,
int64_t *plane_sse, int64_t *plane_dist) {
// Note our transform coeffs are 8 times an orthogonal transform.
// Hence quantizer step is also 8 times. To get effective quantizer
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/motion_search_facade.c b/chromium/third_party/libaom/source/libaom/av1/encoder/motion_search_facade.c
index cad4e6225a2..2a2ad2e27eb 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/motion_search_facade.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/motion_search_facade.c
@@ -216,11 +216,19 @@ void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
}
}
+ // Hot fix for ASan complaints when resize mode is on. When resize mode is
+ // on, the stride of the reference frame can differ from the one indicated by
+ // MotionVectorSearchParams::search_site_cfg. When this happens, we need to
+ // readjust the stride.
+ const SEARCH_METHODS search_method = cpi->sf.mv_sf.search_method;
+ const int ref_stride = xd->plane[0].pre[0].stride;
+ const search_site_config *src_search_site_cfg = av1_get_search_site_config(
+ x->search_site_cfg_buf, mv_search_params, search_method, ref_stride);
+
// Further reduce the search range.
if (search_range < INT_MAX) {
const search_site_config *search_site_cfg =
- &mv_search_params
- ->search_site_cfg[SS_CFG_SRC][cpi->sf.mv_sf.search_method];
+ &src_search_site_cfg[search_method_lookup[cpi->sf.mv_sf.search_method]];
// Max step_param is search_site_cfg->num_search_steps.
if (search_range < 1) {
step_param = search_site_cfg->num_search_steps;
@@ -238,11 +246,9 @@ void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
// Allow more mesh searches for screen content type on the ARF.
const int fine_search_interval = use_fine_search_interval(cpi);
- const search_site_config *src_search_sites =
- mv_search_params->search_site_cfg[SS_CFG_SRC];
FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &ref_mv,
- src_search_sites, fine_search_interval);
+ src_search_site_cfg, fine_search_interval);
switch (mbmi->motion_mode) {
case SIMPLE_TRANSLATION: {
@@ -590,8 +596,11 @@ int av1_joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
// Make motion search params
FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
- const search_site_config *src_search_sites =
- cpi->mv_search_params.search_site_cfg[SS_CFG_SRC];
+ const SEARCH_METHODS search_method = cpi->sf.mv_sf.search_method;
+ const int ref_stride = xd->plane[0].pre[0].stride;
+ const search_site_config *src_search_sites = av1_get_search_site_config(
+ x->search_site_cfg_buf, &cpi->mv_search_params, search_method,
+ ref_stride);
av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize,
&ref_mv[id].as_mv, src_search_sites,
/*fine_search_interval=*/0);
@@ -738,8 +747,11 @@ int av1_compound_single_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
// Make motion search params
FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
+ const SEARCH_METHODS search_method = cpi->sf.mv_sf.search_method;
+ const int ref_stride = xd->plane[0].pre[0].stride;
const search_site_config *src_search_sites =
- cpi->mv_search_params.search_site_cfg[SS_CFG_SRC];
+ av1_get_search_site_config(x->search_site_cfg_buf, &cpi->mv_search_params,
+ search_method, ref_stride);
av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize,
&ref_mv.as_mv, src_search_sites,
/*fine_search_interval=*/0);
@@ -940,8 +952,6 @@ int_mv av1_simple_motion_search(AV1_COMP *const cpi, MACROBLOCK *x, int mi_row,
AOMMIN(cpi->mv_search_params.mv_step_param +
cpi->sf.part_sf.simple_motion_search_reduce_search_steps,
MAX_MVSEARCH_STEPS - 2);
- const search_site_config *src_search_sites =
- cpi->mv_search_params.search_site_cfg[SS_CFG_SRC];
int cost_list[5];
const int ref_idx = 0;
int var;
@@ -959,6 +969,11 @@ int_mv av1_simple_motion_search(AV1_COMP *const cpi, MACROBLOCK *x, int mi_row,
// Allow more mesh searches for screen content type on the ARF.
const int fine_search_interval = use_fine_search_interval(cpi);
FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
+ const SEARCH_METHODS search_method = cpi->sf.mv_sf.search_method;
+ const int ref_stride = xd->plane[0].pre[0].stride;
+ const search_site_config *src_search_sites =
+ av1_get_search_site_config(x->search_site_cfg_buf, &cpi->mv_search_params,
+ search_method, ref_stride);
av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &ref_mv,
src_search_sites, fine_search_interval);
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/motion_search_facade.h b/chromium/third_party/libaom/source/libaom/av1/encoder/motion_search_facade.h
index bf81fe243a3..bc69a6574c7 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/motion_search_facade.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/motion_search_facade.h
@@ -70,6 +70,28 @@ int_mv av1_simple_motion_sse_var(struct AV1_COMP *cpi, MACROBLOCK *x,
const FULLPEL_MV start_mv, int use_subpixel,
unsigned int *sse, unsigned int *var);
+static AOM_INLINE const search_site_config *av1_get_search_site_config(
+ search_site_config *ss_cfg_buf,
+ const MotionVectorSearchParams *mv_search_params,
+ SEARCH_METHODS search_method, const int ref_stride) {
+ if (ref_stride == mv_search_params->search_site_cfg[SS_CFG_SRC]->stride) {
+ return mv_search_params->search_site_cfg[SS_CFG_SRC];
+ } else if (ref_stride ==
+ mv_search_params->search_site_cfg[SS_CFG_LOOKAHEAD]->stride) {
+ return mv_search_params->search_site_cfg[SS_CFG_LOOKAHEAD];
+ }
+
+ if (ref_stride != ss_cfg_buf[search_method].stride) {
+ const int level =
+ search_method == NSTEP_8PT || search_method == CLAMPED_DIAMOND;
+ search_method = search_method_lookup[search_method];
+ av1_init_motion_compensation[search_method](&ss_cfg_buf[search_method],
+ ref_stride, level);
+ }
+
+ return ss_cfg_buf;
+}
+
#ifdef __cplusplus
} // extern "C"
#endif
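
A condensed sketch of how the call sites in motion_search_facade.c use this helper once the reference plane is known; the names follow the hunks above:

    /* Pick a site config matching the actual reference stride, falling back
     * to the per-block buffer when resizing has changed the stride. */
    const int ref_stride = xd->plane[0].pre[0].stride;
    const search_site_config *cfg = av1_get_search_site_config(
        x->search_site_cfg_buf, &cpi->mv_search_params,
        cpi->sf.mv_sf.search_method, ref_stride);
    av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &ref_mv,
                                       cfg, /*fine_search_interval=*/0);
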
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/mv_prec.c b/chromium/third_party/libaom/source/libaom/av1/encoder/mv_prec.c
index 2ff713d7417..b64f4dcd0e7 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/mv_prec.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/mv_prec.c
@@ -346,12 +346,7 @@ static AOM_INLINE void collect_mv_stats_tile(MV_STATS *mv_stats,
}
void av1_collect_mv_stats(AV1_COMP *cpi, int current_q) {
- MV_STATS *mv_stats;
-#if CONFIG_FRAME_PARALLEL_ENCODE
- mv_stats = &cpi->mv_stats;
-#else
- mv_stats = &cpi->ppi->mv_stats;
-#endif
+ MV_STATS *mv_stats = &cpi->mv_stats;
const AV1_COMMON *cm = &cpi->common;
const int tile_cols = cm->tiles.cols;
const int tile_rows = cm->tiles.rows;
@@ -416,12 +411,7 @@ static AOM_INLINE int get_smart_mv_prec(AV1_COMP *cpi, const MV_STATS *mv_stats,
void av1_pick_and_set_high_precision_mv(AV1_COMP *cpi, int qindex) {
int use_hp = qindex < HIGH_PRECISION_MV_QTHRESH;
#if !CONFIG_REALTIME_ONLY
- MV_STATS *mv_stats;
-#if CONFIG_FRAME_PARALLEL_ENCODE
- mv_stats = &cpi->mv_stats;
-#else
- mv_stats = &cpi->ppi->mv_stats;
-#endif
+ MV_STATS *mv_stats = &cpi->mv_stats;
#endif // !CONFIG_REALTIME_ONLY
if (cpi->sf.hl_sf.high_precision_mv_usage == QTR_ONLY) {
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/nonrd_opt.h b/chromium/third_party/libaom/source/libaom/av1/encoder/nonrd_opt.h
index 39049e5929a..8a25061d272 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/nonrd_opt.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/nonrd_opt.h
@@ -14,6 +14,27 @@
#include "av1/encoder/rdopt_utils.h"
+#define RTC_INTER_MODES (4)
+#define RTC_INTRA_MODES (4)
+#define RTC_MODES (AOMMAX(RTC_INTER_MODES, RTC_INTRA_MODES))
+
+static const PREDICTION_MODE intra_mode_list[] = { DC_PRED, V_PRED, H_PRED,
+ SMOOTH_PRED };
+
+static const PREDICTION_MODE inter_mode_list[] = { NEARESTMV, NEARMV, GLOBALMV,
+ NEWMV };
+
+static const THR_MODES mode_idx[REF_FRAMES][RTC_MODES] = {
+ { THR_DC, THR_V_PRED, THR_H_PRED, THR_SMOOTH },
+ { THR_NEARESTMV, THR_NEARMV, THR_GLOBALMV, THR_NEWMV },
+ { THR_NEARESTL2, THR_NEARL2, THR_GLOBALL2, THR_NEWL2 },
+ { THR_NEARESTL3, THR_NEARL3, THR_GLOBALL3, THR_NEWL3 },
+ { THR_NEARESTG, THR_NEARG, THR_GLOBALG, THR_NEWG },
+ { THR_NEARESTB, THR_NEARB, THR_GLOBALB, THR_NEWB },
+ { THR_NEARESTA2, THR_NEARA2, THR_GLOBALA2, THR_NEWA2 },
+ { THR_NEARESTA, THR_NEARA, THR_GLOBALA, THR_NEWA },
+};
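
These tables centralize the per-reference THR_MODES lookup that nonrd_pickmode.c previously paired with a local mode_offset() helper (visible in the deletion further below). A hedged lookup sketch reusing that removed helper (assumes <assert.h> and the usual libaom enum headers):

    /* mode_offset mirrors the helper removed from nonrd_pickmode.c. */
    static INLINE int mode_offset(const PREDICTION_MODE mode) {
      if (mode >= NEARESTMV) return INTER_OFFSET(mode);
      switch (mode) {
        case DC_PRED: return 0;
        case V_PRED: return 1;
        case H_PRED: return 2;
        case SMOOTH_PRED: return 3;
        default: assert(0); return -1;
      }
    }
    /* Example: the rate-threshold index for NEWMV on LAST_FRAME. */
    const THR_MODES idx = mode_idx[LAST_FRAME][mode_offset(NEWMV)];
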
+
/*!\brief Finds predicted motion vectors for a block.
*
* \ingroup nonrd_mode_search
@@ -37,7 +58,7 @@
* prune for low temporal variance block
* \param[in] skip_pred_mv Flag indicating to skip av1_mv_pred
*
- * \return Nothing is returned. Instead, predicted MVs are placed into
+ * \remark Nothing is returned. Instead, predicted MVs are placed into
* \c frame_mv array
*/
static INLINE void find_predictors(
@@ -80,7 +101,9 @@ static INLINE void find_predictors(
bsize);
}
}
- av1_count_overlappable_neighbors(cm, xd);
+ if (cm->features.switchable_motion_mode) {
+ av1_count_overlappable_neighbors(cm, xd);
+ }
mbmi->num_proj_ref = 1;
}
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/nonrd_pickmode.c b/chromium/third_party/libaom/source/libaom/av1/encoder/nonrd_pickmode.c
index 46bd24cd820..e797423b36d 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/nonrd_pickmode.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/nonrd_pickmode.c
@@ -15,32 +15,30 @@
#include <math.h>
#include <stdio.h>
-#include "aom_dsp/txfm_common.h"
-#include "av1/common/blockd.h"
-#include "av1/encoder/encoder.h"
#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"
#include "aom_dsp/aom_dsp_common.h"
-#include "aom_dsp/blend.h"
-#include "aom_mem/aom_mem.h"
-#include "aom_ports/aom_timer.h"
+#include "aom_dsp/txfm_common.h"
#include "aom_ports/mem.h"
-#include "av1/encoder/model_rd.h"
+#include "av1/common/blockd.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/encoder/encodemv.h"
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/intra_mode_search.h"
+#include "av1/encoder/model_rd.h"
+#include "av1/encoder/motion_search_facade.h"
#include "av1/encoder/nonrd_opt.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/reconinter_enc.h"
#include "av1/encoder/var_based_part.h"
-#include "av1/encoder/palette.h"
-#include "av1/encoder/intra_mode_search.h"
+#define CALC_BIASED_RDCOST(rdcost) (7 * (rdcost) >> 3)
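
For reference, CALC_BIASED_RDCOST scales an RD cost by 7/8 in integer arithmetic: CALC_BIASED_RDCOST(1000) = (7 * 1000) >> 3 = 875.
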
extern int g_pick_inter_mode_cnt;
/*!\cond */
typedef struct {
@@ -73,42 +71,39 @@ typedef struct {
} REF_MODE;
typedef struct {
+ MV_REFERENCE_FRAME ref_frame[2];
+ PREDICTION_MODE pred_mode;
+} COMP_REF_MODE;
+
+typedef struct {
InterpFilter filter_x;
InterpFilter filter_y;
} INTER_FILTER;
/*!\endcond */
-#define NUM_INTER_MODES_RT 9
-#define NUM_INTER_MODES_REDUCED 8
-
-static const REF_MODE ref_mode_set_rt[NUM_INTER_MODES_RT] = {
- { LAST_FRAME, NEARESTMV }, { LAST_FRAME, NEARMV },
- { LAST_FRAME, NEWMV }, { GOLDEN_FRAME, NEARESTMV },
- { GOLDEN_FRAME, NEARMV }, { GOLDEN_FRAME, NEWMV },
- { ALTREF_FRAME, NEARESTMV }, { ALTREF_FRAME, NEARMV },
- { ALTREF_FRAME, NEWMV }
-};
+#define NUM_COMP_INTER_MODES_RT (6)
+#define NUM_INTER_MODES 12
// GLOBALMV in the set below is in fact ZEROMV as we don't do global ME in RT
// mode
-static const REF_MODE ref_mode_set_reduced[NUM_INTER_MODES_REDUCED] = {
- { LAST_FRAME, GLOBALMV }, { LAST_FRAME, NEARESTMV },
- { GOLDEN_FRAME, GLOBALMV }, { LAST_FRAME, NEARMV },
- { LAST_FRAME, NEWMV }, { GOLDEN_FRAME, NEARESTMV },
- { GOLDEN_FRAME, NEARMV }, { GOLDEN_FRAME, NEWMV }
+static const REF_MODE ref_mode_set[NUM_INTER_MODES] = {
+ { LAST_FRAME, NEARESTMV }, { LAST_FRAME, NEARMV },
+ { LAST_FRAME, GLOBALMV }, { LAST_FRAME, NEWMV },
+ { GOLDEN_FRAME, NEARESTMV }, { GOLDEN_FRAME, NEARMV },
+ { GOLDEN_FRAME, GLOBALMV }, { GOLDEN_FRAME, NEWMV },
+ { ALTREF_FRAME, NEARESTMV }, { ALTREF_FRAME, NEARMV },
+ { ALTREF_FRAME, GLOBALMV }, { ALTREF_FRAME, NEWMV },
};
-static const THR_MODES mode_idx[REF_FRAMES][4] = {
- { THR_DC, THR_V_PRED, THR_H_PRED, THR_SMOOTH },
- { THR_NEARESTMV, THR_NEARMV, THR_GLOBALMV, THR_NEWMV },
- { THR_NEARESTL2, THR_NEARL2, THR_GLOBALL2, THR_NEWL2 },
- { THR_NEARESTL3, THR_NEARL3, THR_GLOBALL3, THR_NEWL3 },
- { THR_NEARESTG, THR_NEARG, THR_GLOBALMV, THR_NEWG },
+static const COMP_REF_MODE comp_ref_mode_set[NUM_COMP_INTER_MODES_RT] = {
+ { { LAST_FRAME, GOLDEN_FRAME }, GLOBAL_GLOBALMV },
+ { { LAST_FRAME, GOLDEN_FRAME }, NEAREST_NEARESTMV },
+ { { LAST_FRAME, LAST2_FRAME }, GLOBAL_GLOBALMV },
+ { { LAST_FRAME, LAST2_FRAME }, NEAREST_NEARESTMV },
+ { { LAST_FRAME, ALTREF_FRAME }, GLOBAL_GLOBALMV },
+ { { LAST_FRAME, ALTREF_FRAME }, NEAREST_NEARESTMV },
};
-static const PREDICTION_MODE intra_mode_list[] = { DC_PRED, V_PRED, H_PRED,
- SMOOTH_PRED };
-
static const INTER_FILTER filters_ref_set[9] = {
{ EIGHTTAP_REGULAR, EIGHTTAP_REGULAR }, { EIGHTTAP_SMOOTH, EIGHTTAP_SMOOTH },
{ EIGHTTAP_REGULAR, EIGHTTAP_SMOOTH }, { EIGHTTAP_SMOOTH, EIGHTTAP_REGULAR },
@@ -117,20 +112,6 @@ static const INTER_FILTER filters_ref_set[9] = {
{ MULTITAP_SHARP, EIGHTTAP_SMOOTH }
};
-static INLINE int mode_offset(const PREDICTION_MODE mode) {
- if (mode >= NEARESTMV) {
- return INTER_OFFSET(mode);
- } else {
- switch (mode) {
- case DC_PRED: return 0;
- case V_PRED: return 1;
- case H_PRED: return 2;
- case SMOOTH_PRED: return 3;
- default: assert(0); return -1;
- }
- }
-}
-
enum {
// INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV),
INTER_NEAREST = (1 << NEARESTMV),
@@ -139,6 +120,137 @@ enum {
INTER_NEAR_NEW = (1 << NEARMV) | (1 << NEWMV),
};
+// The original scan order (default_scan_8x8) is modified to account for the
+// extra transpose in the Hadamard C implementations, i.e.,
+// aom_hadamard_lp_8x8_c and aom_hadamard_8x8_c.
+static const int16_t default_scan_8x8_transpose[64] = {
+ 0, 8, 1, 2, 9, 16, 24, 17, 10, 3, 4, 11, 18, 25, 32, 40,
+ 33, 26, 19, 12, 5, 6, 13, 20, 27, 34, 41, 48, 56, 49, 42, 35,
+ 28, 21, 14, 7, 15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,
+ 23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63
+};
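
A small sketch of how such a table follows mechanically from the regular scan order: raster position i of an 8x8 block transposes to (i % 8) * 8 + (i / 8). Here default_scan_8x8 is assumed to be the zig-zag table from av1/common/scan.c; applying the mapping below to it reproduces the table above.

    /* Derive a transposed scan table from a regular 8x8 scan order. */
    static void build_transposed_scan_8x8(const int16_t *scan,
                                          int16_t *scan_transposed) {
      for (int k = 0; k < 64; ++k) {
        const int pos = scan[k]; /* raster position in the normal block */
        scan_transposed[k] = (int16_t)((pos % 8) * 8 + (pos / 8));
      }
    }
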
+
+// The original scan order (av1_default_iscan_8x8) is modified to match the
+// Hadamard AVX2 implementations, i.e., aom_hadamard_lp_8x8_avx2 and
+// aom_hadamard_8x8_avx2. The AVX2 implementations reorder the coefficients,
+// so the normal scan order is no longer guaranteed to visit the low-frequency
+// coefficients first; the scan order is adjusted accordingly.
+// Note that this table has to be used together with default_scan_8x8_transpose.
+static const int16_t av1_default_iscan_8x8_transpose[64] = {
+ 0, 2, 3, 9, 10, 20, 21, 35, 1, 4, 8, 11, 19, 22, 34, 36,
+ 5, 7, 12, 18, 23, 33, 37, 48, 6, 13, 17, 24, 32, 38, 47, 49,
+ 14, 16, 25, 31, 39, 46, 50, 57, 15, 26, 30, 40, 45, 51, 56, 58,
+ 27, 29, 41, 44, 52, 55, 59, 62, 28, 42, 43, 53, 54, 60, 61, 63
+};
+
+// The original scan order (default_scan_16x16) is modified to account for the
+// extra transpose in the Hadamard C implementation in the lp case, i.e.,
+// aom_hadamard_lp_16x16_c.
+static const int16_t default_scan_lp_16x16_transpose[256] = {
+ 0, 8, 2, 4, 10, 16, 24, 18, 12, 6, 64, 14, 20, 26, 32,
+ 40, 34, 28, 22, 72, 66, 68, 74, 80, 30, 36, 42, 48, 56, 50,
+ 44, 38, 88, 82, 76, 70, 128, 78, 84, 90, 96, 46, 52, 58, 1,
+ 9, 3, 60, 54, 104, 98, 92, 86, 136, 130, 132, 138, 144, 94, 100,
+ 106, 112, 62, 5, 11, 17, 25, 19, 13, 7, 120, 114, 108, 102, 152,
+ 146, 140, 134, 192, 142, 148, 154, 160, 110, 116, 122, 65, 15, 21, 27,
+ 33, 41, 35, 29, 23, 73, 67, 124, 118, 168, 162, 156, 150, 200, 194,
+ 196, 202, 208, 158, 164, 170, 176, 126, 69, 75, 81, 31, 37, 43, 49,
+ 57, 51, 45, 39, 89, 83, 77, 71, 184, 178, 172, 166, 216, 210, 204,
+ 198, 206, 212, 218, 224, 174, 180, 186, 129, 79, 85, 91, 97, 47, 53,
+ 59, 61, 55, 105, 99, 93, 87, 137, 131, 188, 182, 232, 226, 220, 214,
+ 222, 228, 234, 240, 190, 133, 139, 145, 95, 101, 107, 113, 63, 121, 115,
+ 109, 103, 153, 147, 141, 135, 248, 242, 236, 230, 238, 244, 250, 193, 143,
+ 149, 155, 161, 111, 117, 123, 125, 119, 169, 163, 157, 151, 201, 195, 252,
+ 246, 254, 197, 203, 209, 159, 165, 171, 177, 127, 185, 179, 173, 167, 217,
+ 211, 205, 199, 207, 213, 219, 225, 175, 181, 187, 189, 183, 233, 227, 221,
+ 215, 223, 229, 235, 241, 191, 249, 243, 237, 231, 239, 245, 251, 253, 247,
+ 255
+};
+
+#if CONFIG_AV1_HIGHBITDEPTH
+// The original scan order (default_scan_16x16) is modified to account for the
+// extra shift in the Hadamard C implementation in the fp case, i.e.,
+// aom_hadamard_16x16_c. Note that the 16x16 lp and fp Hadamard transforms
+// generate different outputs, so they are handled separately.
+static const int16_t default_scan_fp_16x16_transpose[256] = {
+ 0, 4, 2, 8, 6, 16, 20, 18, 12, 10, 64, 14, 24, 22, 32,
+ 36, 34, 28, 26, 68, 66, 72, 70, 80, 30, 40, 38, 48, 52, 50,
+ 44, 42, 84, 82, 76, 74, 128, 78, 88, 86, 96, 46, 56, 54, 1,
+ 5, 3, 60, 58, 100, 98, 92, 90, 132, 130, 136, 134, 144, 94, 104,
+ 102, 112, 62, 9, 7, 17, 21, 19, 13, 11, 116, 114, 108, 106, 148,
+ 146, 140, 138, 192, 142, 152, 150, 160, 110, 120, 118, 65, 15, 25, 23,
+ 33, 37, 35, 29, 27, 69, 67, 124, 122, 164, 162, 156, 154, 196, 194,
+ 200, 198, 208, 158, 168, 166, 176, 126, 73, 71, 81, 31, 41, 39, 49,
+ 53, 51, 45, 43, 85, 83, 77, 75, 180, 178, 172, 170, 212, 210, 204,
+ 202, 206, 216, 214, 224, 174, 184, 182, 129, 79, 89, 87, 97, 47, 57,
+ 55, 61, 59, 101, 99, 93, 91, 133, 131, 188, 186, 228, 226, 220, 218,
+ 222, 232, 230, 240, 190, 137, 135, 145, 95, 105, 103, 113, 63, 117, 115,
+ 109, 107, 149, 147, 141, 139, 244, 242, 236, 234, 238, 248, 246, 193, 143,
+ 153, 151, 161, 111, 121, 119, 125, 123, 165, 163, 157, 155, 197, 195, 252,
+ 250, 254, 201, 199, 209, 159, 169, 167, 177, 127, 181, 179, 173, 171, 213,
+ 211, 205, 203, 207, 217, 215, 225, 175, 185, 183, 189, 187, 229, 227, 221,
+ 219, 223, 233, 231, 241, 191, 245, 243, 237, 235, 239, 249, 247, 253, 251,
+ 255
+};
+#endif
+
+// The original scan order (av1_default_iscan_16x16) is modified to match the
+// Hadamard AVX2 implementation, i.e., aom_hadamard_lp_16x16_avx2.
+// The AVX2 implementation reorders the coefficients, so the normal scan order
+// is no longer guaranteed to visit the low-frequency coefficients first; the
+// scan order is adjusted accordingly. Note that this table has to be used
+// together with default_scan_lp_16x16_transpose.
+static const int16_t av1_default_iscan_lp_16x16_transpose[256] = {
+ 0, 44, 2, 46, 3, 63, 9, 69, 1, 45, 4, 64, 8, 68, 11,
+ 87, 5, 65, 7, 67, 12, 88, 18, 94, 6, 66, 13, 89, 17, 93,
+ 24, 116, 14, 90, 16, 92, 25, 117, 31, 123, 15, 91, 26, 118, 30,
+ 122, 41, 148, 27, 119, 29, 121, 42, 149, 48, 152, 28, 120, 43, 150,
+ 47, 151, 62, 177, 10, 86, 20, 96, 21, 113, 35, 127, 19, 95, 22,
+ 114, 34, 126, 37, 144, 23, 115, 33, 125, 38, 145, 52, 156, 32, 124,
+ 39, 146, 51, 155, 58, 173, 40, 147, 50, 154, 59, 174, 73, 181, 49,
+ 153, 60, 175, 72, 180, 83, 198, 61, 176, 71, 179, 84, 199, 98, 202,
+ 70, 178, 85, 200, 97, 201, 112, 219, 36, 143, 54, 158, 55, 170, 77,
+ 185, 53, 157, 56, 171, 76, 184, 79, 194, 57, 172, 75, 183, 80, 195,
+ 102, 206, 74, 182, 81, 196, 101, 205, 108, 215, 82, 197, 100, 204, 109,
+ 216, 131, 223, 99, 203, 110, 217, 130, 222, 140, 232, 111, 218, 129, 221,
+ 141, 233, 160, 236, 128, 220, 142, 234, 159, 235, 169, 245, 78, 193, 104,
+ 208, 105, 212, 135, 227, 103, 207, 106, 213, 134, 226, 136, 228, 107, 214,
+ 133, 225, 137, 229, 164, 240, 132, 224, 138, 230, 163, 239, 165, 241, 139,
+ 231, 162, 238, 166, 242, 189, 249, 161, 237, 167, 243, 188, 248, 190, 250,
+ 168, 244, 187, 247, 191, 251, 210, 254, 186, 246, 192, 252, 209, 253, 211,
+ 255
+};
+
+#if CONFIG_AV1_HIGHBITDEPTH
+// The original scan order (av1_default_iscan_16x16) is modified to match the
+// Hadamard AVX2 implementation, i.e., aom_hadamard_16x16_avx2.
+// The AVX2 implementation reorders the coefficients, so the normal scan order
+// is no longer guaranteed to visit the low-frequency coefficients first; the
+// scan order is adjusted accordingly. Note that this table has to be used
+// together with default_scan_fp_16x16_transpose.
+static const int16_t av1_default_iscan_fp_16x16_transpose[256] = {
+ 0, 44, 2, 46, 1, 45, 4, 64, 3, 63, 9, 69, 8, 68, 11,
+ 87, 5, 65, 7, 67, 6, 66, 13, 89, 12, 88, 18, 94, 17, 93,
+ 24, 116, 14, 90, 16, 92, 15, 91, 26, 118, 25, 117, 31, 123, 30,
+ 122, 41, 148, 27, 119, 29, 121, 28, 120, 43, 150, 42, 149, 48, 152,
+ 47, 151, 62, 177, 10, 86, 20, 96, 19, 95, 22, 114, 21, 113, 35,
+ 127, 34, 126, 37, 144, 23, 115, 33, 125, 32, 124, 39, 146, 38, 145,
+ 52, 156, 51, 155, 58, 173, 40, 147, 50, 154, 49, 153, 60, 175, 59,
+ 174, 73, 181, 72, 180, 83, 198, 61, 176, 71, 179, 70, 178, 85, 200,
+ 84, 199, 98, 202, 97, 201, 112, 219, 36, 143, 54, 158, 53, 157, 56,
+ 171, 55, 170, 77, 185, 76, 184, 79, 194, 57, 172, 75, 183, 74, 182,
+ 81, 196, 80, 195, 102, 206, 101, 205, 108, 215, 82, 197, 100, 204, 99,
+ 203, 110, 217, 109, 216, 131, 223, 130, 222, 140, 232, 111, 218, 129, 221,
+ 128, 220, 142, 234, 141, 233, 160, 236, 159, 235, 169, 245, 78, 193, 104,
+ 208, 103, 207, 106, 213, 105, 212, 135, 227, 134, 226, 136, 228, 107, 214,
+ 133, 225, 132, 224, 138, 230, 137, 229, 164, 240, 163, 239, 165, 241, 139,
+ 231, 162, 238, 161, 237, 167, 243, 166, 242, 189, 249, 188, 248, 190, 250,
+ 168, 244, 187, 247, 186, 246, 192, 252, 191, 251, 210, 254, 209, 253, 211,
+ 255
+};
+#endif
+
static INLINE int early_term_inter_search_with_sse(int early_term_idx,
BLOCK_SIZE bsize,
int64_t this_sse,
@@ -187,19 +299,57 @@ static INLINE void init_best_pickmode(BEST_PICKMODE *bp) {
memset(&bp->pmi, 0, sizeof(bp->pmi));
}
-static INLINE int subpel_select(AV1_COMP *cpi, BLOCK_SIZE bsize, int_mv *mv) {
- int mv_thresh = 4;
- const int is_low_resoln =
- (cpi->common.width * cpi->common.height <= 320 * 240);
- mv_thresh = (bsize > BLOCK_32X32) ? 2 : (bsize > BLOCK_16X16) ? 4 : 6;
- if (cpi->rc.avg_frame_low_motion > 0 && cpi->rc.avg_frame_low_motion < 40)
- mv_thresh = 12;
- mv_thresh = (is_low_resoln) ? mv_thresh >> 1 : mv_thresh;
- if (abs(mv->as_fullmv.row) >= mv_thresh ||
- abs(mv->as_fullmv.col) >= mv_thresh)
- return HALF_PEL;
- else
- return cpi->sf.mv_sf.subpel_force_stop;
+static INLINE int subpel_select(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
+ int_mv *mv, MV ref_mv, FULLPEL_MV start_mv,
+ bool fullpel_performed_well) {
+ const int frame_lowmotion = cpi->rc.avg_frame_low_motion;
+ // Reduce MV precision for larger integer MV values and frame-level motion.
+ if (cpi->sf.rt_sf.reduce_mv_pel_precision_highmotion >= 3) {
+ int mv_thresh = 4;
+ const int is_low_resoln =
+ (cpi->common.width * cpi->common.height <= 320 * 240);
+ mv_thresh = (bsize > BLOCK_32X32) ? 2 : (bsize > BLOCK_16X16) ? 4 : 6;
+ if (frame_lowmotion > 0 && frame_lowmotion < 40) mv_thresh = 12;
+ mv_thresh = (is_low_resoln) ? mv_thresh >> 1 : mv_thresh;
+ if (abs(mv->as_fullmv.row) >= mv_thresh ||
+ abs(mv->as_fullmv.col) >= mv_thresh)
+ return HALF_PEL;
+ } else if (cpi->sf.rt_sf.reduce_mv_pel_precision_highmotion >= 1) {
+ int mv_thresh;
+ const int th_vals[2][3] = { { 4, 8, 10 }, { 4, 6, 8 } };
+ const int th_idx = cpi->sf.rt_sf.reduce_mv_pel_precision_highmotion - 1;
+ assert(th_idx >= 0 && th_idx < 2);
+ if (frame_lowmotion > 0 && frame_lowmotion < 40)
+ mv_thresh = 12;
+ else
+ mv_thresh = (bsize >= BLOCK_32X32) ? th_vals[th_idx][0]
+ : (bsize >= BLOCK_16X16) ? th_vals[th_idx][1]
+ : th_vals[th_idx][2];
+ if (abs(mv->as_fullmv.row) >= (mv_thresh << 1) ||
+ abs(mv->as_fullmv.col) >= (mv_thresh << 1))
+ return FULL_PEL;
+ else if (abs(mv->as_fullmv.row) >= mv_thresh ||
+ abs(mv->as_fullmv.col) >= mv_thresh)
+ return HALF_PEL;
+ }
+ // Reduce MV precision for relatively static (e.g. background),
+ // low-complexity large areas.
+ if (cpi->sf.rt_sf.reduce_mv_pel_precision_lowcomplex >= 2) {
+ const int qband = x->qindex >> (QINDEX_BITS - 2);
+ assert(qband < 4);
+ if (x->content_state_sb.source_sad_nonrd <= kVeryLowSad &&
+ bsize > BLOCK_16X16 && qband != 0) {
+ if (x->source_variance < 500)
+ return FULL_PEL;
+ else if (x->source_variance < 5000)
+ return HALF_PEL;
+ }
+ } else if (cpi->sf.rt_sf.reduce_mv_pel_precision_lowcomplex >= 1) {
+ if (fullpel_performed_well && ref_mv.row == 0 && ref_mv.col == 0 &&
+ start_mv.row == 0 && start_mv.col == 0)
+ return HALF_PEL;
+ }
+ return cpi->sf.mv_sf.subpel_force_stop;
}
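
Worked example for the thresholds above: with reduce_mv_pel_precision_highmotion == 1 (th_idx 0) on a BLOCK_32X32 block in a normal-motion frame, mv_thresh is 4, so a full-pel MV component of magnitude 8 or more forces FULL_PEL, magnitudes 4-7 force HALF_PEL, and smaller MVs fall through to the low-complexity checks and ultimately the default subpel_force_stop.
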
/*!\brief Runs Motion Estimation for a specific block and specific ref frame.
@@ -233,10 +383,11 @@ static int combined_motion_search(AV1_COMP *cpi, MACROBLOCK *x,
MACROBLOCKD *xd = &x->e_mbd;
const AV1_COMMON *cm = &cpi->common;
const int num_planes = av1_num_planes(cm);
+ const SPEED_FEATURES *sf = &cpi->sf;
MB_MODE_INFO *mi = xd->mi[0];
struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
- int step_param = (cpi->sf.rt_sf.fullpel_search_step_param)
- ? cpi->sf.rt_sf.fullpel_search_step_param
+ int step_param = (sf->rt_sf.fullpel_search_step_param)
+ ? sf->rt_sf.fullpel_search_step_param
: cpi->mv_search_params.mv_step_param;
FULLPEL_MV start_mv;
const int ref = mi->ref_frame[0];
@@ -265,16 +416,20 @@ static int combined_motion_search(AV1_COMP *cpi, MACROBLOCK *x,
center_mv = ref_mv;
else
center_mv = tmp_mv->as_mv;
- const search_site_config *src_search_sites =
- cpi->mv_search_params.search_site_cfg[SS_CFG_SRC];
+
+ const SEARCH_METHODS search_method = sf->mv_sf.search_method;
+ const MotionVectorSearchParams *mv_search_params = &cpi->mv_search_params;
+ const int ref_stride = xd->plane[0].pre[0].stride;
+ const search_site_config *src_search_sites = av1_get_search_site_config(
+ x->search_site_cfg_buf, mv_search_params, search_method, ref_stride);
FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &center_mv,
src_search_sites,
/*fine_search_interval=*/0);
- av1_full_pixel_search(start_mv, &full_ms_params, step_param,
- cond_cost_list(cpi, cost_list), &tmp_mv->as_fullmv,
- NULL);
+ const unsigned int full_var_rd = av1_full_pixel_search(
+ start_mv, &full_ms_params, step_param, cond_cost_list(cpi, cost_list),
+ &tmp_mv->as_fullmv, NULL);
// calculate the bit cost on motion vector
MV mvp_full = get_mv_from_fullmv(&tmp_mv->as_fullmv);
@@ -289,13 +444,27 @@ static int combined_motion_search(AV1_COMP *cpi, MACROBLOCK *x,
SUBPEL_MOTION_SEARCH_PARAMS ms_params;
av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, &ref_mv,
cost_list);
- if (cpi->sf.rt_sf.force_half_pel_block &&
- cpi->sf.mv_sf.subpel_force_stop < HALF_PEL)
- ms_params.forced_stop = subpel_select(cpi, bsize, tmp_mv);
+ const bool fullpel_performed_well =
+ (bsize == BLOCK_64X64 && full_var_rd * 40 < 62267 * 7) ||
+ (bsize == BLOCK_32X32 && full_var_rd * 8 < 42380) ||
+ (bsize == BLOCK_16X16 && full_var_rd * 8 < 10127);
+ if (sf->rt_sf.reduce_mv_pel_precision_highmotion ||
+ sf->rt_sf.reduce_mv_pel_precision_lowcomplex)
+ ms_params.forced_stop = subpel_select(cpi, x, bsize, tmp_mv, ref_mv,
+ start_mv, fullpel_performed_well);
+
MV subpel_start_mv = get_mv_from_fullmv(&tmp_mv->as_fullmv);
- cpi->mv_search_params.find_fractional_mv_step(
- xd, cm, &ms_params, subpel_start_mv, &tmp_mv->as_mv, &dis,
- &x->pred_sse[ref], NULL);
+ if (sf->rt_sf.use_adaptive_subpel_search &&
+ (fullpel_performed_well ||
+ x->content_state_sb.source_sad_nonrd <= kLowSad)) {
+ av1_find_best_sub_pixel_tree_pruned_more(xd, cm, &ms_params,
+ subpel_start_mv, &tmp_mv->as_mv,
+ &dis, &x->pred_sse[ref], NULL);
+ } else {
+ cpi->mv_search_params.find_fractional_mv_step(
+ xd, cm, &ms_params, subpel_start_mv, &tmp_mv->as_mv, &dis,
+ &x->pred_sse[ref], NULL);
+ }
*rate_mv =
av1_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->mv_costs->nmv_joint_cost,
@@ -306,8 +475,8 @@ static int combined_motion_search(AV1_COMP *cpi, MACROBLOCK *x,
int i;
for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
}
- // Final MV can not be equal to referance MV as this will trigger assert
- // later. This can happen if both NEAREST and NEAR modes were skipped
+ // The final MV can not be equal to the reference MV as this will trigger an
+ // assert later. This can happen if both NEAREST and NEAR modes were skipped.
rv = (tmp_mv->as_mv.col != ref_mv.col || tmp_mv->as_mv.row != ref_mv.row);
return rv;
}
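The fullpel_performed_well test above gates the cheaper pruned subpel search
on how well the full-pel stage did. A self-contained restatement (constants
taken verbatim from the hunk; names are illustrative, not libaom API):

#include <stdbool.h>

enum { BS_16X16, BS_32X32, BS_64X64 };  // stand-ins for BLOCK_SIZE values

static bool fullpel_performed_well_sketch(int bsize,
                                          unsigned int full_var_rd) {
  // Per-size budgets work out to roughly 10896 (64x64), 5297 (32x32)
  // and 1265 (16x16).
  return (bsize == BS_64X64 && full_var_rd * 40 < 62267 * 7) ||
         (bsize == BS_32X32 && full_var_rd * 8 < 42380) ||
         (bsize == BS_16X16 && full_var_rd * 8 < 10127);
}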
@@ -371,9 +540,12 @@ static int search_new_mv(AV1_COMP *cpi, MACROBLOCK *x,
SUBPEL_MOTION_SEARCH_PARAMS ms_params;
av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, &ref_mv, NULL);
- if (cpi->sf.rt_sf.force_half_pel_block &&
- cpi->sf.mv_sf.subpel_force_stop < HALF_PEL)
- ms_params.forced_stop = subpel_select(cpi, bsize, &best_mv);
+ if (cpi->sf.rt_sf.reduce_mv_pel_precision_highmotion ||
+ cpi->sf.rt_sf.reduce_mv_pel_precision_lowcomplex) {
+ FULLPEL_MV start_mv = { .row = 0, .col = 0 };
+ ms_params.forced_stop =
+ subpel_select(cpi, x, bsize, &best_mv, ref_mv, start_mv, false);
+ }
MV start_mv = get_mv_from_fullmv(&best_mv.as_fullmv);
cpi->mv_search_params.find_fractional_mv_step(
xd, cm, &ms_params, start_mv, &best_mv.as_mv, &dis,
@@ -395,7 +567,7 @@ static int search_new_mv(AV1_COMP *cpi, MACROBLOCK *x,
static void estimate_single_ref_frame_costs(const AV1_COMMON *cm,
const MACROBLOCKD *xd,
const ModeCosts *mode_costs,
- int segment_id,
+ int segment_id, BLOCK_SIZE bsize,
unsigned int *ref_costs_single) {
int seg_ref_active =
segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
@@ -406,6 +578,11 @@ static void estimate_single_ref_frame_costs(const AV1_COMMON *cm,
ref_costs_single[INTRA_FRAME] =
mode_costs->intra_inter_cost[intra_inter_ctx][0];
unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];
+ if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT &&
+ is_comp_ref_allowed(bsize)) {
+ const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
+ base_cost += mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
+ }
ref_costs_single[LAST_FRAME] = base_cost;
ref_costs_single[GOLDEN_FRAME] = base_cost;
ref_costs_single[ALTREF_FRAME] = base_cost;
@@ -557,7 +734,9 @@ static void model_skip_for_sb_y_large(AV1_COMP *cpi, BLOCK_SIZE bsize,
int mi_row, int mi_col, MACROBLOCK *x,
MACROBLOCKD *xd, RD_STATS *rd_stats,
int *early_term, int calculate_rd,
- int64_t best_sse) {
+ int64_t best_sse,
+ unsigned int *var_output,
+ unsigned int var_prune_threshold) {
// Note our transform coeffs are 8 times an orthogonal transform.
// Hence quantizer step is also 8 times. To get effective quantizer
// we need to divide by 8 before sending to modeling function.
@@ -581,7 +760,7 @@ static void model_skip_for_sb_y_large(AV1_COMP *cpi, BLOCK_SIZE bsize,
TX_SIZE tx_size;
int k;
- if (x->force_zeromv_skip) {
+ if (x->force_zeromv_skip_for_blk) {
*early_term = 1;
rd_stats->rate = 0;
rd_stats->dist = 0;
@@ -594,6 +773,12 @@ static void model_skip_for_sb_y_large(AV1_COMP *cpi, BLOCK_SIZE bsize,
block_variance(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
4 << bw, 4 << bh, &sse, &sum, 8, sse8x8, sum8x8, var8x8);
var = sse - (unsigned int)(((int64_t)sum * sum) >> (bw + bh + 4));
+ if (var_output) {
+ *var_output = var;
+ if (*var_output > var_prune_threshold) {
+ return;
+ }
+ }
rd_stats->sse = sse;
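The var computation above is the population-variance identity
var_sum = sse - sum^2 / N, where N = (4 << bw) * (4 << bh) = 1 << (bw + bh + 4)
pixels. A minimal standalone form of the same expression:

#include <stdint.h>

static unsigned int block_var_sketch(unsigned int sse, int sum,
                                     int bw, int bh) {
  // sse: sum of squared differences; sum: sum of differences.
  // Subtracting sum^2 / N leaves N times the per-pixel variance.
  return sse - (unsigned int)(((int64_t)sum * sum) >> (bw + bh + 4));
}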
@@ -614,8 +799,8 @@ static void model_skip_for_sb_y_large(AV1_COMP *cpi, BLOCK_SIZE bsize,
// Skipping test
*early_term = 0;
tx_size = calculate_tx_size(cpi, bsize, x, var, sse, early_term);
- // The code below for setting skip flag assumes tranform size of at least 8x8,
- // so force this lower limit on transform.
+ // The code below for setting skip flag assumes transform size of at least
+ // 8x8, so force this lower limit on transform.
if (tx_size < TX_8X8) tx_size = TX_8X8;
xd->mi[0]->tx_size = tx_size;
@@ -729,7 +914,15 @@ static void model_skip_for_sb_y_large(AV1_COMP *cpi, BLOCK_SIZE bsize,
static void model_rd_for_sb_y(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
MACROBLOCK *x, MACROBLOCKD *xd,
- RD_STATS *rd_stats, int calculate_rd) {
+ RD_STATS *rd_stats, unsigned int *var_out,
+ int calculate_rd, int *early_term) {
+ if (x->force_zeromv_skip_for_blk && early_term != NULL) {
+ *early_term = 1;
+ rd_stats->rate = 0;
+ rd_stats->dist = 0;
+ rd_stats->sse = 0;
+ }
+
// Note our transform coeffs are 8 times an orthogonal transform.
// Hence quantizer step is also 8 times. To get effective quantizer
// we need to divide by 8 before sending to modeling function.
@@ -747,6 +940,9 @@ static void model_rd_for_sb_y(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, &sse);
int force_skip = 0;
xd->mi[0]->tx_size = calculate_tx_size(cpi, bsize, x, var, sse, &force_skip);
+ if (var_out) {
+ *var_out = var;
+ }
if (calculate_rd && (!force_skip || ref == INTRA_FRAME)) {
const int bwide = block_size_wide[bsize];
@@ -773,9 +969,11 @@ static void model_rd_for_sb_y(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
rd_stats->dist = dist;
}
-static INLINE void aom_process_hadamard_8x16(MACROBLOCK *x, int max_blocks_high,
- int max_blocks_wide, int num_4x4_w,
- int step, int block_step) {
+static INLINE void aom_process_hadamard_lp_8x16(MACROBLOCK *x,
+ int max_blocks_high,
+ int max_blocks_wide,
+ int num_4x4_w, int step,
+ int block_step) {
struct macroblock_plane *const p = &x->plane[0];
const int bw = 4 * num_4x4_w;
const int num_4x4 = AOMMIN(num_4x4_w, max_blocks_wide);
@@ -785,7 +983,7 @@ static INLINE void aom_process_hadamard_8x16(MACROBLOCK *x, int max_blocks_high,
for (int c = 0; c < num_4x4; c += 2 * block_step) {
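// Offset math: r and c count 4x4 sub-blocks and the diff buffer is
// bw = 4 * num_4x4_w samples wide, so sub-block (r, c) starts at sample
// (4 * r) * bw + 4 * c == (r * bw + c) << 2.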
const int16_t *src_diff = &p->src_diff[(r * bw + c) << 2];
int16_t *low_coeff = (int16_t *)p->coeff + BLOCK_OFFSET(block);
- aom_hadamard_8x8_dual(src_diff, (ptrdiff_t)bw, low_coeff);
+ aom_hadamard_lp_8x8_dual(src_diff, (ptrdiff_t)bw, low_coeff);
block += 2 * step;
}
}
@@ -862,7 +1060,7 @@ static AOM_FORCE_INLINE void update_yrd_loop_vars(
* \param[in] tx_type Transform kernel type
* \param[in] is_inter_mode Flag to indicate inter mode
*
- * \return Nothing is returned. Instead, calculated RD cost is placed to
+ * \remark Nothing is returned. Instead, calculated RD cost is placed to
 * \c this_rdc. \c skippable flag is set if there are no non-zero quantized
* coefficients for Hadamard transform
*/
@@ -963,12 +1161,19 @@ void av1_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, int mi_row,
// For block sizes 8x16 or above, Hadamard txfm of two adjacent 8x8 blocks
// can be done per function call. Hence the call of Hadamard txfm is
// abstracted here for the specified cases.
- const int is_tx_8x8_dual_applicable =
+ int is_tx_8x8_dual_applicable =
(tx_size == TX_8X8 && block_size_wide[bsize] >= 16 &&
block_size_high[bsize] >= 8);
+
+#if CONFIG_AV1_HIGHBITDEPTH
+ // As of now, dual implementation of hadamard txfm is available for low
+ // bitdepth and when tx_type != IDTX.
+ if (use_hbd || tx_type == IDTX) is_tx_8x8_dual_applicable = 0;
+#endif
+
if (is_tx_8x8_dual_applicable) {
- aom_process_hadamard_8x16(x, max_blocks_high, max_blocks_wide, num_4x4_w,
- step, block_step);
+ aom_process_hadamard_lp_8x16(x, max_blocks_high, max_blocks_wide,
+ num_4x4_w, step, block_step);
}
// Keep track of the row and column of the blocks we use so that we know
@@ -989,8 +1194,12 @@ void av1_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, int mi_row,
aom_hadamard_16x16(src_diff, diff_stride, coeff);
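// The Hadamard kernels emit coefficients in a transposed layout (which
// appears to let the SIMD implementations skip a final transpose), so the
// *_transpose scan/iscan tables below are paired with them; the fdct4x4
// path further down keeps the normal scan order.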
av1_quantize_fp(coeff, 16 * 16, p->zbin_QTX, p->round_fp_QTX,
p->quant_fp_QTX, p->quant_shift_QTX, qcoeff,
- dqcoeff, p->dequant_QTX, eob, scan_order->scan,
- scan_order->iscan);
+ dqcoeff, p->dequant_QTX, eob,
+ // default_scan_fp_16x16_transpose and
+ // av1_default_iscan_fp_16x16_transpose have to be
+ // used together.
+ default_scan_fp_16x16_transpose,
+ av1_default_iscan_fp_16x16_transpose);
} else {
if (tx_type == IDTX) {
aom_pixel_scale(src_diff, diff_stride, low_coeff, 3, 2, 2);
@@ -999,31 +1208,42 @@ void av1_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, int mi_row,
}
av1_quantize_lp(low_coeff, 16 * 16, p->round_fp_QTX,
p->quant_fp_QTX, low_qcoeff, low_dqcoeff,
- p->dequant_QTX, eob, scan_order->scan,
- scan_order->iscan);
+ p->dequant_QTX, eob,
+ // default_scan_lp_16x16_transpose and
+ // av1_default_iscan_lp_16x16_transpose have to be
+ // used together.
+ default_scan_lp_16x16_transpose,
+ av1_default_iscan_lp_16x16_transpose);
}
break;
case TX_8X8:
if (use_hbd) {
aom_hadamard_8x8(src_diff, diff_stride, coeff);
- av1_quantize_fp(coeff, 8 * 8, p->zbin_QTX, p->round_fp_QTX,
- p->quant_fp_QTX, p->quant_shift_QTX, qcoeff,
- dqcoeff, p->dequant_QTX, eob, scan_order->scan,
- scan_order->iscan);
+ av1_quantize_fp(
+ coeff, 8 * 8, p->zbin_QTX, p->round_fp_QTX, p->quant_fp_QTX,
+ p->quant_shift_QTX, qcoeff, dqcoeff, p->dequant_QTX, eob,
+ default_scan_8x8_transpose, av1_default_iscan_8x8_transpose);
} else {
if (tx_type == IDTX) {
aom_pixel_scale(src_diff, diff_stride, low_coeff, 3, 1, 1);
- } else {
+ } else if (!is_tx_8x8_dual_applicable) {
aom_hadamard_lp_8x8(src_diff, diff_stride, low_coeff);
+ } else {
+ assert(is_tx_8x8_dual_applicable);
}
- av1_quantize_lp(low_coeff, 8 * 8, p->round_fp_QTX,
- p->quant_fp_QTX, low_qcoeff, low_dqcoeff,
- p->dequant_QTX, eob, scan_order->scan,
- scan_order->iscan);
+ av1_quantize_lp(
+ low_coeff, 8 * 8, p->round_fp_QTX, p->quant_fp_QTX,
+ low_qcoeff, low_dqcoeff, p->dequant_QTX, eob,
+ // default_scan_8x8_transpose and
+ // av1_default_iscan_8x8_transpose have to be used together.
+ default_scan_8x8_transpose, av1_default_iscan_8x8_transpose);
}
break;
default:
assert(tx_size == TX_4X4);
+            // In the tx_size=4x4 case, aom_fdct4x4 and aom_fdct4x4_lp emit
+            // coefficients in the normal order, so the scan order does not
+            // need to be changed here.
if (use_hbd) {
aom_fdct4x4(src_diff, coeff, diff_stride);
av1_quantize_fp(coeff, 4 * 4, p->zbin_QTX, p->round_fp_QTX,
@@ -1050,8 +1270,9 @@ void av1_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, int mi_row,
aom_hadamard_lp_16x16(src_diff, diff_stride, low_coeff);
av1_quantize_lp(low_coeff, 16 * 16, p->round_fp_QTX,
p->quant_fp_QTX, low_qcoeff, low_dqcoeff,
- p->dequant_QTX, eob, scan_order->scan,
- scan_order->iscan);
+ p->dequant_QTX, eob,
+ default_scan_lp_16x16_transpose,
+ av1_default_iscan_lp_16x16_transpose);
break;
case TX_8X8:
if (!is_tx_8x8_dual_applicable) {
@@ -1061,7 +1282,8 @@ void av1_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, int mi_row,
}
av1_quantize_lp(low_coeff, 8 * 8, p->round_fp_QTX, p->quant_fp_QTX,
low_qcoeff, low_dqcoeff, p->dequant_QTX, eob,
- scan_order->scan, scan_order->iscan);
+ default_scan_8x8_transpose,
+ av1_default_iscan_8x8_transpose);
break;
default:
aom_fdct4x4_lp(src_diff, low_coeff, diff_stride);
@@ -1158,6 +1380,36 @@ static void free_pred_buffer(PRED_BUFFER *p) {
if (p != NULL) p->in_use = 0;
}
+static INLINE int get_drl_cost(const PREDICTION_MODE this_mode,
+ const int ref_mv_idx,
+ const MB_MODE_INFO_EXT *mbmi_ext,
+ const int (*const drl_mode_cost0)[2],
+ int8_t ref_frame_type) {
+ int cost = 0;
+ if (this_mode == NEWMV || this_mode == NEW_NEWMV) {
+ for (int idx = 0; idx < 2; ++idx) {
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
+ uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
+ cost += drl_mode_cost0[drl_ctx][ref_mv_idx != idx];
+ if (ref_mv_idx == idx) return cost;
+ }
+ }
+ return cost;
+ }
+
+ if (have_nearmv_in_inter_mode(this_mode)) {
+ for (int idx = 1; idx < 3; ++idx) {
+ if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
+ uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
+ cost += drl_mode_cost0[drl_ctx][ref_mv_idx != (idx - 1)];
+ if (ref_mv_idx == (idx - 1)) return cost;
+ }
+ }
+ return cost;
+ }
+ return cost;
+}
+
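get_drl_cost() above prices the truncated-unary signalling of the dynamic
reference list (DRL) index. A self-contained sketch of its NEWMV branch,
with illustrative names (bin_cost stands in for drl_mode_cost0 after the
per-bin context lookup):

static int drl_cost_sketch(int ref_mv_idx, int ref_mv_count,
                           const int bin_cost[2][2] /* [bin][bit] */) {
  int total = 0;
  for (int k = 0; k < 2; ++k) {
    if (ref_mv_count > k + 1) {      // bin k is only coded when needed
      total += bin_cost[k][ref_mv_idx != k];
      if (ref_mv_idx == k) break;    // a zero bit terminates the code
    }
  }
  return total;
}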
static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
int16_t mode_context) {
if (is_inter_compound_mode(mode)) {
@@ -1203,7 +1455,7 @@ static void newmv_diff_bias(MACROBLOCKD *xd, PREDICTION_MODE this_mode,
int left_mv_valid = 0;
int above_row = INVALID_MV_ROW_COL, above_col = INVALID_MV_ROW_COL;
int left_row = INVALID_MV_ROW_COL, left_col = INVALID_MV_ROW_COL;
- if (bsize >= BLOCK_64X64 && content_state_sb.source_sad != kHighSad &&
+ if (bsize >= BLOCK_64X64 && content_state_sb.source_sad_nonrd != kHighSad &&
spatial_variance < 300 &&
(mv_row > 16 || mv_row < -16 || mv_col > 16 || mv_col < -16)) {
this_rdc->rdcost = this_rdc->rdcost << 2;
@@ -1328,10 +1580,10 @@ struct estimate_block_intra_args {
* \param[in] col Column of a current TX block
* \param[in] plane_bsize Block size of a current prediction block
* \param[in] tx_size Transform size
- * \param[in] arg Pointer to a structure that holds paramaters
+ * \param[in] arg Pointer to a structure that holds parameters
* for intra mode search
*
- * \return Nothing is returned. Instead, best mode and RD Cost of the best mode
+ * \remark Nothing is returned. Instead, best mode and RD Cost of the best mode
* are set in \c args->rdc and \c args->mode
*/
static void estimate_block_intra(int plane, int block, int row, int col,
@@ -1352,6 +1604,7 @@ static void estimate_block_intra(int plane, int block, int row, int col,
RD_STATS this_rdc;
(void)block;
+ (void)plane_bsize;
av1_predict_intra_block_facade(cm, xd, plane, col, row, tx_size);
av1_invalid_rd_stats(&this_rdc);
@@ -1364,7 +1617,7 @@ static void estimate_block_intra(int plane, int block, int row, int col,
AOMMIN(tx_size, TX_16X16), DCT_DCT, 0);
} else {
int64_t sse = 0;
- model_rd_for_sb_uv(cpi, plane_bsize, x, xd, &this_rdc, &sse, plane, plane);
+ model_rd_for_sb_uv(cpi, bsize_tx, x, xd, &this_rdc, &sse, plane, plane);
}
p->src.buf = src_buf_base;
@@ -1417,7 +1670,7 @@ static void recheck_zeromv_after_denoising(
BEST_PICKMODE *best_pickmode, BLOCK_SIZE bsize, int mi_row, int mi_col) {
// If INTRA or GOLDEN reference was selected, re-evaluate ZEROMV on
// denoised result. Only do this under noise conditions, and if rdcost of
- // ZEROMV onoriginal source is not significantly higher than rdcost of best
+ // ZEROMV on original source is not significantly higher than rdcost of best
// mode.
if (cpi->noise_estimate.enabled && cpi->noise_estimate.level > kLow &&
ctx_den->zero_last_cost_orig < (best_rdc->rdcost << 3) &&
@@ -1440,7 +1693,8 @@ static void recheck_zeromv_after_denoising(
mi->interp_filters = av1_broadcast_interp_filter(EIGHTTAP_REGULAR);
xd->plane[0].pre[0] = yv12_mb[LAST_FRAME][0];
av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
- model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc, 1);
+ unsigned int var;
+ model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc, &var, 1, NULL);
const int16_t mode_ctx =
av1_mode_context_analyzer(mbmi_ext->mode_context, mi->ref_frame);
@@ -1481,7 +1735,7 @@ static void recheck_zeromv_after_denoising(
#define FILTER_SEARCH_SIZE 2
-/*!\brief Searches for the best intrpolation filter
+/*!\brief Searches for the best interpolation filter
*
* \ingroup nonrd_mode_search
* \callgraph
@@ -1506,13 +1760,15 @@ static void recheck_zeromv_after_denoising(
* for prediction re-use
* \param[out] this_early_term Flag, indicating that transform can be
* skipped
+ * \param[out] var The residue variance of the current
+ * predictor.
* \param[in] use_model_yrd_large Flag, indicating special logic to handle
* large blocks
* \param[in] best_sse Best sse so far.
*
- * \return Nothing is returned. Instead, calculated RD cost is placed to
+ * \remark Nothing is returned. Instead, calculated RD cost is placed to
* \c this_rdc and best filter is placed to \c mi->interp_filters. In case
- * \c reuse_inter_pred flag is set, this function also ouputs
+ * \c reuse_inter_pred flag is set, this function also outputs
 * \c this_mode_pred. Also \c this_early_term is set if transform can be
* skipped
*/
@@ -1520,8 +1776,8 @@ static void search_filter_ref(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *this_rdc,
int mi_row, int mi_col, PRED_BUFFER *tmp,
BLOCK_SIZE bsize, int reuse_inter_pred,
PRED_BUFFER **this_mode_pred,
- int *this_early_term, int use_model_yrd_large,
- int64_t best_sse) {
+ int *this_early_term, unsigned int *var,
+ int use_model_yrd_large, int64_t best_sse) {
AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblockd_plane *const pd = &xd->plane[0];
@@ -1544,16 +1800,19 @@ static void search_filter_ref(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *this_rdc,
mi->interp_filters.as_filters.x_filter = filters_ref_set[i].filter_x;
mi->interp_filters.as_filters.y_filter = filters_ref_set[i].filter_y;
av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
+ unsigned int curr_var = UINT_MAX;
if (use_model_yrd_large)
model_skip_for_sb_y_large(cpi, bsize, mi_row, mi_col, x, xd,
- &pf_rd_stats[i], this_early_term, 1, best_sse);
+ &pf_rd_stats[i], this_early_term, 1, best_sse,
+ &curr_var, UINT_MAX);
else
- model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rd_stats[i], 1);
+ model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rd_stats[i], &curr_var, 1, NULL);
pf_rd_stats[i].rate += av1_get_switchable_rate(
x, xd, cm->features.interp_filter, cm->seq_params->enable_dual_filter);
cost = RDCOST(x->rdmult, pf_rd_stats[i].rate, pf_rd_stats[i].dist);
pf_tx_size[i] = mi->tx_size;
if (cost < best_cost) {
+ *var = curr_var;
best_filter_index = i;
best_cost = cost;
best_skip = pf_rd_stats[i].skip_txfm;
@@ -1691,10 +1950,10 @@ static void search_motion_mode(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *this_rdc,
av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0, 0);
if (use_model_yrd_large)
model_skip_for_sb_y_large(cpi, bsize, mi_row, mi_col, x, xd,
- &pf_rd_stats[i], this_early_term, 1,
- best_sse);
+ &pf_rd_stats[i], this_early_term, 1, best_sse,
+ NULL, UINT_MAX);
else
- model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rd_stats[i], 1);
+ model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rd_stats[i], NULL, 1, NULL);
pf_rd_stats[i].rate +=
av1_get_switchable_rate(x, xd, cm->features.interp_filter,
cm->seq_params->enable_dual_filter);
@@ -1755,9 +2014,9 @@ static void search_motion_mode(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *this_rdc,
if (use_model_yrd_large)
model_skip_for_sb_y_large(cpi, bsize, mi_row, mi_col, x, xd,
&pf_rd_stats[i], this_early_term, 1,
- best_sse);
+ best_sse, NULL, UINT_MAX);
else
- model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rd_stats[i], 1);
+ model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rd_stats[i], NULL, 1, NULL);
pf_rd_stats[i].rate +=
mode_costs->motion_mode_cost[bsize][mi->motion_mode];
@@ -1789,18 +2048,32 @@ static void search_motion_mode(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *this_rdc,
#endif // !CONFIG_REALTIME_ONLY
#define COLLECT_PICK_MODE_STAT 0
+#define COLLECT_NON_SQR_STAT 0
#if COLLECT_PICK_MODE_STAT
+#include "aom_ports/aom_timer.h"
typedef struct _mode_search_stat {
int32_t num_blocks[BLOCK_SIZES];
- int64_t avg_block_times[BLOCK_SIZES];
+ int64_t total_block_times[BLOCK_SIZES];
int32_t num_searches[BLOCK_SIZES][MB_MODE_COUNT];
int32_t num_nonskipped_searches[BLOCK_SIZES][MB_MODE_COUNT];
int64_t search_times[BLOCK_SIZES][MB_MODE_COUNT];
int64_t nonskipped_search_times[BLOCK_SIZES][MB_MODE_COUNT];
+ int64_t ms_time[BLOCK_SIZES][MB_MODE_COUNT];
+ int64_t ifs_time[BLOCK_SIZES][MB_MODE_COUNT];
+ int64_t model_rd_time[BLOCK_SIZES][MB_MODE_COUNT];
+ int64_t txfm_time[BLOCK_SIZES][MB_MODE_COUNT];
struct aom_usec_timer timer1;
struct aom_usec_timer timer2;
+ struct aom_usec_timer bsize_timer;
} mode_search_stat;
+
+static void AOM_INLINE print_stage_time(const char *stage_name,
+ int64_t stage_time,
+ int64_t total_time) {
+  printf(" %s: %lld (%f%%)\n", stage_name, (long long)stage_time,
+         100 * stage_time / (float)total_time);
+}
#endif // COLLECT_PICK_MODE_STAT
static void compute_intra_yprediction(const AV1_COMMON *cm,
@@ -1875,6 +2148,17 @@ void av1_nonrd_pick_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost,
// mode tests.
for (int i = 0; i < 4; ++i) {
PREDICTION_MODE this_mode = intra_mode_list[i];
+
+    // Statistics gathered for intra mode evaluation in the nonrd path show
+    // that H_PRED is very unlikely to win when the best mode so far (out of
+    // DC_PRED and V_PRED) is V_PRED. A V_PRED winner suggests a vertically
+    // dominant pattern, so H_PRED is not evaluated in that case.
+ if (cpi->sf.rt_sf.prune_h_pred_using_best_mode_so_far &&
+ this_mode == H_PRED && best_mode == V_PRED)
+ continue;
+
this_rdc.dist = this_rdc.rate = 0;
args.mode = this_mode;
args.skippable = 1;
@@ -1932,22 +2216,28 @@ static AOM_INLINE void get_ref_frame_use_mask(AV1_COMP *cpi, MACROBLOCK *x,
const struct segmentation *const seg = &cm->seg;
const int is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
- // For SVC the usage of alt_ref is determined by the ref_frame_flags.
+ // When the ref_frame_config is used to set the reference frame structure
+ // then the usage of alt_ref is determined by the ref_frame_flags
+ // (and not the speed feature use_nonrd_altref_frame).
int use_alt_ref_frame =
- cpi->ppi->use_svc || cpi->sf.rt_sf.use_nonrd_altref_frame;
+ cpi->rtc_ref.set_ref_frame_config || cpi->sf.rt_sf.use_nonrd_altref_frame;
+
int use_golden_ref_frame = 1;
int use_last_ref_frame = 1;
- if (cpi->ppi->use_svc)
+ // When the ref_frame_config is used to set the reference frame structure:
+ // check if LAST is used as a reference. And only remove golden and altref
+ // references below if last is used as a reference.
+ if (cpi->rtc_ref.set_ref_frame_config)
use_last_ref_frame =
cpi->ref_frame_flags & AOM_LAST_FLAG ? use_last_ref_frame : 0;
- // Only remove golden and altref reference below if last is a reference,
- // which may not be the case for svc.
- if (use_last_ref_frame && cpi->rc.frames_since_golden == 0 &&
- gf_temporal_ref) {
+  // frames_since_golden is not used when the user sets the reference
+  // structure.
+ if (!cpi->rtc_ref.set_ref_frame_config && use_last_ref_frame &&
+ cpi->rc.frames_since_golden == 0 && gf_temporal_ref) {
use_golden_ref_frame = 0;
}
+
if (use_last_ref_frame && cpi->sf.rt_sf.short_circuit_low_temp_var &&
x->nonrd_prune_ref_frame_search) {
if (is_small_sb)
@@ -1964,15 +2254,17 @@ static AOM_INLINE void get_ref_frame_use_mask(AV1_COMP *cpi, MACROBLOCK *x,
}
if (use_last_ref_frame &&
- (x->nonrd_prune_ref_frame_search > 2 || x->force_zeromv_skip ||
+ (x->nonrd_prune_ref_frame_search > 2 || x->force_zeromv_skip_for_blk ||
(x->nonrd_prune_ref_frame_search > 1 && bsize > BLOCK_64X64))) {
use_golden_ref_frame = 0;
use_alt_ref_frame = 0;
// Keep golden (longer-term) reference if sb has high source sad, for
- // frames whose average souce_sad is below threshold. This is to try to
+ // frames whose average source_sad is below threshold. This is to try to
// capture case where only part of frame has high motion.
- if (x->content_state_sb.source_sad >= kHighSad && bsize <= BLOCK_32X32 &&
- cpi->rc.frame_source_sad < 50000)
+ // Exclude screen content mode.
+ if (cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN &&
+ x->content_state_sb.source_sad_nonrd >= kHighSad &&
+ bsize <= BLOCK_32X32 && cpi->rc.frame_source_sad < 50000)
use_golden_ref_frame = 1;
}
@@ -1982,6 +2274,19 @@ static AOM_INLINE void get_ref_frame_use_mask(AV1_COMP *cpi, MACROBLOCK *x,
use_alt_ref_frame = 0;
}
+ // Skip golden reference if color is set, on flat blocks with motion.
+ // For screen: always skip golden (if color_sensitivity_sb_g is set)
+ // except when x->nonrd_prune_ref_frame_search = 0. This latter flag
+ // may be set in the variance partition when golden is a much better
+ // reference than last, in which case it may not be worth skipping
+ // golden completely.
+ if (((cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
+ x->nonrd_prune_ref_frame_search != 0) ||
+ (x->source_variance < 500 &&
+ x->content_state_sb.source_sad_nonrd > kLowSad)) &&
+ (x->color_sensitivity_sb_g[0] == 1 || x->color_sensitivity_sb_g[1] == 1))
+ use_golden_ref_frame = 0;
+
use_alt_ref_frame =
cpi->ref_frame_flags & AOM_ALT_FLAG ? use_alt_ref_frame : 0;
use_golden_ref_frame =
@@ -2011,8 +2316,6 @@ static AOM_INLINE void get_ref_frame_use_mask(AV1_COMP *cpi, MACROBLOCK *x,
* \param[in] x Pointer to structure holding all the
* data for the current macroblock
* \param[in] bsize Current block size
- * \param[in] use_modeled_non_rd_cost Flag, indicating usage of curvfit
- * model for RD cost
* \param[in] best_early_term Flag, indicating that TX for the
* best inter mode was skipped
* \param[in] ref_cost_intra Cost of signalling intra mode
@@ -2029,14 +2332,13 @@ static AOM_INLINE void get_ref_frame_use_mask(AV1_COMP *cpi, MACROBLOCK *x,
* \param[in] ctx Pointer to structure holding coding
* contexts and modes for the block
*
- * \return Nothing is returned. Instead, calculated RD cost is placed to
+ * \remark Nothing is returned. Instead, calculated RD cost is placed to
* \c best_rdc and best selected mode is placed to \c best_pickmode
*/
static void estimate_intra_mode(
- AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int use_modeled_non_rd_cost,
- int best_early_term, unsigned int ref_cost_intra, int reuse_prediction,
- struct buf_2d *orig_dst, PRED_BUFFER *tmp_buffers,
- PRED_BUFFER **this_mode_pred, RD_STATS *best_rdc,
+ AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int best_early_term,
+ unsigned int ref_cost_intra, int reuse_prediction, struct buf_2d *orig_dst,
+ PRED_BUFFER *tmp_buffers, PRED_BUFFER **this_mode_pred, RD_STATS *best_rdc,
BEST_PICKMODE *best_pickmode, PICK_MODE_CONTEXT *ctx) {
AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -2045,6 +2347,8 @@ static void estimate_intra_mode(
const unsigned char segment_id = mi->segment_id;
const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
const int *const rd_thresh_freq_fact = x->thresh_freq_fact[bsize];
+ const bool is_screen_content =
+ cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN;
const int mi_row = xd->mi_row;
const int mi_col = xd->mi_col;
struct macroblockd_plane *const pd = &xd->plane[0];
@@ -2056,10 +2360,11 @@ static void estimate_intra_mode(
int intra_cost_penalty = av1_get_intra_cost_penalty(
quant_params->base_qindex, quant_params->y_dc_delta_q,
cm->seq_params->bit_depth);
- int64_t inter_mode_thresh = RDCOST(x->rdmult, intra_cost_penalty, 0);
+ int64_t inter_mode_thresh =
+ RDCOST(x->rdmult, ref_cost_intra + intra_cost_penalty, 0);
int perform_intra_pred = cpi->sf.rt_sf.check_intra_pred_nonrd;
int force_intra_check = 0;
- // For spatial enhancemanent layer: turn off intra prediction if the
+ // For spatial enhancement layer: turn off intra prediction if the
  // previous spatial layer as golden ref is not chosen as best reference.
  // Only do this for temporal enhancement layers and on non-key frames.
if (cpi->svc.spatial_layer_id > 0 &&
@@ -2090,17 +2395,22 @@ static void estimate_intra_mode(
abs(mi->mv[0].as_mv.row) >= motion_thresh ||
abs(mi->mv[0].as_mv.col) >= motion_thresh)) {
intra_cost_penalty = intra_cost_penalty >> 2;
- inter_mode_thresh = RDCOST(x->rdmult, intra_cost_penalty, 0);
+ inter_mode_thresh =
+ RDCOST(x->rdmult, ref_cost_intra + intra_cost_penalty, 0);
do_early_exit_rdthresh = 0;
}
- if (x->source_variance < AOMMAX(50, (spatial_var_thresh >> 1)) &&
- x->content_state_sb.source_sad >= kHighSad)
+ if ((x->source_variance < AOMMAX(50, (spatial_var_thresh >> 1)) &&
+ x->content_state_sb.source_sad_nonrd >= kHighSad) ||
+ (is_screen_content && x->source_variance < 50 &&
+ ((bsize >= BLOCK_32X32 &&
+ x->content_state_sb.source_sad_nonrd != kZeroSad) ||
+ x->color_sensitivity[0] == 1 || x->color_sensitivity[1] == 1)))
force_intra_check = 1;
// For big blocks worth checking intra (since only DC will be checked),
// even if best_early_term is set.
if (bsize >= BLOCK_32X32) best_early_term = 0;
} else if (cpi->sf.rt_sf.source_metrics_sb_nonrd &&
- x->content_state_sb.source_sad == kLowSad) {
+ x->content_state_sb.source_sad_nonrd <= kLowSad) {
perform_intra_pred = 0;
}
@@ -2113,19 +2423,25 @@ static void estimate_intra_mode(
if (!(best_rdc->rdcost == INT64_MAX || force_intra_check ||
(perform_intra_pred && !best_early_term &&
- best_rdc->rdcost > inter_mode_thresh &&
bsize <= cpi->sf.part_sf.max_intra_bsize))) {
return;
}
+  // Early exit based on the RD cost computed from the known rate. When
+  // is_screen_content is true, intra modes are favored, so a more
+  // conservative threshold is used for the early exit.
+ const int64_t known_rd = is_screen_content
+ ? CALC_BIASED_RDCOST(inter_mode_thresh)
+ : inter_mode_thresh;
+ if (known_rd > best_rdc->rdcost) return;
+
struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 };
TX_SIZE intra_tx_size = AOMMIN(
AOMMIN(max_txsize_lookup[bsize],
tx_mode_to_biggest_tx_size[txfm_params->tx_mode_search_type]),
TX_16X16);
- if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
- cpi->rc.high_source_sad && x->source_variance > spatial_var_thresh &&
- bsize <= BLOCK_16X16)
+ if (is_screen_content && cpi->rc.high_source_sad &&
+ x->source_variance > spatial_var_thresh && bsize <= BLOCK_16X16)
intra_tx_size = TX_4X4;
PRED_BUFFER *const best_pred = best_pickmode->best_pred;
@@ -2147,20 +2463,21 @@ static void estimate_intra_mode(
const THR_MODES mode_index = mode_idx[INTRA_FRAME][mode_offset(this_mode)];
const int64_t mode_rd_thresh = rd_threshes[mode_index];
- if (i > 2 || !(force_intra_check == 1 &&
- best_pickmode->best_ref_frame != INTRA_FRAME)) {
+ if (i > 2 || force_intra_check == 0) {
if (!((1 << this_mode) &
cpi->sf.rt_sf.intra_y_mode_bsize_mask_nrd[bsize]))
continue;
}
- if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
+ if (is_screen_content && cpi->sf.rt_sf.source_metrics_sb_nonrd) {
// For spatially flat blocks with zero motion only check
// DC mode.
- if (cpi->sf.rt_sf.source_metrics_sb_nonrd &&
- x->content_state_sb.source_sad == kZeroSad &&
+ if (x->content_state_sb.source_sad_nonrd == kZeroSad &&
x->source_variance == 0 && this_mode != DC_PRED)
continue;
+      // Only test intra for big blocks if the source variance is small.
+      else if (bsize > BLOCK_32X32 && x->source_variance > 50)
+        continue;
}
if (rd_less_than_thresh(best_rdc->rdcost, mode_rd_thresh,
@@ -2182,11 +2499,8 @@ static void estimate_intra_mode(
mi->tx_size = intra_tx_size;
compute_intra_yprediction(cm, this_mode, bsize, x, xd);
// Look into selecting tx_size here, based on prediction residual.
- if (use_modeled_non_rd_cost)
- model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc, 1);
- else
- av1_block_yrd(cpi, x, mi_row, mi_col, &this_rdc, &args.skippable, bsize,
- mi->tx_size, DCT_DCT, 0);
+ av1_block_yrd(cpi, x, mi_row, mi_col, &this_rdc, &args.skippable, bsize,
+ mi->tx_size, DCT_DCT, 0);
// TODO(kyslov@) Need to account for skippable
if (x->color_sensitivity[0]) {
av1_foreach_transformed_block_in_plane(xd, uv_bsize, 1,
@@ -2212,6 +2526,20 @@ static void estimate_intra_mode(
this_rdc.rate += mode_cost;
this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
+ if (is_screen_content && cpi->sf.rt_sf.source_metrics_sb_nonrd) {
+      // For blocks with low spatial variance and color SAD, favor the
+      // intra modes, but only on scene/slide changes.
+ if (cpi->rc.high_source_sad && x->source_variance < 800 &&
+ (x->color_sensitivity[0] || x->color_sensitivity[1]))
+ this_rdc.rdcost = CALC_BIASED_RDCOST(this_rdc.rdcost);
+ // Otherwise bias against intra for blocks with zero
+ // motion and no color, on non-scene/slide changes.
+ else if (!cpi->rc.high_source_sad && x->source_variance > 0 &&
+ x->content_state_sb.source_sad_nonrd == kZeroSad &&
+ x->color_sensitivity[0] == 0 && x->color_sensitivity[1] == 0)
+ this_rdc.rdcost = (3 * this_rdc.rdcost) >> 1;
+ }
+
if (this_rdc.rdcost < best_rdc->rdcost) {
*best_rdc = this_rdc;
best_pickmode->best_mode = this_mode;
@@ -2288,7 +2616,7 @@ static AOM_INLINE int skip_mode_by_low_temp(
return 1;
}
- if (content_state_sb.source_sad != kHighSad && bsize >= BLOCK_64X64 &&
+ if (content_state_sb.source_sad_nonrd != kHighSad && bsize >= BLOCK_64X64 &&
force_skip_low_temp_var && mode == NEWMV) {
return 1;
}
@@ -2319,20 +2647,30 @@ static AOM_INLINE int skip_mode_by_bsize_and_ref_frame(
return 0;
}
-void set_color_sensitivity(AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
- BLOCK_SIZE bsize, int y_sad,
- unsigned int source_variance) {
- const int factor = (bsize >= BLOCK_32X32) ? 2 : 3;
+void set_color_sensitivity(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
+ int y_sad, unsigned int source_variance,
+ struct buf_2d yv12_mb[MAX_MB_PLANE]) {
+ const int subsampling_x = cpi->common.seq_params->subsampling_x;
+ const int subsampling_y = cpi->common.seq_params->subsampling_y;
+ int factor = (bsize >= BLOCK_32X32) ? 2 : 3;
+ int shift = 3;
+ if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
+ cpi->rc.high_source_sad) {
+ factor = 1;
+ shift = 6;
+ }
NOISE_LEVEL noise_level = kLow;
int norm_sad =
y_sad >> (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
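  // b_width_log2_lookup/b_height_log2_lookup are in 4-pel units, so
  // norm_sad is the average SAD per 4x4 sub-block.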
+ unsigned int thresh_spatial = (cpi->common.width > 1920) ? 5000 : 1000;
// If the spatial source variance is high and the normalized y_sad
// is low, then y-channel is likely good for mode estimation, so keep
  // color_sensitivity off. Restrict this to low-noise content for now, since
  // there is some bdrate regression for noisy color clips.
if (cpi->noise_estimate.enabled)
noise_level = av1_noise_estimate_extract_level(&cpi->noise_estimate);
- if (noise_level == kLow && source_variance > 1000 && norm_sad < 50) {
+ if (noise_level == kLow && source_variance > thresh_spatial &&
+ norm_sad < 50) {
x->color_sensitivity[0] = 0;
x->color_sensitivity[1] = 0;
return;
@@ -2340,36 +2678,30 @@ void set_color_sensitivity(AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
for (int i = 1; i <= 2; ++i) {
if (x->color_sensitivity[i - 1] == 2 || source_variance < 50) {
struct macroblock_plane *const p = &x->plane[i];
- struct macroblockd_plane *const pd = &xd->plane[i];
const BLOCK_SIZE bs =
- get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
- const int uv_sad = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
- pd->dst.buf, pd->dst.stride);
+ get_plane_block_size(bsize, subsampling_x, subsampling_y);
+
+ const int uv_sad = cpi->ppi->fn_ptr[bs].sdf(
+ p->src.buf, p->src.stride, yv12_mb[i].buf, yv12_mb[i].stride);
+
const int norm_uv_sad =
uv_sad >> (b_width_log2_lookup[bs] + b_height_log2_lookup[bs]);
x->color_sensitivity[i - 1] =
- uv_sad > (factor * (y_sad >> 3)) && norm_uv_sad > 40;
+ uv_sad > (factor * (y_sad >> shift)) && norm_uv_sad > 40;
if (source_variance < 50 && norm_uv_sad > 100)
x->color_sensitivity[i - 1] = 1;
}
}
}
-void setup_compound_prediction(AV1_COMP *cpi, MACROBLOCK *x,
+void setup_compound_prediction(const AV1_COMMON *cm, MACROBLOCK *x,
struct buf_2d yv12_mb[8][MAX_MB_PLANE],
- int *use_ref_frame_mask, int flag_comp,
- int *ref_mv_idx) {
- AV1_COMMON *const cm = &cpi->common;
+ const int *use_ref_frame_mask,
+ const MV_REFERENCE_FRAME *rf, int *ref_mv_idx) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = xd->mi[0];
MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
- MV_REFERENCE_FRAME rf[2] = { LAST_FRAME, GOLDEN_FRAME };
MV_REFERENCE_FRAME ref_frame_comp;
- if (flag_comp == 1) {
- rf[1] = LAST2_FRAME;
- } else if (flag_comp == 2) {
- rf[1] = ALTREF_FRAME;
- }
if (!use_ref_frame_mask[rf[1]]) {
// Need to setup pred_block, if it hasn't been done in find_predictors.
const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, rf[1]);
@@ -2390,95 +2722,202 @@ void setup_compound_prediction(AV1_COMP *cpi, MACROBLOCK *x,
*ref_mv_idx = mbmi->ref_mv_idx + 1;
}
-static void set_compound_mode(MACROBLOCK *x, int comp_index, int ref_frame,
- int ref_frame2, int ref_mv_idx,
+static void set_compound_mode(MACROBLOCK *x, int ref_frame, int ref_frame2,
+ int ref_mv_idx,
int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES],
- PREDICTION_MODE *this_mode) {
+ PREDICTION_MODE this_mode) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mi = xd->mi[0];
- *this_mode = GLOBAL_GLOBALMV;
mi->ref_frame[0] = ref_frame;
mi->ref_frame[1] = ref_frame2;
mi->compound_idx = 1;
mi->comp_group_idx = 0;
mi->interinter_comp.type = COMPOUND_AVERAGE;
MV_REFERENCE_FRAME ref_frame_comp = av1_ref_frame_type(mi->ref_frame);
- if (comp_index % 3 == 0) {
- frame_mv[*this_mode][ref_frame].as_int = 0;
- frame_mv[*this_mode][ref_frame2].as_int = 0;
- } else if (comp_index % 3 == 1) {
- *this_mode = NEAREST_NEARESTMV;
- frame_mv[*this_mode][ref_frame].as_int =
+ if (this_mode == GLOBAL_GLOBALMV) {
+ frame_mv[this_mode][ref_frame].as_int = 0;
+ frame_mv[this_mode][ref_frame2].as_int = 0;
+ } else if (this_mode == NEAREST_NEARESTMV) {
+ frame_mv[this_mode][ref_frame].as_int =
xd->ref_mv_stack[ref_frame_comp][0].this_mv.as_int;
- frame_mv[*this_mode][ref_frame2].as_int =
+ frame_mv[this_mode][ref_frame2].as_int =
xd->ref_mv_stack[ref_frame_comp][0].comp_mv.as_int;
- } else if (comp_index % 3 == 2) {
- *this_mode = NEAR_NEARMV;
- frame_mv[*this_mode][ref_frame].as_int =
+ } else if (this_mode == NEAR_NEARMV) {
+ frame_mv[this_mode][ref_frame].as_int =
xd->ref_mv_stack[ref_frame_comp][ref_mv_idx].this_mv.as_int;
- frame_mv[*this_mode][ref_frame2].as_int =
+ frame_mv[this_mode][ref_frame2].as_int =
xd->ref_mv_stack[ref_frame_comp][ref_mv_idx].comp_mv.as_int;
}
}
-static int skip_comp_based_on_sad(AV1_COMP *cpi, MACROBLOCK *x,
- const int mi_row, const int mi_col,
- BLOCK_SIZE bsize) {
- AV1_COMMON *const cm = &cpi->common;
- assert(!(mi_row % 16) && !(mi_col % 16));
- const int sb_size_by_mb = (cm->seq_params->sb_size == BLOCK_128X128)
- ? (cm->seq_params->mib_size >> 1)
- : cm->seq_params->mib_size;
- const int sb_cols =
- (cm->mi_params.mi_cols + sb_size_by_mb - 1) / sb_size_by_mb;
- const uint64_t sad_skp_comp_th[2][3] = { { 2700, 3100 }, // CPU 9
- { 2700, 3200 } }; // CPU 10
- const uint64_t sad_blkwise_var_th = 5000;
- const float qindex_th_scale[5] = { 0.75f, 0.9f, 1.0f, 1.1f, 1.25f };
- const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
- assert(qindex_band < 5);
- const int sp_idx = (cpi->sf.rt_sf.sad_based_comp_prune >= 2);
- const int bsize_idx = (bsize == BLOCK_128X128);
- const uint64_t sad_skp_comp_th_val = (uint64_t)(
- sad_skp_comp_th[sp_idx][bsize_idx] * qindex_th_scale[qindex_band]);
- uint64_t blk_sad = 0, sad00, sad01, sad10, sad11, min_sad, max_sad;
- const int sbi_col = mi_col / 16;
- const int sbi_row = mi_row / 16;
- const uint64_t *cur_blk_sad =
- &cpi->src_sad_blk_64x64[sbi_col + sbi_row * sb_cols];
-
- if (bsize == BLOCK_128X128) {
- sad00 = cur_blk_sad[0];
- sad01 = cur_blk_sad[1];
- sad10 = cur_blk_sad[sb_cols];
- sad11 = cur_blk_sad[1 + sb_cols];
- min_sad = AOMMIN(AOMMIN(AOMMIN(sad00, sad01), sad10), sad11);
- max_sad = AOMMAX(AOMMAX(AOMMAX(sad00, sad01), sad10), sad11);
- if (max_sad - min_sad > sad_blkwise_var_th) return 0;
- blk_sad = (sad00 + sad01 + sad10 + sad11 + 2) >> 2;
- } else if (bsize == BLOCK_128X64) {
- sad00 = cur_blk_sad[0];
- sad01 = cur_blk_sad[1];
- min_sad = AOMMIN(sad00, sad01);
- max_sad = AOMMAX(sad00, sad01);
- if (max_sad - min_sad > sad_blkwise_var_th) return 0;
- blk_sad = (sad00 + sad01 + 1) >> 1;
- } else if (bsize == BLOCK_64X128) {
- sad00 = cur_blk_sad[0];
- sad10 = cur_blk_sad[sb_cols];
- min_sad = AOMMIN(sad00, sad10);
- max_sad = AOMMAX(sad00, sad10);
- if (max_sad - min_sad > sad_blkwise_var_th) return 0;
- blk_sad = (sad00 + sad10 + 1) >> 1;
- } else if (bsize <= BLOCK_64X64) {
- blk_sad = cur_blk_sad[0];
- } else {
- assert(0);
+// Prune compound mode if the single mode variance is lower than a fixed
+// percentage of the median value.
+static bool skip_comp_based_on_var(
+ const unsigned int (*single_vars)[REF_FRAMES], BLOCK_SIZE bsize) {
+ unsigned int best_var = UINT_MAX;
+ for (int cur_mode_idx = 0; cur_mode_idx < RTC_INTER_MODES; cur_mode_idx++) {
+ for (int ref_idx = 0; ref_idx < REF_FRAMES; ref_idx++) {
+ best_var = AOMMIN(best_var, single_vars[cur_mode_idx][ref_idx]);
+ }
}
+ const unsigned int thresh_64 = (unsigned int)(0.57356805f * 8659);
+ const unsigned int thresh_32 = (unsigned int)(0.23964763f * 4281);
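+  // Numerically, thresh_64 evaluates to roughly 4966 and thresh_32 to
+  // roughly 1025 once the float products are truncated to unsigned.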
- if (blk_sad < sad_skp_comp_th_val) return 1;
+  // Currently, the thresholds for 128 and 16 are not well-tuned, so the
+  // results from 64 and 32 are used as a heuristic.
+ switch (bsize) {
+ case BLOCK_128X128: return best_var < 4 * thresh_64;
+ case BLOCK_64X64: return best_var < thresh_64;
+ case BLOCK_32X32: return best_var < thresh_32;
+ case BLOCK_16X16: return best_var < thresh_32 / 4;
+ default: return false;
+ }
+}
- return 0;
+static AOM_FORCE_INLINE void fill_single_inter_mode_costs(
+ int (*single_inter_mode_costs)[REF_FRAMES], const int num_inter_modes,
+ const REF_MODE *reference_mode_set, const ModeCosts *mode_costs,
+ const int16_t *mode_context) {
+ bool ref_frame_used[REF_FRAMES] = { false };
+ for (int idx = 0; idx < num_inter_modes; idx++) {
+ ref_frame_used[reference_mode_set[idx].ref_frame] = true;
+ }
+
+ for (int this_ref_frame = LAST_FRAME; this_ref_frame < REF_FRAMES;
+ this_ref_frame++) {
+ if (!ref_frame_used[this_ref_frame]) {
+ continue;
+ }
+
+ const MV_REFERENCE_FRAME rf[2] = { this_ref_frame, NONE_FRAME };
+ const int16_t mode_ctx = av1_mode_context_analyzer(mode_context, rf);
+ for (PREDICTION_MODE this_mode = NEARESTMV; this_mode <= NEWMV;
+ this_mode++) {
+ single_inter_mode_costs[INTER_OFFSET(this_mode)][this_ref_frame] =
+ cost_mv_ref(mode_costs, this_mode, mode_ctx);
+ }
+ }
+}
+
+static AOM_INLINE bool is_globalmv_better(
+ PREDICTION_MODE this_mode, MV_REFERENCE_FRAME ref_frame, int rate_mv,
+ const ModeCosts *mode_costs,
+ const int (*single_inter_mode_costs)[REF_FRAMES],
+ const MB_MODE_INFO_EXT *mbmi_ext) {
+ const int globalmv_mode_cost =
+ single_inter_mode_costs[INTER_OFFSET(GLOBALMV)][ref_frame];
+ int this_mode_cost =
+ rate_mv + single_inter_mode_costs[INTER_OFFSET(this_mode)][ref_frame];
+ if (this_mode == NEWMV || this_mode == NEARMV) {
+ const MV_REFERENCE_FRAME rf[2] = { ref_frame, NONE_FRAME };
+ this_mode_cost += get_drl_cost(
+ NEWMV, 0, mbmi_ext, mode_costs->drl_mode_cost0, av1_ref_frame_type(rf));
+ }
+ return this_mode_cost > globalmv_mode_cost;
+}
+
+// Set up the mv/ref_frames etc based on the comp_index. Returns 1 if it
+// succeeds, 0 if it fails.
+static AOM_INLINE int setup_compound_params_from_comp_idx(
+ const AV1_COMP *cpi, MACROBLOCK *x, struct buf_2d yv12_mb[8][MAX_MB_PLANE],
+ PREDICTION_MODE *this_mode, MV_REFERENCE_FRAME *ref_frame,
+ MV_REFERENCE_FRAME *ref_frame2, int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES],
+ const int *use_ref_frame_mask, int comp_index,
+ bool comp_use_zero_zeromv_only, MV_REFERENCE_FRAME *last_comp_ref_frame) {
+ const MV_REFERENCE_FRAME *rf = comp_ref_mode_set[comp_index].ref_frame;
+ *this_mode = comp_ref_mode_set[comp_index].pred_mode;
+ *ref_frame = rf[0];
+ *ref_frame2 = rf[1];
+ assert(*ref_frame == LAST_FRAME);
+ assert(*this_mode == GLOBAL_GLOBALMV || *this_mode == NEAREST_NEARESTMV);
+ if (comp_use_zero_zeromv_only && *this_mode != GLOBAL_GLOBALMV) {
+ return 0;
+ }
+ if (*ref_frame2 == GOLDEN_FRAME &&
+ (cpi->sf.rt_sf.ref_frame_comp_nonrd[0] == 0 ||
+ !(cpi->ref_frame_flags & AOM_GOLD_FLAG))) {
+ return 0;
+ } else if (*ref_frame2 == LAST2_FRAME &&
+ (cpi->sf.rt_sf.ref_frame_comp_nonrd[1] == 0 ||
+ !(cpi->ref_frame_flags & AOM_LAST2_FLAG))) {
+ return 0;
+ } else if (*ref_frame2 == ALTREF_FRAME &&
+ (cpi->sf.rt_sf.ref_frame_comp_nonrd[2] == 0 ||
+ !(cpi->ref_frame_flags & AOM_ALT_FLAG))) {
+ return 0;
+ }
+ int ref_mv_idx = 0;
+ if (*last_comp_ref_frame != rf[1]) {
+ // Only needs to be done once per reference pair.
+ setup_compound_prediction(&cpi->common, x, yv12_mb, use_ref_frame_mask, rf,
+ &ref_mv_idx);
+ *last_comp_ref_frame = rf[1];
+ }
+ set_compound_mode(x, *ref_frame, *ref_frame2, ref_mv_idx, frame_mv,
+ *this_mode);
+ if (*this_mode != GLOBAL_GLOBALMV &&
+ frame_mv[*this_mode][*ref_frame].as_int == 0 &&
+ frame_mv[*this_mode][*ref_frame2].as_int == 0) {
+ return 0;
+ }
+
+ return 1;
+}
+
+static AOM_INLINE bool previous_mode_performed_poorly(
+ PREDICTION_MODE mode, MV_REFERENCE_FRAME ref_frame,
+ const unsigned int (*vars)[REF_FRAMES],
+ const int64_t (*uv_dist)[REF_FRAMES]) {
+ unsigned int best_var = UINT_MAX;
+ int64_t best_uv_dist = INT64_MAX;
+ for (int midx = 0; midx < RTC_INTER_MODES; midx++) {
+ best_var = AOMMIN(best_var, vars[midx][ref_frame]);
+ best_uv_dist = AOMMIN(best_uv_dist, uv_dist[midx][ref_frame]);
+ }
+ assert(best_var != UINT_MAX && "Invalid variance data.");
+ const float mult = 1.125f;
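+  // i.e. flag the mode when its variance is more than 12.5% above the best
+  // variance recorded for this reference frame.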
+ bool var_bad = mult * best_var < vars[INTER_OFFSET(mode)][ref_frame];
+ if (uv_dist[INTER_OFFSET(mode)][ref_frame] < INT64_MAX &&
+ best_uv_dist != uv_dist[INTER_OFFSET(mode)][ref_frame]) {
+ // If we have chroma info, then take it into account
+ var_bad &= mult * best_uv_dist < uv_dist[INTER_OFFSET(mode)][ref_frame];
+ }
+ return var_bad;
+}
+
+static AOM_INLINE bool prune_compoundmode_with_singlemode_var(
+ PREDICTION_MODE compound_mode, MV_REFERENCE_FRAME ref_frame,
+ MV_REFERENCE_FRAME ref_frame2, const int_mv (*frame_mv)[REF_FRAMES],
+ const uint8_t (*mode_checked)[REF_FRAMES],
+ const unsigned int (*vars)[REF_FRAMES],
+ const int64_t (*uv_dist)[REF_FRAMES]) {
+ const PREDICTION_MODE single_mode0 = compound_ref0_mode(compound_mode);
+ const PREDICTION_MODE single_mode1 = compound_ref1_mode(compound_mode);
+
+ bool first_ref_valid = false, second_ref_valid = false;
+ bool first_ref_bad = false, second_ref_bad = false;
+ if (mode_checked[single_mode0][ref_frame] &&
+ frame_mv[single_mode0][ref_frame].as_int ==
+ frame_mv[compound_mode][ref_frame].as_int &&
+ vars[INTER_OFFSET(single_mode0)][ref_frame] < UINT_MAX) {
+ first_ref_valid = true;
+ first_ref_bad =
+ previous_mode_performed_poorly(single_mode0, ref_frame, vars, uv_dist);
+ }
+ if (mode_checked[single_mode1][ref_frame2] &&
+ frame_mv[single_mode1][ref_frame2].as_int ==
+ frame_mv[compound_mode][ref_frame2].as_int &&
+ vars[INTER_OFFSET(single_mode1)][ref_frame2] < UINT_MAX) {
+ second_ref_valid = true;
+ second_ref_bad =
+ previous_mode_performed_poorly(single_mode1, ref_frame2, vars, uv_dist);
+ }
+ if (first_ref_valid && second_ref_valid) {
+ return first_ref_bad && second_ref_bad;
+ } else if (first_ref_valid || second_ref_valid) {
+ return first_ref_bad || second_ref_bad;
+ }
+ return false;
}
void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
@@ -2489,6 +2928,7 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mi = xd->mi[0];
struct macroblockd_plane *const pd = &xd->plane[0];
+ const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
const InterpFilter filter_ref = cm->features.interp_filter;
const InterpFilter default_interp_filter = EIGHTTAP_REGULAR;
BEST_PICKMODE best_pickmode;
@@ -2509,17 +2949,9 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
int force_skip_low_temp_var = 0;
int use_ref_frame_mask[REF_FRAMES] = { 0 };
unsigned int sse_zeromv_norm = UINT_MAX;
- // Use mode set that includes zeromv (via globalmv) for speed >= 9 for
- // content with low motion, and always for force_zeromv_skip.
- int use_zeromv =
- cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN ||
- ((cpi->oxcf.speed >= 9 && cpi->rc.avg_frame_low_motion > 70) ||
- cpi->sf.rt_sf.nonrd_agressive_skip || x->force_zeromv_skip);
int skip_pred_mv = 0;
- const int num_inter_modes =
- use_zeromv ? NUM_INTER_MODES_REDUCED : NUM_INTER_MODES_RT;
- const REF_MODE *const ref_mode_set =
- use_zeromv ? ref_mode_set_reduced : ref_mode_set_rt;
+ const int num_inter_modes = NUM_INTER_MODES;
+ bool check_globalmv = cpi->sf.rt_sf.check_globalmv_on_single_ref;
PRED_BUFFER tmp[4];
DECLARE_ALIGNED(16, uint8_t, pred_buf[3 * 128 * 128]);
PRED_BUFFER *this_mode_pred = NULL;
@@ -2535,7 +2967,7 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
const TxfmSearchParams *txfm_params = &x->txfm_search_params;
TxfmSearchInfo *txfm_info = &x->txfm_search_info;
#if COLLECT_PICK_MODE_STAT
- aom_usec_timer_start(&ms_stat.timer2);
+ aom_usec_timer_start(&ms_stat.bsize_timer);
#endif
int64_t thresh_sad_pred = INT64_MAX;
const int mi_row = xd->mi_row;
@@ -2543,12 +2975,16 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
int svc_mv_col = 0;
int svc_mv_row = 0;
int force_mv_inter_layer = 0;
- int use_modeled_non_rd_cost = 0;
- int comp_pred = 0;
- int num_comp_modes_ref = 0;
- int tot_num_comp_modes = 9;
- int ref_mv_idx = 0;
- int skip_comp_mode = 0;
+ bool comp_use_zero_zeromv_only = 0;
+ int tot_num_comp_modes = NUM_COMP_INTER_MODES_RT;
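+  // Per-(mode, ref) variance and chroma-distortion records for the single
+  // reference modes; UINT_MAX / INT64_MAX mark entries not yet measured.
+  // These feed the compound-mode pruning further below.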
+ unsigned int vars[RTC_INTER_MODES][REF_FRAMES];
+ int64_t uv_dist[RTC_INTER_MODES][REF_FRAMES];
+ for (int idx = 0; idx < RTC_INTER_MODES; idx++) {
+ for (int ref = 0; ref < REF_FRAMES; ref++) {
+ vars[idx][ref] = UINT_MAX;
+ uv_dist[idx][ref] = INT64_MAX;
+ }
+ }
#if CONFIG_AV1_TEMPORAL_DENOISING
const int denoise_recheck_zeromv = 1;
AV1_PICKMODE_CTX_DEN ctx_den;
@@ -2562,7 +2998,7 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
const ModeCosts *mode_costs = &x->mode_costs;
- estimate_single_ref_frame_costs(cm, xd, mode_costs, segment_id,
+ estimate_single_ref_frame_costs(cm, xd, mode_costs, segment_id, bsize,
ref_costs_single);
memset(&mode_checked[0][0], 0, MB_MODE_COUNT * REF_FRAMES);
@@ -2606,7 +3042,7 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
// (phase = 8), the target decimated pixel is shifted by (1/2, 1/2) relative
// to source, so use subpel motion vector to compensate. The nonzero motion
// is half pixel shifted to left and top, so (-4, -4). This has more effect
- // on higher resolutins, so condition it on that for now.
+ // on higher resolutions, so condition it on that for now.
if (cpi->ppi->use_svc && svc->spatial_layer_id > 0 &&
svc->downsample_filter_phase[svc->spatial_layer_id - 1] == 8 &&
cm->width * cm->height > 640 * 480) {
@@ -2617,20 +3053,18 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
get_ref_frame_use_mask(cpi, x, mi, mi_row, mi_col, bsize, gf_temporal_ref,
use_ref_frame_mask, &force_skip_low_temp_var);
- skip_pred_mv = (x->nonrd_prune_ref_frame_search > 2 &&
+ skip_pred_mv = x->force_zeromv_skip_for_blk ||
+ (x->nonrd_prune_ref_frame_search > 2 &&
x->color_sensitivity[0] != 2 && x->color_sensitivity[1] != 2);
- // Compound modes per reference pair (GOLDEN_LAST/LAST2_LAST/ALTREF_LAST):
- // (0_0)/(NEAREST_NEAREST)/(NEAR_NEAR).
- // For now to reduce slowdowm, use only (0,0) for blocks above 16x16
- // for non-svc case or on enhancement layers for svc.
if (cpi->sf.rt_sf.use_comp_ref_nonrd && is_comp_ref_allowed(bsize)) {
- if (cpi->ppi->use_svc && cpi->svc.temporal_layer_id == 0)
- num_comp_modes_ref = 2;
- else if (bsize > BLOCK_16X16)
- num_comp_modes_ref = 1;
- else
+      // Only search compound if bsize > BLOCK_16X16.
+ if (bsize > BLOCK_16X16) {
+ comp_use_zero_zeromv_only =
+ cpi->sf.rt_sf.check_only_zero_zeromv_on_large_blocks;
+ } else {
tot_num_comp_modes = 0;
+ }
} else {
tot_num_comp_modes = 0;
}
@@ -2644,7 +3078,7 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
}
thresh_sad_pred = ((int64_t)x->pred_mv_sad[LAST_FRAME]) << 1;
- // Increase threshold for less agressive pruning.
+ // Increase threshold for less aggressive pruning.
if (cpi->sf.rt_sf.nonrd_prune_ref_frame_search == 1)
thresh_sad_pred += (x->pred_mv_sad[LAST_FRAME] >> 2);
@@ -2657,21 +3091,6 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
const int enable_filter_search =
is_filter_search_enabled(cpi, mi_row, mi_col, bsize, segment_id);
- // TODO(marpan): Look into reducing these conditions. For now constrain
- // it to avoid significant bdrate loss.
- if (cpi->sf.rt_sf.use_modeled_non_rd_cost) {
- if (cpi->svc.non_reference_frame)
- use_modeled_non_rd_cost = 1;
- else if (cpi->svc.number_temporal_layers > 1 &&
- cpi->svc.temporal_layer_id == 0)
- use_modeled_non_rd_cost = 0;
- else
- use_modeled_non_rd_cost =
- (quant_params->base_qindex > 120 && x->source_variance > 100 &&
- bsize <= BLOCK_16X16 && !x->content_state_sb.lighting_change &&
- x->content_state_sb.source_sad != kHighSad);
- }
-
#if COLLECT_PICK_MODE_STAT
ms_stat.num_blocks[bsize]++;
#endif
@@ -2681,69 +3100,43 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
tx_mode_to_biggest_tx_size[txfm_params->tx_mode_search_type]),
TX_16X16);
- // Skip compound mode based on sad
- if ((cpi->sf.rt_sf.sad_based_comp_prune) && (bsize >= BLOCK_64X64) &&
- (cpi->src_sad_blk_64x64 != NULL))
- skip_comp_mode = skip_comp_based_on_sad(cpi, x, mi_row, mi_col, bsize);
+ int single_inter_mode_costs[RTC_INTER_MODES][REF_FRAMES];
+ fill_single_inter_mode_costs(single_inter_mode_costs, num_inter_modes,
+ ref_mode_set, mode_costs,
+ mbmi_ext->mode_context);
+
+ MV_REFERENCE_FRAME last_comp_ref_frame = NONE_FRAME;
for (int idx = 0; idx < num_inter_modes + tot_num_comp_modes; ++idx) {
+ // If we are at the first compound mode, and the single modes already
+ // perform well, then end the search.
+ if (cpi->sf.rt_sf.skip_compound_based_on_var && idx == num_inter_modes &&
+ skip_comp_based_on_var(vars, bsize)) {
+ break;
+ }
+
const struct segmentation *const seg = &cm->seg;
int rate_mv = 0;
int is_skippable;
int this_early_term = 0;
int skip_this_mv = 0;
- comp_pred = 0;
+ int comp_pred = 0;
+ unsigned int var = UINT_MAX;
PREDICTION_MODE this_mode;
- MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
RD_STATS nonskip_rdc;
av1_invalid_rd_stats(&nonskip_rdc);
memset(txfm_info->blk_skip, 0,
sizeof(txfm_info->blk_skip[0]) * num_8x8_blocks);
if (idx >= num_inter_modes) {
- if (skip_comp_mode) continue;
- int comp_index = idx - num_inter_modes;
- if (comp_index % 3 == 0) {
- int i = 0;
- ref_mv_idx = 0;
- // Only needs to be done once per reference pair.
- if (comp_index == 3) i = 1;
- if (comp_index == 6) i = 2;
- if (cpi->sf.rt_sf.ref_frame_comp_nonrd[i])
- setup_compound_prediction(cpi, x, yv12_mb, use_ref_frame_mask, i,
- &ref_mv_idx);
- }
- // num_comp_modes_ref == 1 only do (0,0)
- if (num_comp_modes_ref == 1 && comp_index % 3 != 0) continue;
- // num_comp_modes_ref == 2 only do (0,0) and (NEAREST_NEAREST)
- if (num_comp_modes_ref == 2 && comp_index % 3 == 2) continue;
- ref_frame = LAST_FRAME;
- ref_frame2 = GOLDEN_FRAME;
- if (comp_index >= 0 && comp_index < 3) {
- // comp_index = 0,1,2 for (0/NEAREST/NEAR) for GOLDEN_LAST.
- if (cpi->sf.rt_sf.ref_frame_comp_nonrd[0] == 0 ||
- !(cpi->ref_frame_flags & AOM_GOLD_FLAG))
- continue;
- } else if (comp_index >= 3 && comp_index < 6) {
- // comp_index = 3,4,5 for (0/NEAREST/NEAR) for LAST2_LAST.
- ref_frame2 = LAST2_FRAME;
- if (cpi->sf.rt_sf.ref_frame_comp_nonrd[1] == 0 ||
- !(cpi->ref_frame_flags & AOM_LAST2_FLAG))
- continue;
- } else if (comp_index >= 6 && comp_index < 9) {
- // comp_index = 6,7,8 for (0/NEAREST/NEAR) for ALTREF_LAST.
- ref_frame2 = ALTREF_FRAME;
- if (cpi->sf.rt_sf.ref_frame_comp_nonrd[2] == 0 ||
- !(cpi->ref_frame_flags & AOM_ALT_FLAG))
- continue;
- }
- set_compound_mode(x, comp_index, ref_frame, ref_frame2, ref_mv_idx,
- frame_mv, &this_mode);
- if (this_mode != GLOBAL_GLOBALMV &&
- frame_mv[this_mode][ref_frame].as_int == 0 &&
- frame_mv[this_mode][ref_frame2].as_int == 0)
+ const int comp_index = idx - num_inter_modes;
+ if (!setup_compound_params_from_comp_idx(
+ cpi, x, yv12_mb, &this_mode, &ref_frame, &ref_frame2, frame_mv,
+ use_ref_frame_mask, comp_index, comp_use_zero_zeromv_only,
+ &last_comp_ref_frame)) {
continue;
+ }
comp_pred = 1;
} else {
this_mode = ref_mode_set[idx].pred_mode;
@@ -2751,6 +3144,14 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
ref_frame2 = NONE_FRAME;
}
+ if (!comp_pred && mode_checked[this_mode][ref_frame]) {
+ continue;
+ }
+
+ if (!check_globalmv && this_mode == GLOBALMV) {
+ continue;
+ }
+
#if COLLECT_PICK_MODE_STAT
aom_usec_timer_start(&ms_stat.timer1);
ms_stat.num_searches[bsize][this_mode]++;
@@ -2761,14 +3162,25 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
if (!use_ref_frame_mask[ref_frame]) continue;
- if (x->force_zeromv_skip &&
- (this_mode != GLOBALMV || ref_frame != LAST_FRAME))
+ if (x->force_zeromv_skip_for_blk &&
+ ((!(this_mode == NEARESTMV &&
+ frame_mv[this_mode][ref_frame].as_int == 0) &&
+ this_mode != GLOBALMV) ||
+ ref_frame != LAST_FRAME))
continue;
+ if (cpi->sf.rt_sf.prune_compoundmode_with_singlemode_var && comp_pred &&
+ prune_compoundmode_with_singlemode_var(this_mode, ref_frame, ref_frame2,
+ frame_mv, mode_checked, vars,
+ uv_dist)) {
+ continue;
+ }
+
force_mv_inter_layer = 0;
if (cpi->ppi->use_svc && svc->spatial_layer_id > 0 &&
((ref_frame == LAST_FRAME && svc->skip_mvsearch_last) ||
- (ref_frame == GOLDEN_FRAME && svc->skip_mvsearch_gf))) {
+ (ref_frame == GOLDEN_FRAME && svc->skip_mvsearch_gf) ||
+ (ref_frame == ALTREF_FRAME && svc->skip_mvsearch_altref))) {
// Only test mode if NEARESTMV/NEARMV is (svc_mv_col, svc_mv_row),
// otherwise set NEWMV to (svc_mv_col, svc_mv_row).
// Skip newmv and filter search.
@@ -2792,24 +3204,32 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
// If source_sad is computed: skip non-zero motion
// check for stationary (super)blocks. Otherwise if superblock
- // has motion skip the modes with zero motion for flat blocks.
+        // has motion, skip the modes with zero motion for flat blocks
+        // when color sensitivity is not set.
+        // The latter condition should also apply to newmv with a (0, 0)
+        // motion vector, so it is repeated below after search_new_mv.
if (cpi->sf.rt_sf.source_metrics_sb_nonrd) {
if ((frame_mv[this_mode][ref_frame].as_int != 0 &&
- x->content_state_sb.source_sad == kZeroSad) ||
+ x->content_state_sb.source_sad_nonrd == kZeroSad) ||
(frame_mv[this_mode][ref_frame].as_int == 0 &&
- x->content_state_sb.source_sad != kZeroSad &&
+ x->content_state_sb.source_sad_nonrd != kZeroSad &&
+ ((x->color_sensitivity[0] == 0 && x->color_sensitivity[1] == 0) ||
+ cpi->rc.high_source_sad) &&
x->source_variance == 0))
continue;
}
- // Skip NEWMV search on scene cuts for flat blocks.
- if (cpi->rc.high_source_sad && this_mode == NEWMV &&
- (x->source_variance < 100))
+ // Skip NEWMV search for flat blocks.
+ if (this_mode == NEWMV && x->source_variance < 100) continue;
+ // Skip non-LAST for color on flat blocks.
+ if (ref_frame > LAST_FRAME && x->source_variance == 0 &&
+ (x->color_sensitivity[0] == 1 || x->color_sensitivity[1] == 1))
continue;
}
if (skip_mode_by_bsize_and_ref_frame(
this_mode, ref_frame, bsize, x->nonrd_prune_ref_frame_search,
- sse_zeromv_norm, cpi->sf.rt_sf.nonrd_agressive_skip))
+ sse_zeromv_norm, cpi->sf.rt_sf.nonrd_aggressive_skip))
continue;
if (skip_mode_by_low_temp(this_mode, ref_frame, bsize, x->content_state_sb,
@@ -2837,7 +3257,7 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
this_mode, ref_frame, frame_mv[this_mode][ref_frame],
cpi->rc.frames_since_golden, rd_threshes, rd_thresh_freq_fact,
best_rdc.rdcost, best_pickmode.best_mode_skip_txfm,
- (cpi->sf.rt_sf.nonrd_agressive_skip ? 1 : 0)))
+ (cpi->sf.rt_sf.nonrd_aggressive_skip ? 1 : 0)))
continue;
}
@@ -2852,15 +3272,26 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
set_ref_ptrs(cm, xd, ref_frame, ref_frame2);
if (this_mode == NEWMV && !force_mv_inter_layer) {
- if (search_new_mv(cpi, x, frame_mv, ref_frame, gf_temporal_ref, bsize,
- mi_row, mi_col, &rate_mv, &best_rdc))
+#if COLLECT_PICK_MODE_STAT
+ aom_usec_timer_start(&ms_stat.timer2);
+#endif
+ const bool skip_newmv =
+ search_new_mv(cpi, x, frame_mv, ref_frame, gf_temporal_ref, bsize,
+ mi_row, mi_col, &rate_mv, &best_rdc);
+#if COLLECT_PICK_MODE_STAT
+ aom_usec_timer_mark(&ms_stat.timer2);
+ ms_stat.ms_time[bsize][this_mode] +=
+ aom_usec_timer_elapsed(&ms_stat.timer2);
+#endif
+ if (skip_newmv) {
continue;
+ }
}
for (PREDICTION_MODE inter_mv_mode = NEARESTMV; inter_mv_mode <= NEWMV;
inter_mv_mode++) {
if (inter_mv_mode == this_mode) continue;
- if (mode_checked[inter_mv_mode][ref_frame] &&
+ if (!comp_pred && mode_checked[inter_mv_mode][ref_frame] &&
frame_mv[this_mode][ref_frame].as_int ==
frame_mv[inter_mv_mode][ref_frame].as_int) {
skip_this_mv = 1;
@@ -2870,6 +3301,19 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
if (skip_this_mv && !comp_pred) continue;
+    // For screen: for spatially flat blocks whose source_sad indicates
+    // motion, skip newmv if the searched motion vector is (0, 0) and
+    // color sensitivity is not set.
+ if (this_mode == NEWMV &&
+ cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
+ cpi->sf.rt_sf.source_metrics_sb_nonrd) {
+ if (frame_mv[this_mode][ref_frame].as_int == 0 &&
+ x->content_state_sb.source_sad_nonrd != kZeroSad &&
+ ((x->color_sensitivity[0] == 0 && x->color_sensitivity[1] == 0) ||
+ cpi->rc.high_source_sad) &&
+ x->source_variance == 0)
+ continue;
+ }
+
mi->mode = this_mode;
mi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
mi->mv[1].as_int = 0;
@@ -2884,23 +3328,29 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
pd->dst.stride = bw;
}
}
-#if COLLECT_PICK_MODE_STAT
- ms_stat.num_nonskipped_searches[bsize][this_mode]++;
-#endif
if (idx == 0 && !skip_pred_mv) {
// Set color sensitivity on first tested mode only.
// Use y-sad already computed in find_predictors: take the sad with motion
// vector closest to 0; the uv-sad computed below in set_color_sensitivity
// is for zeromv.
- int y_sad = x->pred_mv0_sad[LAST_FRAME];
- if (x->pred_mv1_sad[LAST_FRAME] != INT_MAX &&
- (abs(frame_mv[NEARMV][LAST_FRAME].as_mv.col) +
- abs(frame_mv[NEARMV][LAST_FRAME].as_mv.row)) <
- (abs(frame_mv[NEARESTMV][LAST_FRAME].as_mv.col) +
- abs(frame_mv[NEARESTMV][LAST_FRAME].as_mv.row)))
- y_sad = x->pred_mv1_sad[LAST_FRAME];
- set_color_sensitivity(cpi, x, xd, bsize, y_sad, x->source_variance);
+    // For screen: first check if the golden reference is being used; if so,
+    // force color_sensitivity on when the color sensitivity for sb_g is on.
+ if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
+ use_ref_frame_mask[GOLDEN_FRAME]) {
+ if (x->color_sensitivity_sb_g[0] == 1) x->color_sensitivity[0] = 1;
+ if (x->color_sensitivity_sb_g[1] == 1) x->color_sensitivity[1] = 1;
+ } else {
+ int y_sad = x->pred_mv0_sad[LAST_FRAME];
+ if (x->pred_mv1_sad[LAST_FRAME] != INT_MAX &&
+ (abs(frame_mv[NEARMV][LAST_FRAME].as_mv.col) +
+ abs(frame_mv[NEARMV][LAST_FRAME].as_mv.row)) <
+ (abs(frame_mv[NEARESTMV][LAST_FRAME].as_mv.col) +
+ abs(frame_mv[NEARESTMV][LAST_FRAME].as_mv.row)))
+ y_sad = x->pred_mv1_sad[LAST_FRAME];
+ set_color_sensitivity(cpi, x, bsize, y_sad, x->source_variance,
+ yv12_mb[LAST_FRAME]);
+ }
}
mi->motion_mode = SIMPLE_TRANSLATION;
#if !CONFIG_REALTIME_ONLY
@@ -2912,9 +3362,18 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
if (enable_filter_search && !force_mv_inter_layer && !comp_pred &&
((mi->mv[0].as_mv.row & 0x07) || (mi->mv[0].as_mv.col & 0x07)) &&
(ref_frame == LAST_FRAME || !x->nonrd_prune_ref_frame_search)) {
+#if COLLECT_PICK_MODE_STAT
+ aom_usec_timer_start(&ms_stat.timer2);
+#endif
search_filter_ref(cpi, x, &this_rdc, mi_row, mi_col, tmp, bsize,
reuse_inter_pred, &this_mode_pred, &this_early_term,
+ &vars[INTER_OFFSET(this_mode)][ref_frame],
use_model_yrd_large, best_pickmode.best_sse);
+#if COLLECT_PICK_MODE_STAT
+ aom_usec_timer_mark(&ms_stat.timer2);
+ ms_stat.ifs_time[bsize][this_mode] +=
+ aom_usec_timer_elapsed(&ms_stat.timer2);
+#endif
#if !CONFIG_REALTIME_ONLY
} else if (cpi->oxcf.motion_mode_cfg.allow_warped_motion &&
this_mode == NEWMV) {
@@ -2946,20 +3405,48 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
mi->interp_filters = av1_broadcast_interp_filter(EIGHTTAP_SMOOTH);
}
}
+#if COLLECT_PICK_MODE_STAT
+ aom_usec_timer_start(&ms_stat.timer2);
+#endif
if (!comp_pred)
av1_enc_build_inter_predictor_y(xd, mi_row, mi_col);
else
av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
0);
+ unsigned int var_threshold = UINT_MAX;
+ if (cpi->sf.rt_sf.prune_compoundmode_with_singlecompound_var &&
+ comp_pred && use_model_yrd_large) {
+ const PREDICTION_MODE single_mode0 = compound_ref0_mode(this_mode);
+ const PREDICTION_MODE single_mode1 = compound_ref1_mode(this_mode);
+ var_threshold =
+ AOMMIN(var_threshold, vars[INTER_OFFSET(single_mode0)][ref_frame]);
+ var_threshold =
+ AOMMIN(var_threshold, vars[INTER_OFFSET(single_mode1)][ref_frame2]);
+ }
if (use_model_yrd_large) {
model_skip_for_sb_y_large(cpi, bsize, mi_row, mi_col, x, xd, &this_rdc,
- &this_early_term, use_modeled_non_rd_cost,
- best_pickmode.best_sse);
+ &this_early_term, 0, best_pickmode.best_sse,
+ &var, var_threshold);
} else {
- model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc,
- use_modeled_non_rd_cost);
+ model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc, &var, 0,
+ &this_early_term);
+ }
+ if (!comp_pred) {
+ vars[INTER_OFFSET(this_mode)][ref_frame] = var;
+ if (frame_mv[this_mode][ref_frame].as_int == 0) {
+ vars[INTER_OFFSET(GLOBALMV)][ref_frame] = var;
+ }
+ }
+ if (comp_pred && var > var_threshold) {
+ if (reuse_inter_pred) free_pred_buffer(this_mode_pred);
+ continue;
}
+#if COLLECT_PICK_MODE_STAT
+ aom_usec_timer_mark(&ms_stat.timer2);
+ ms_stat.model_rd_time[bsize][this_mode] +=
+ aom_usec_timer_elapsed(&ms_stat.timer2);
+#endif
}
if (ref_frame == LAST_FRAME && frame_mv[this_mode][ref_frame].as_int == 0) {
@@ -2976,6 +3463,10 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
continue;
}
+#if COLLECT_PICK_MODE_STAT
+ ms_stat.num_nonskipped_searches[bsize][this_mode]++;
+#endif
+
const int skip_ctx = av1_get_skip_txfm_context(xd);
const int skip_txfm_cost = mode_costs->skip_txfm_cost[skip_ctx][1];
const int no_skip_txfm_cost = mode_costs->skip_txfm_cost[skip_ctx][0];
@@ -2985,30 +3476,25 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
this_rdc.rate = skip_txfm_cost;
this_rdc.dist = this_rdc.sse << 4;
} else {
- if (use_modeled_non_rd_cost) {
- if (this_rdc.skip_txfm) {
- this_rdc.rate = skip_txfm_cost;
- } else {
- this_rdc.rate += no_skip_txfm_cost;
+#if COLLECT_PICK_MODE_STAT
+ aom_usec_timer_start(&ms_stat.timer2);
+#endif
+ av1_block_yrd(cpi, x, mi_row, mi_col, &this_rdc, &is_skippable, bsize,
+ mi->tx_size, DCT_DCT, 1);
+ if (this_rdc.skip_txfm ||
+ RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist) >=
+ RDCOST(x->rdmult, 0, this_rdc.sse)) {
+ if (!this_rdc.skip_txfm) {
+ // Need to store "real" rdc for possible future use if UV rdc
+ // disallows tx skip
+ nonskip_rdc = this_rdc;
+ nonskip_rdc.rate += no_skip_txfm_cost;
}
+ this_rdc.rate = skip_txfm_cost;
+ this_rdc.skip_txfm = 1;
+ this_rdc.dist = this_rdc.sse;
} else {
- av1_block_yrd(cpi, x, mi_row, mi_col, &this_rdc, &is_skippable, bsize,
- mi->tx_size, DCT_DCT, 1);
- if (this_rdc.skip_txfm ||
- RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist) >=
- RDCOST(x->rdmult, 0, this_rdc.sse)) {
- if (!this_rdc.skip_txfm) {
-          // Need to store "real" rdc for possible future use if UV rdc
- // disallows tx skip
- nonskip_rdc = this_rdc;
- nonskip_rdc.rate += no_skip_txfm_cost;
- }
- this_rdc.rate = skip_txfm_cost;
- this_rdc.skip_txfm = 1;
- this_rdc.dist = this_rdc.sse;
- } else {
- this_rdc.rate += no_skip_txfm_cost;
- }
+ this_rdc.rate += no_skip_txfm_cost;
}
if ((x->color_sensitivity[0] || x->color_sensitivity[1])) {
RD_STATS rdc_uv;
@@ -3027,25 +3513,54 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
if (this_rdc.skip_txfm && !rdc_uv.skip_txfm &&
nonskip_rdc.rate != INT_MAX)
this_rdc = nonskip_rdc;
+ if (!comp_pred) {
+ uv_dist[INTER_OFFSET(this_mode)][ref_frame] = rdc_uv.dist;
+ }
this_rdc.rate += rdc_uv.rate;
this_rdc.dist += rdc_uv.dist;
this_rdc.skip_txfm = this_rdc.skip_txfm && rdc_uv.skip_txfm;
}
+#if COLLECT_PICK_MODE_STAT
+ aom_usec_timer_mark(&ms_stat.timer2);
+ ms_stat.txfm_time[bsize][this_mode] +=
+ aom_usec_timer_elapsed(&ms_stat.timer2);
+#endif
}
+ PREDICTION_MODE this_best_mode = this_mode;
// TODO(kyslov) account for UV prediction cost
this_rdc.rate += rate_mv;
- const int16_t mode_ctx =
- av1_mode_context_analyzer(mbmi_ext->mode_context, mi->ref_frame);
- this_rdc.rate += cost_mv_ref(mode_costs, this_mode, mode_ctx);
+ if (comp_pred) {
+ const int16_t mode_ctx =
+ av1_mode_context_analyzer(mbmi_ext->mode_context, mi->ref_frame);
+ this_rdc.rate += cost_mv_ref(mode_costs, this_mode, mode_ctx);
+ } else {
+ // If the current mode has zeromv but is not GLOBALMV, compare the rate
+ // cost. If GLOBALMV is cheaper, use GLOBALMV instead.
+ if (this_mode != GLOBALMV && frame_mv[this_mode][ref_frame].as_int ==
+ frame_mv[GLOBALMV][ref_frame].as_int) {
+ if (is_globalmv_better(this_mode, ref_frame, rate_mv, mode_costs,
+ single_inter_mode_costs, mbmi_ext)) {
+ this_best_mode = GLOBALMV;
+ }
+ }
+
+ this_rdc.rate +=
+ single_inter_mode_costs[INTER_OFFSET(this_best_mode)][ref_frame];
+ }
+
+ if (!comp_pred && frame_mv[this_mode][ref_frame].as_int == 0 &&
+ var < UINT_MAX) {
+ vars[INTER_OFFSET(GLOBALMV)][ref_frame] = var;
+ }
this_rdc.rate += ref_costs_single[ref_frame];
this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
if (cpi->oxcf.rc_cfg.mode == AOM_CBR && !comp_pred) {
- newmv_diff_bias(xd, this_mode, &this_rdc, bsize,
- frame_mv[this_mode][ref_frame].as_mv.row,
- frame_mv[this_mode][ref_frame].as_mv.col, cpi->speed,
+ newmv_diff_bias(xd, this_best_mode, &this_rdc, bsize,
+ frame_mv[this_best_mode][ref_frame].as_mv.row,
+ frame_mv[this_best_mode][ref_frame].as_mv.col, cpi->speed,
x->source_variance, x->content_state_sb);
}
#if CONFIG_AV1_TEMPORAL_DENOISING
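For single prediction, the block above now re-labels a mode as GLOBALMV whenever it landed on the same motion vector and GLOBALMV signals it more cheaply. A hedged sketch of that rate comparison; globalmv_is_cheaper and its cost table are illustrative stand-ins for is_globalmv_better and libaom's mode-context costs, not the library's API:

#include <stdbool.h>

enum { MODE_NEARESTMV, MODE_NEARMV, MODE_GLOBALMV, MODE_NEWMV, MODE_COUNT };

/* Prefer GLOBALMV when it announces the shared motion vector at a lower
 * rate than the mode that produced it; NEWMV additionally pays rate_mv. */
static bool globalmv_is_cheaper(const int mode_rate[MODE_COUNT],
                                int this_mode, int rate_mv) {
  const int this_cost =
      mode_rate[this_mode] + (this_mode == MODE_NEWMV ? rate_mv : 0);
  return mode_rate[MODE_GLOBALMV] < this_cost;
}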
@@ -3061,6 +3576,17 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
#endif
mode_checked[this_mode][ref_frame] = 1;
+ mode_checked[this_best_mode][ref_frame] = 1;
+
+ if (check_globalmv) {
+ int32_t abs_mv = abs(frame_mv[this_best_mode][ref_frame].as_mv.row) +
+ abs(frame_mv[this_best_mode][ref_frame].as_mv.col);
+      // Early exit check: if the magnitude of this_best_mode's mv is small
+      // enough, skip the GLOBALMV check in subsequent loop iterations.
+ if (abs_mv < 2) {
+ check_globalmv = false;
+ }
+ }
#if COLLECT_PICK_MODE_STAT
aom_usec_timer_mark(&ms_stat.timer1);
ms_stat.nonskipped_search_times[bsize][this_mode] +=
@@ -3070,7 +3596,7 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
best_rdc = this_rdc;
best_early_term = this_early_term;
best_pickmode.best_sse = sse_y;
- best_pickmode.best_mode = this_mode;
+ best_pickmode.best_mode = this_best_mode;
best_pickmode.best_motion_mode = mi->motion_mode;
best_pickmode.wm_params = mi->wm_params;
best_pickmode.num_proj_ref = mi->num_proj_ref;
@@ -3081,17 +3607,18 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
best_pickmode.best_mode_skip_txfm = this_rdc.skip_txfm;
best_pickmode.best_mode_initial_skip_flag =
(nonskip_rdc.rate == INT_MAX && this_rdc.skip_txfm);
- if (!best_pickmode.best_mode_skip_txfm && !use_modeled_non_rd_cost) {
+ if (!best_pickmode.best_mode_skip_txfm) {
memcpy(best_pickmode.blk_skip, txfm_info->blk_skip,
sizeof(txfm_info->blk_skip[0]) * num_8x8_blocks);
}
// This is needed for the compound modes.
- frame_mv_best[this_mode][ref_frame].as_int =
- frame_mv[this_mode][ref_frame].as_int;
- if (ref_frame2 > NONE_FRAME)
- frame_mv_best[this_mode][ref_frame2].as_int =
- frame_mv[this_mode][ref_frame2].as_int;
+ frame_mv_best[this_best_mode][ref_frame].as_int =
+ frame_mv[this_best_mode][ref_frame].as_int;
+ if (ref_frame2 > NONE_FRAME) {
+ frame_mv_best[this_best_mode][ref_frame2].as_int =
+ frame_mv[this_best_mode][ref_frame2].as_int;
+ }
if (reuse_inter_pred) {
free_pred_buffer(best_pickmode.best_pred);
@@ -3100,7 +3627,7 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
} else {
if (reuse_inter_pred) free_pred_buffer(this_mode_pred);
}
- if (best_early_term && (idx > 0 || cpi->sf.rt_sf.nonrd_agressive_skip)) {
+ if (best_early_term && (idx > 0 || cpi->sf.rt_sf.nonrd_aggressive_skip)) {
txfm_info->skip_txfm = 1;
break;
}
@@ -3130,14 +3657,28 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
mi->angle_delta[PLANE_TYPE_UV] = 0;
mi->filter_intra_mode_info.use_filter_intra = 0;
- if (!x->force_zeromv_skip)
- estimate_intra_mode(cpi, x, bsize, use_modeled_non_rd_cost, best_early_term,
+#if COLLECT_PICK_MODE_STAT
+ aom_usec_timer_start(&ms_stat.timer1);
+ ms_stat.num_searches[bsize][DC_PRED]++;
+ ms_stat.num_nonskipped_searches[bsize][DC_PRED]++;
+#endif
+
+ if (!x->force_zeromv_skip_for_blk)
+ estimate_intra_mode(cpi, x, bsize, best_early_term,
ref_costs_single[INTRA_FRAME], reuse_inter_pred,
&orig_dst, tmp, &this_mode_pred, &best_rdc,
&best_pickmode, ctx);
- if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
- !x->force_zeromv_skip && is_inter_mode(best_pickmode.best_mode) &&
+ int skip_idtx_palette =
+ (x->color_sensitivity[0] || x->color_sensitivity[1]) &&
+ x->content_state_sb.source_sad_nonrd != kZeroSad &&
+ !cpi->rc.high_source_sad;
+
+ // Check for IDTX: based only on Y channel, so avoid when color_sensitivity
+ // is set.
+ if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN && !skip_idtx_palette &&
+ !cpi->oxcf.txfm_cfg.use_inter_dct_only && !x->force_zeromv_skip_for_blk &&
+ is_inter_mode(best_pickmode.best_mode) &&
(!cpi->sf.rt_sf.prune_idtx_nonrd ||
(cpi->sf.rt_sf.prune_idtx_nonrd && bsize <= BLOCK_32X32 &&
best_pickmode.best_mode_skip_txfm != 1 && x->source_variance > 200))) {
@@ -3152,6 +3693,9 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
mi->tx_size, IDTX, 1);
int64_t idx_rdcost = RDCOST(x->rdmult, idtx_rdc.rate, idtx_rdc.dist);
if (idx_rdcost < best_rdc.rdcost) {
+ // Keep the skip_txfm off if the color_sensitivity is set.
+ if (x->color_sensitivity[0] || x->color_sensitivity[1])
+ idtx_rdc.skip_txfm = 0;
best_pickmode.tx_type = IDTX;
best_rdc.rdcost = idx_rdcost;
best_pickmode.best_mode_skip_txfm = idtx_rdc.skip_txfm;
@@ -3167,11 +3711,11 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
}
int try_palette =
- cpi->oxcf.tool_cfg.enable_palette &&
+ !skip_idtx_palette && cpi->oxcf.tool_cfg.enable_palette &&
av1_allow_palette(cpi->common.features.allow_screen_content_tools,
mi->bsize);
try_palette = try_palette && is_mode_intra(best_pickmode.best_mode) &&
- x->source_variance > 0 && !x->force_zeromv_skip &&
+ x->source_variance > 0 && !x->force_zeromv_skip_for_blk &&
(cpi->rc.high_source_sad || x->source_variance > 500);
if (try_palette) {
@@ -3187,6 +3731,9 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
best_rdc.dist = this_rdc.dist;
best_rdc.rdcost = this_rdc.rdcost;
best_pickmode.best_mode_skip_txfm = this_rdc.skip_txfm;
+ // Keep the skip_txfm off if the color_sensitivity is set.
+ if (x->color_sensitivity[0] || x->color_sensitivity[1])
+ this_rdc.skip_txfm = 0;
if (!this_rdc.skip_txfm) {
memcpy(ctx->blk_skip, txfm_info->blk_skip,
sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
@@ -3196,6 +3743,12 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
}
}
+#if COLLECT_PICK_MODE_STAT
+ aom_usec_timer_mark(&ms_stat.timer1);
+ ms_stat.nonskipped_search_times[bsize][DC_PRED] +=
+ aom_usec_timer_elapsed(&ms_stat.timer1);
+#endif
+
pd->dst = orig_dst;
if (try_palette) mi->palette_mode_info = best_pickmode.pmi;
mi->mode = best_pickmode.best_mode;
@@ -3271,42 +3824,64 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
store_coding_context(x, ctx);
#endif // CONFIG_INTERNAL_STATS
#if COLLECT_PICK_MODE_STAT
- aom_usec_timer_mark(&ms_stat.timer2);
- ms_stat.avg_block_times[bsize] += aom_usec_timer_elapsed(&ms_stat.timer2);
- //
+ aom_usec_timer_mark(&ms_stat.bsize_timer);
+ ms_stat.total_block_times[bsize] +=
+ aom_usec_timer_elapsed(&ms_stat.bsize_timer);
if ((mi_row + mi_size_high[bsize] >= (cpi->common.mi_params.mi_rows)) &&
(mi_col + mi_size_wide[bsize] >= (cpi->common.mi_params.mi_cols))) {
- int i, j;
- PREDICTION_MODE used_modes[3] = { NEARESTMV, NEARMV, NEWMV };
- BLOCK_SIZE bss[5] = { BLOCK_8X8, BLOCK_16X16, BLOCK_32X32, BLOCK_64X64,
- BLOCK_128X128 };
int64_t total_time = 0l;
int32_t total_blocks = 0;
+ for (BLOCK_SIZE bs = 0; bs < BLOCK_SIZES; bs++) {
+ total_time += ms_stat.total_block_times[bs];
+ total_blocks += ms_stat.num_blocks[bs];
+ }
printf("\n");
- for (i = 0; i < 5; i++) {
- printf("BS(%d) Num %d, Avg_time %f: ", bss[i], ms_stat.num_blocks[bss[i]],
- ms_stat.num_blocks[bss[i]] > 0
- ? (float)ms_stat.avg_block_times[bss[i]] /
- ms_stat.num_blocks[bss[i]]
- : 0);
- total_time += ms_stat.avg_block_times[bss[i]];
- total_blocks += ms_stat.num_blocks[bss[i]];
- for (j = 0; j < 3; j++) {
- printf("Mode %d, %d/%d tps %f ", used_modes[j],
- ms_stat.num_nonskipped_searches[bss[i]][used_modes[j]],
- ms_stat.num_searches[bss[i]][used_modes[j]],
- ms_stat.num_nonskipped_searches[bss[i]][used_modes[j]] > 0
- ? (float)ms_stat
- .nonskipped_search_times[bss[i]][used_modes[j]] /
- ms_stat.num_nonskipped_searches[bss[i]][used_modes[j]]
+ for (BLOCK_SIZE bs = 0; bs < BLOCK_SIZES; bs++) {
+ if (ms_stat.num_blocks[bs] == 0) {
+ continue;
+ }
+ if (!COLLECT_NON_SQR_STAT && block_size_wide[bs] != block_size_high[bs]) {
+ continue;
+ }
+
+ printf("BLOCK_%dX%d Num %d, Time: %ld (%f%%), Avg_time %f:\n",
+ block_size_wide[bs], block_size_high[bs], ms_stat.num_blocks[bs],
+ ms_stat.total_block_times[bs],
+ 100 * ms_stat.total_block_times[bs] / (float)total_time,
+ (float)ms_stat.total_block_times[bs] / ms_stat.num_blocks[bs]);
+ for (int j = 0; j < MB_MODE_COUNT; j++) {
+ if (ms_stat.nonskipped_search_times[bs][j] == 0) {
+ continue;
+ }
+
+ int64_t total_mode_time = ms_stat.nonskipped_search_times[bs][j];
+ printf(" Mode %d, %d/%d tps %f\n", j,
+ ms_stat.num_nonskipped_searches[bs][j],
+ ms_stat.num_searches[bs][j],
+ ms_stat.num_nonskipped_searches[bs][j] > 0
+ ? (float)ms_stat.nonskipped_search_times[bs][j] /
+ ms_stat.num_nonskipped_searches[bs][j]
: 0l);
+ if (j >= INTER_MODE_START) {
+ total_mode_time = ms_stat.ms_time[bs][j] + ms_stat.ifs_time[bs][j] +
+ ms_stat.model_rd_time[bs][j] +
+ ms_stat.txfm_time[bs][j];
+ print_stage_time("Motion Search Time", ms_stat.ms_time[bs][j],
+ total_time);
+ print_stage_time("Filter Search Time", ms_stat.ifs_time[bs][j],
+ total_time);
+ print_stage_time("Model RD Time", ms_stat.model_rd_time[bs][j],
+ total_time);
+ print_stage_time("Tranfm Search Time", ms_stat.txfm_time[bs][j],
+ total_time);
+ }
+ print_stage_time("Total Mode Time", total_mode_time, total_time);
}
printf("\n");
}
printf("Total time = %ld. Total blocks = %d\n", total_time, total_blocks);
}
- //
#endif // COLLECT_PICK_MODE_STAT
*rd_cost = best_rdc;
}
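With the use_modeled_non_rd_cost branch gone, the luma path above always runs av1_block_yrd and then forces transform skip whenever coding the residual is no cheaper than sending the block at its SSE distortion. A sketch of that decision; the fixed-point scaling in rd_cost below is an assumed placeholder for libaom's RDCOST macro, not its exact definition:

#include <stdint.h>

static int64_t rd_cost(int64_t rdmult, int64_t rate, int64_t dist) {
  /* Lambda-scaled rate plus shifted distortion; the shift amounts are
   * illustrative placeholders for libaom's fixed-point scaling. */
  return ((rate * rdmult) >> 9) + (dist << 7);
}

static int force_skip_txfm(int64_t rdmult, int rate, int64_t dist,
                           int64_t sse) {
  /* Coding the residual must beat the skip path, whose distortion is the
   * plain SSE; otherwise transmit the block as skip_txfm. */
  return rd_cost(rdmult, rate, dist) >= rd_cost(rdmult, 0, sse);
}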
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/palette.c b/chromium/third_party/libaom/source/libaom/av1/encoder/palette.c
index 69f4523ef1e..4375175ad6e 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/palette.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/palette.c
@@ -219,7 +219,7 @@ static AOM_INLINE void palette_rd_y(
BLOCK_SIZE bsize, int dc_mode_cost, const int *data, int *centroids, int n,
uint16_t *color_cache, int n_cache, bool do_header_rd_based_gating,
MB_MODE_INFO *best_mbmi, uint8_t *best_palette_color_map, int64_t *best_rd,
- int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable,
+ int *rate, int *rate_tokenonly, int64_t *distortion, uint8_t *skippable,
int *beat_best_rd, PICK_MODE_CONTEXT *ctx, uint8_t *blk_skip,
uint8_t *tx_type_map, int *beat_best_palette_rd,
bool *do_header_rd_based_breakout, int discount_color_cost) {
@@ -328,7 +328,7 @@ static AOM_INLINE int perform_top_color_palette_search(
int start_n, int end_n, int step_size, bool do_header_rd_based_gating,
int *last_n_searched, uint16_t *color_cache, int n_cache,
MB_MODE_INFO *best_mbmi, uint8_t *best_palette_color_map, int64_t *best_rd,
- int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable,
+ int *rate, int *rate_tokenonly, int64_t *distortion, uint8_t *skippable,
int *beat_best_rd, PICK_MODE_CONTEXT *ctx, uint8_t *best_blk_skip,
uint8_t *tx_type_map, int discount_color_cost) {
int centroids[PALETTE_MAX_SIZE];
@@ -376,7 +376,7 @@ static AOM_INLINE int perform_k_means_palette_search(
bool do_header_rd_based_gating, int *last_n_searched, uint16_t *color_cache,
int n_cache, MB_MODE_INFO *best_mbmi, uint8_t *best_palette_color_map,
int64_t *best_rd, int *rate, int *rate_tokenonly, int64_t *distortion,
- int *skippable, int *beat_best_rd, PICK_MODE_CONTEXT *ctx,
+ uint8_t *skippable, int *beat_best_rd, PICK_MODE_CONTEXT *ctx,
uint8_t *best_blk_skip, uint8_t *tx_type_map, uint8_t *color_map,
int data_points, int discount_color_cost) {
int centroids[PALETTE_MAX_SIZE];
@@ -468,10 +468,66 @@ static AOM_INLINE void fill_data_and_get_bounds(
}
}
+/*! \brief Colors are sorted by their count: the higher the better.
+ */
+struct ColorCount {
+ //! Color index in the histogram.
+ int index;
+ //! Histogram count.
+ int count;
+};
+
+int color_count_comp(const void *c1, const void *c2) {
+ const struct ColorCount *color_count1 = (const struct ColorCount *)c1;
+ const struct ColorCount *color_count2 = (const struct ColorCount *)c2;
+ if (color_count1->count > color_count2->count) return -1;
+ if (color_count1->count < color_count2->count) return 1;
+ if (color_count1->index < color_count2->index) return -1;
+ return 1;
+}
+
+static void find_top_colors(const int *const count_buf, int bit_depth,
+ int n_colors, int *top_colors) {
+ // Top color array, serving as a priority queue if more than n_colors are
+ // found.
+ struct ColorCount top_color_counts[PALETTE_MAX_SIZE] = { { 0 } };
+ int n_color_count = 0;
+ for (int i = 0; i < (1 << bit_depth); ++i) {
+ if (count_buf[i] > 0) {
+ if (n_color_count < n_colors) {
+ // Keep adding to the top colors.
+ top_color_counts[n_color_count].index = i;
+ top_color_counts[n_color_count].count = count_buf[i];
+ ++n_color_count;
+ if (n_color_count == n_colors) {
+ qsort(top_color_counts, n_colors, sizeof(top_color_counts[0]),
+ color_count_comp);
+ }
+ } else {
+ // Check the worst in the sorted top.
+ if (count_buf[i] > top_color_counts[n_colors - 1].count) {
+ int j = n_colors - 1;
+ // Move up to the best one.
+ while (j >= 1 && count_buf[i] > top_color_counts[j - 1].count) --j;
+ memmove(top_color_counts + j + 1, top_color_counts + j,
+ (n_colors - j - 1) * sizeof(top_color_counts[0]));
+ top_color_counts[j].index = i;
+ top_color_counts[j].count = count_buf[i];
+ }
+ }
+ }
+ }
+ assert(n_color_count == n_colors);
+
+ for (int i = 0; i < n_colors; ++i) {
+ top_colors[i] = top_color_counts[i].index;
+ }
+}
+
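find_top_colors above keeps at most n_colors (index, count) pairs: it fills the array, sorts it once when full, and afterwards insertion-sorts any later color that beats the current minimum. The same keep-top-N pattern over a plain int array, self-contained and independent of libaom:

#include <stdlib.h>
#include <string.h>

static int desc_cmp(const void *a, const void *b) {
  return *(const int *)b - *(const int *)a;
}

/* Keep the n largest values of vals[] in descending order in top[]. */
static void keep_top_n(const int *vals, int num_vals, int *top, int n) {
  int filled = 0;
  for (int i = 0; i < num_vals; ++i) {
    if (filled < n) {
      top[filled++] = vals[i];
      /* Sort once, as soon as the array is full. */
      if (filled == n) qsort(top, n, sizeof(*top), desc_cmp);
    } else if (vals[i] > top[n - 1]) {
      /* Shift smaller entries down and drop the current minimum. */
      int j = n - 1;
      while (j >= 1 && vals[i] > top[j - 1]) --j;
      memmove(top + j + 1, top + j, (n - j - 1) * sizeof(*top));
      top[j] = vals[i];
    }
  }
  if (filled < n) qsort(top, filled, sizeof(*top), desc_cmp);
}

Compared with the removed loop in av1_rd_pick_palette_intra_sby, this is a single pass over the histogram instead of one full scan per palette color, and it no longer clobbers count_buf.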
void av1_rd_pick_palette_intra_sby(
const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int dc_mode_cost,
MB_MODE_INFO *best_mbmi, uint8_t *best_palette_color_map, int64_t *best_rd,
- int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable,
+ int *rate, int *rate_tokenonly, int64_t *distortion, uint8_t *skippable,
int *beat_best_rd, PICK_MODE_CONTEXT *ctx, uint8_t *best_blk_skip,
uint8_t *tx_type_map) {
MACROBLOCKD *const xd = &x->e_mbd;
@@ -493,10 +549,10 @@ void av1_rd_pick_palette_intra_sby(
const int discount_color_cost = cpi->sf.rt_sf.use_nonrd_pick_mode;
int unused;
- int count_buf[1 << 12]; // Maximum (1 << 12) color levels.
- int count_buf_8bit[1 << 8]; // Maximum (1 << 8) bins for hbd path.
+ int count_buf[1 << 12]; // Maximum (1 << 12) color levels.
int colors, colors_threshold = 0;
if (is_hbd) {
+ int count_buf_8bit[1 << 8]; // Maximum (1 << 8) bins for hbd path.
av1_count_colors_highbd(src, src_stride, rows, cols, bit_depth, count_buf,
count_buf_8bit, &colors_threshold, &colors);
} else {
@@ -520,17 +576,8 @@ void av1_rd_pick_palette_intra_sby(
// Find the dominant colors, stored in top_colors[].
int top_colors[PALETTE_MAX_SIZE] = { 0 };
- for (int i = 0; i < AOMMIN(colors, PALETTE_MAX_SIZE); ++i) {
- int max_count = 0;
- for (int j = 0; j < (1 << bit_depth); ++j) {
- if (count_buf[j] > max_count) {
- max_count = count_buf[j];
- top_colors[i] = j;
- }
- }
- assert(max_count > 0);
- count_buf[top_colors[i]] = 0;
- }
+ find_top_colors(count_buf, bit_depth, AOMMIN(colors, PALETTE_MAX_SIZE),
+ top_colors);
// The following are the approaches used for header rdcost based gating
// for early termination for different values of prune_palette_search_level.
@@ -693,7 +740,7 @@ void av1_rd_pick_palette_intra_sbuv(const AV1_COMP *cpi, MACROBLOCK *x,
MB_MODE_INFO *const best_mbmi,
int64_t *best_rd, int *rate,
int *rate_tokenonly, int64_t *distortion,
- int *skippable) {
+ uint8_t *skippable) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = xd->mi[0];
assert(!is_inter_block(mbmi));
@@ -716,9 +763,9 @@ void av1_rd_pick_palette_intra_sbuv(const AV1_COMP *cpi, MACROBLOCK *x,
&plane_block_height, &rows, &cols);
mbmi->uv_mode = UV_DC_PRED;
- int count_buf[1 << 12]; // Maximum (1 << 12) color levels.
- int count_buf_8bit[1 << 8]; // Maximum (1 << 8) bins for hbd path.
if (seq_params->use_highbitdepth) {
+ int count_buf[1 << 12]; // Maximum (1 << 12) color levels.
+ int count_buf_8bit[1 << 8]; // Maximum (1 << 8) bins for hbd path.
av1_count_colors_highbd(src_u, src_stride, rows, cols,
seq_params->bit_depth, count_buf, count_buf_8bit,
&colors_threshold_u, &colors_u);
@@ -726,6 +773,7 @@ void av1_rd_pick_palette_intra_sbuv(const AV1_COMP *cpi, MACROBLOCK *x,
seq_params->bit_depth, count_buf, count_buf_8bit,
&colors_threshold_v, &colors_v);
} else {
+ int count_buf[1 << 8];
av1_count_colors(src_u, src_stride, rows, cols, count_buf, &colors_u);
av1_count_colors(src_v, src_stride, rows, cols, count_buf, &colors_v);
colors_threshold_u = colors_u;
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/palette.h b/chromium/third_party/libaom/source/libaom/av1/encoder/palette.h
index 7d9a72f61d7..34d2ddc48b9 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/palette.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/palette.h
@@ -49,7 +49,7 @@ void AV1_K_MEANS_RENAME(av1_k_means, 2)(const int *data, int *centroids,
* \param[in] k Number of clusters.
* \param[in] dim Data dimension.
*
- * \return Returns nothing, but saves each data's cluster index in indices.
+ * \remark Returns nothing, but saves each data point's cluster index in \a indices.
*/
static INLINE void av1_calc_indices(const int *data, const int *centroids,
uint8_t *indices, int n, int k, int dim) {
@@ -79,8 +79,8 @@ static INLINE void av1_calc_indices(const int *data, const int *centroids,
* \param[in] dim Data dimension.
* \param[in] max_itr Maximum number of iterations to run.
*
- * \return Returns nothing, but saves each cluster's centroid in centroids and
- * each data's cluster index in indices.
+ * \remark Returns nothing, but saves each cluster's centroid in centroids and
+ * each data point's cluster index in \a indices.
*
* \attention The output centroids are rounded off to nearest integers.
*/
@@ -186,7 +186,7 @@ void av1_rd_pick_palette_intra_sby(
const struct AV1_COMP *cpi, struct macroblock *x, BLOCK_SIZE bsize,
int dc_mode_cost, MB_MODE_INFO *best_mbmi, uint8_t *best_palette_color_map,
int64_t *best_rd, int *rate, int *rate_tokenonly, int64_t *distortion,
- int *skippable, int *beat_best_rd, struct PICK_MODE_CONTEXT *ctx,
+ uint8_t *skippable, int *beat_best_rd, struct PICK_MODE_CONTEXT *ctx,
uint8_t *best_blk_skip, uint8_t *tx_type_map);
/*!\brief Search for the best palette in the chroma plane.
@@ -201,7 +201,7 @@ void av1_rd_pick_palette_intra_sbuv(const struct AV1_COMP *cpi,
MB_MODE_INFO *const best_mbmi,
int64_t *best_rd, int *rate,
int *rate_tokenonly, int64_t *distortion,
- int *skippable);
+ uint8_t *skippable);
/*!\brief Resets palette color map for chroma channels.
*/
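The signature documented above takes n data points of dimension dim and writes, for each, the index of the nearest of k centroids. A self-contained sketch of that contract (squared-L2 nearest centroid); this is not libaom's implementation, only an illustration of the documented behavior:

#include <stdint.h>

/* Assign each point to its nearest centroid by squared L2 distance,
 * matching the documented contract of av1_calc_indices. */
static void calc_indices_sketch(const int *data, const int *centroids,
                                uint8_t *indices, int n, int k, int dim) {
  for (int i = 0; i < n; ++i) {
    int64_t best_dist = INT64_MAX;
    int best_idx = 0;
    for (int c = 0; c < k; ++c) {
      int64_t dist = 0;
      for (int d = 0; d < dim; ++d) {
        const int64_t diff = data[i * dim + d] - centroids[c * dim + d];
        dist += diff * diff;
      }
      if (dist < best_dist) {
        best_dist = dist;
        best_idx = c;
      }
    }
    indices[i] = (uint8_t)best_idx;
  }
}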
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/partition_search.c b/chromium/third_party/libaom/source/libaom/av1/encoder/partition_search.c
index 787c005c7e6..9cae96a9b1c 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/partition_search.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/partition_search.c
@@ -450,13 +450,10 @@ static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
xd->block_ref_scale_factors[ref], num_planes);
}
// Predicted sample of inter mode (for Luma plane) cannot be reused if
- // nonrd_check_partition_merge_mode or nonrd_check_partition_split speed
- // feature is enabled, Since in such cases the buffer may not contain the
- // predicted sample of best mode.
+  // nonrd_check_partition_split speed feature is enabled, since in such cases
+ // the buffer may not contain the predicted sample of best mode.
const int start_plane =
- (cpi->sf.rt_sf.reuse_inter_pred_nonrd &&
- (!cpi->sf.rt_sf.nonrd_check_partition_merge_mode) &&
- (!cpi->sf.rt_sf.nonrd_check_partition_split) &&
+ (x->reuse_inter_pred && (!cpi->sf.rt_sf.nonrd_check_partition_split) &&
cm->seq_params->bit_depth == AOM_BITS_8)
? 1
: 0;
@@ -700,6 +697,11 @@ void av1_set_offsets(const AV1_COMP *const cpi, const TileInfo *const tile,
}
av1_init_plane_quantizers(cpi, x, mbmi->segment_id, 0);
}
+#ifndef NDEBUG
+ x->last_set_offsets_loc.mi_row = mi_row;
+ x->last_set_offsets_loc.mi_col = mi_col;
+ x->last_set_offsets_loc.bsize = bsize;
+#endif // NDEBUG
}
/*!\brief Hybrid intra mode search.
@@ -719,7 +721,7 @@ void av1_set_offsets(const AV1_COMP *const cpi, const TileInfo *const tile,
* \param[in] ctx Structure to hold snapshot of coding context
during the mode picking process
*
- * \return Nothing is returned. Instead, the MB_MODE_INFO struct inside x
+ * \remark Nothing is returned. Instead, the MB_MODE_INFO struct inside x
* is modified to store information about the best mode computed
* in this function. The rd_cost struct is also updated with the RD stats
* corresponding to the best mode found.
@@ -805,7 +807,7 @@ static AOM_INLINE void wait_for_top_right_sb(
* chosen modes for the current block
* \param[in] best_rd Upper bound of rd cost of a valid partition
*
- * \return Nothing is returned. Instead, the chosen modes and contexts necessary
+ * \remark Nothing is returned. Instead, the chosen modes and contexts necessary
* for reconstruction are stored in ctx, the rate-distortion stats are stored in
* rd_cost. If no valid mode leading to rd_cost <= best_rd, the status will be
* signalled by an INT64_MAX rd_cost->rdcost.
@@ -884,13 +886,8 @@ static void pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
// Reset skip mode flag.
mbmi->skip_mode = 0;
- if (is_cur_buf_hbd(xd)) {
- x->source_variance = av1_high_get_sby_perpixel_variance(
- cpi, &x->plane[0].src, bsize, xd->bd);
- } else {
- x->source_variance =
- av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
- }
+ x->source_variance = av1_get_perpixel_variance_facade(
+ cpi, xd, &x->plane[0].src, bsize, AOM_PLANE_Y);
// Initialize default mode evaluation params
set_mode_eval_params(cpi, x, DEFAULT_EVAL);
@@ -1041,7 +1038,8 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td) {
++td->counts->intrabc[is_intrabc];
#endif // CONFIG_ENTROPY_STATS
if (is_intrabc) {
- const int_mv dv_ref = x->mbmi_ext_frame->ref_mv_stack[0].this_mv;
+ const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
+ const int_mv dv_ref = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
av1_update_mv_stats(&mbmi->mv[0].as_mv, &dv_ref.as_mv, &fc->ndvc,
MV_SUBPEL_NONE);
}
@@ -1390,7 +1388,7 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td) {
* chosen modes for the current block
* \param[in] rate Pointer to the total rate for the current block
*
- * \return Nothing is returned. Instead, reconstructions (w/o in-loop filters)
+ * \remark Nothing is returned. Instead, reconstructions (w/o in-loop filters)
* will be updated in the pixel buffers in td->mb.e_mbd. Also, the chosen modes
* will be stored in the MB_MODE_INFO buffer td->mb.e_mbd.mi[0].
*/
@@ -1553,7 +1551,7 @@ static void encode_b(const AV1_COMP *const cpi, TileDataEnc *tile_data,
* partitions and mode info for the current block
* \param[in] rate Pointer to the total rate for the current block
*
- * \return Nothing is returned. Instead, reconstructions (w/o in-loop filters)
+ * \remark Nothing is returned. Instead, reconstructions (w/o in-loop filters)
* will be updated in the pixel buffers in td->mb.e_mbd.
*/
static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
@@ -1573,9 +1571,11 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
: -1;
const PARTITION_TYPE partition = pc_tree->partitioning;
const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
+#if !CONFIG_REALTIME_ONLY
int quarter_step = mi_size_wide[bsize] / 4;
int i;
BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
+#endif
if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return;
if (subsize == BLOCK_INVALID) return;
@@ -1629,6 +1629,7 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
subsize, pc_tree->split[3], rate);
break;
+#if !CONFIG_REALTIME_ONLY
case PARTITION_HORZ_A:
encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2,
partition, pc_tree->horizontala[0], rate);
@@ -1679,6 +1680,7 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
partition, pc_tree->vertical4[i], rate);
}
break;
+#endif
default: assert(0 && "Invalid partition type."); break;
}
@@ -1731,7 +1733,7 @@ reference for future sub-partitions
* \param[in] pc_tree Pointer to the PC_TREE node holding the picked
partitions and mode info for the current block
*
-* \return Nothing is returned. The pc_tree struct is modified to store the
+* \remark Nothing is returned. The pc_tree struct is modified to store the
* picked partition and modes. The rate and dist are also updated with those
* corresponding to the best partition found.
*/
@@ -2121,7 +2123,7 @@ static void encode_b_nonrd(const AV1_COMP *const cpi, TileDataEnc *tile_data,
if (tile_data->allow_update_cdf) update_stats(&cpi->common, td);
}
if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && mbmi->skip_txfm &&
- !cpi->rc.rtc_external_ratectrl)
+ !cpi->rc.rtc_external_ratectrl && cm->seg.enabled)
av1_cyclic_reset_segment_skip(cpi, x, mi_row, mi_col, bsize);
// TODO(Ravi/Remya): Move this copy function to a better logical place
// This function will copy the best mode information from block
@@ -2136,6 +2138,46 @@ static void encode_b_nonrd(const AV1_COMP *const cpi, TileDataEnc *tile_data,
#endif
}
+static int get_force_zeromv_skip_flag_for_blk(const AV1_COMP *cpi,
+ const MACROBLOCK *x,
+ BLOCK_SIZE bsize) {
+ // Force zero MV skip based on SB level decision
+ if (x->force_zeromv_skip_for_sb < 2) return x->force_zeromv_skip_for_sb;
+
+  // For blocks of size equal to the superblock size, the decision has
+  // already been made at the superblock level, so the per-block check is
+  // skipped here.
+ const AV1_COMMON *const cm = &cpi->common;
+ if (bsize == cm->seq_params->sb_size) return 0;
+
+ const int num_planes = av1_num_planes(cm);
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const unsigned int thresh_exit_part_y =
+ cpi->zeromv_skip_thresh_exit_part[bsize];
+ const unsigned int thresh_exit_part_uv =
+ CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP(thresh_exit_part_y);
+ const unsigned int thresh_exit_part[MAX_MB_PLANE] = { thresh_exit_part_y,
+ thresh_exit_part_uv,
+ thresh_exit_part_uv };
+ const YV12_BUFFER_CONFIG *const yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
+ const struct scale_factors *const sf =
+ get_ref_scale_factors_const(cm, LAST_FRAME);
+
+ struct buf_2d yv12_mb[MAX_MB_PLANE];
+ av1_setup_pred_block(xd, yv12_mb, yv12, sf, sf, num_planes);
+
+ for (int plane = 0; plane < num_planes; ++plane) {
+ const struct macroblock_plane *const p = &x->plane[plane];
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const BLOCK_SIZE bs =
+ get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
+ const unsigned int plane_sad = cpi->ppi->fn_ptr[bs].sdf(
+ p->src.buf, p->src.stride, yv12_mb[plane].buf, yv12_mb[plane].stride);
+ assert(plane < MAX_MB_PLANE);
+ if (plane_sad >= thresh_exit_part[plane]) return 0;
+ }
+ return 1;
+}
+
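get_force_zeromv_skip_flag_for_blk above only declares a block zeromv-skippable when, for every plane, the plain SAD against the colocated LAST_FRAME block stays under a threshold, with the chroma thresholds derived from the luma one via CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP. A hypothetical distillation of that gate; the halving in chroma_thresh is an assumed stand-in for the real macro:

/* All-planes SAD gate: any plane at or above its threshold vetoes the skip. */
static int all_planes_under_thresh(const unsigned int *sad,
                                   const unsigned int *thresh,
                                   int num_planes) {
  for (int plane = 0; plane < num_planes; ++plane) {
    if (sad[plane] >= thresh[plane]) return 0;
  }
  return 1;
}

static unsigned int chroma_thresh(unsigned int luma_thresh) {
  /* Assumed scaling; the real factor is defined by
   * CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP elsewhere in libaom. */
  return luma_thresh >> 1;
}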
/*!\brief Top level function to pick block mode for non-RD optimized case
*
* \ingroup partition_search
@@ -2163,7 +2205,7 @@ static void encode_b_nonrd(const AV1_COMP *const cpi, TileDataEnc *tile_data,
* \param[in] ctx Pointer to structure holding coding contexts and
* chosen modes for the current block
*
- * \return Nothing is returned. Instead, the chosen modes and contexts necessary
+ * \remark Nothing is returned. Instead, the chosen modes and contexts necessary
* for reconstruction are stored in ctx, the rate-distortion stats are stored in
* rd_cost. If no valid mode leading to rd_cost <= best_rd, the status will be
* signalled by an INT64_MAX rd_cost->rdcost.
@@ -2172,7 +2214,14 @@ static void pick_sb_modes_nonrd(AV1_COMP *const cpi, TileDataEnc *tile_data,
MACROBLOCK *const x, int mi_row, int mi_col,
RD_STATS *rd_cost, BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx) {
- av1_set_offsets(cpi, &tile_data->tile_info, x, mi_row, mi_col, bsize);
+ // For nonrd mode, av1_set_offsets is already called at the superblock level
+ // in encode_nonrd_sb when we determine the partitioning.
+ if (bsize != cpi->common.seq_params->sb_size) {
+ av1_set_offsets(cpi, &tile_data->tile_info, x, mi_row, mi_col, bsize);
+ }
+ assert(x->last_set_offsets_loc.mi_row == mi_row &&
+ x->last_set_offsets_loc.mi_col == mi_col &&
+ x->last_set_offsets_loc.bsize == bsize);
AV1_COMMON *const cm = &cpi->common;
const int num_planes = av1_num_planes(cm);
MACROBLOCKD *const xd = &x->e_mbd;
@@ -2203,12 +2252,13 @@ static void pick_sb_modes_nonrd(AV1_COMP *const cpi, TileDataEnc *tile_data,
p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
}
for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
- if (is_cur_buf_hbd(xd)) {
- x->source_variance = av1_high_get_sby_perpixel_variance(
- cpi, &x->plane[0].src, bsize, xd->bd);
- } else {
- x->source_variance =
- av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
+
+ x->force_zeromv_skip_for_blk =
+ get_force_zeromv_skip_flag_for_blk(cpi, x, bsize);
+
+ if (!x->force_zeromv_skip_for_blk) {
+ x->source_variance = av1_get_perpixel_variance_facade(
+ cpi, xd, &x->plane[0].src, bsize, AOM_PLANE_Y);
}
// Save rdmult before it might be changed, so it can be restored later.
const int orig_rdmult = x->rdmult;
@@ -2244,6 +2294,12 @@ static void pick_sb_modes_nonrd(AV1_COMP *const cpi, TileDataEnc *tile_data,
#endif
}
if (cpi->sf.rt_sf.skip_cdef_sb) {
+    // cdef_strength is initialized to 1, which means skip_cdef, and is
+    // updated here. Check to see if skipping cdef is allowed.
+ const int allow_cdef_skipping =
+ cpi->rc.frames_since_key > 10 && !cpi->rc.high_source_sad &&
+ !(x->color_sensitivity[0] || x->color_sensitivity[1]);
+
// Find the corresponding 64x64 block. It'll be the 128x128 block if that's
// the block size.
const int mi_row_sb = mi_row - mi_row % MI_SIZE_64X64;
@@ -2253,12 +2309,11 @@ static void pick_sb_modes_nonrd(AV1_COMP *const cpi, TileDataEnc *tile_data,
get_mi_grid_idx(&cm->mi_params, mi_row_sb, mi_col_sb);
// Do not skip if intra or new mv is picked, or color sensitivity is set.
// Never skip on slide/scene change.
- mi_sb[0]->skip_cdef_curr_sb =
- mi_sb[0]->skip_cdef_curr_sb && !cpi->rc.high_source_sad &&
- !(x->color_sensitivity[0] || x->color_sensitivity[1]) &&
+ mi_sb[0]->cdef_strength =
+ mi_sb[0]->cdef_strength && allow_cdef_skipping &&
!(mbmi->mode < INTRA_MODES || mbmi->mode == NEWMV);
// Store in the pickmode context.
- ctx->mic.skip_cdef_curr_sb = mi_sb[0]->skip_cdef_curr_sb;
+ ctx->mic.cdef_strength = mi_sb[0]->cdef_strength;
}
x->rdmult = orig_rdmult;
ctx->rd_stats.rate = rd_cost->rate;
@@ -2269,6 +2324,197 @@ static void pick_sb_modes_nonrd(AV1_COMP *const cpi, TileDataEnc *tile_data,
#endif
}
+// Evaluate if the sub-partitions can be merged directly into a large partition
+// without calculating the RD cost.
+static void direct_partition_merging(AV1_COMP *cpi, ThreadData *td,
+ TileDataEnc *tile_data, MB_MODE_INFO **mib,
+ int mi_row, int mi_col, BLOCK_SIZE bsize) {
+ AV1_COMMON *const cm = &cpi->common;
+ const CommonModeInfoParams *const mi_params = &cm->mi_params;
+ TileInfo *const tile_info = &tile_data->tile_info;
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const int bs = mi_size_wide[bsize];
+ const int hbs = bs / 2;
+ const PARTITION_TYPE partition =
+ (bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize)
+ : PARTITION_NONE;
+ BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
+
+ MB_MODE_INFO **b0 = mib;
+ MB_MODE_INFO **b1 = mib + hbs;
+ MB_MODE_INFO **b2 = mib + hbs * mi_params->mi_stride;
+ MB_MODE_INFO **b3 = mib + hbs * mi_params->mi_stride + hbs;
+
+  // Check if the following conditions are met. This can be extended
+  // later as more cases gain support.
+ const int further_split = b0[0]->bsize < subsize || b1[0]->bsize < subsize ||
+ b2[0]->bsize < subsize || b3[0]->bsize < subsize;
+ if (further_split) return;
+
+ const int no_skip = !b0[0]->skip_txfm || !b1[0]->skip_txfm ||
+ !b2[0]->skip_txfm || !b3[0]->skip_txfm;
+ if (no_skip) return;
+
+ const int compound = (b0[0]->ref_frame[1] != b1[0]->ref_frame[1] ||
+ b0[0]->ref_frame[1] != b2[0]->ref_frame[1] ||
+ b0[0]->ref_frame[1] != b3[0]->ref_frame[1] ||
+ b0[0]->ref_frame[1] > NONE_FRAME);
+ if (compound) return;
+
+ // Intra modes aren't considered here.
+ const int different_ref = (b0[0]->ref_frame[0] != b1[0]->ref_frame[0] ||
+ b0[0]->ref_frame[0] != b2[0]->ref_frame[0] ||
+ b0[0]->ref_frame[0] != b3[0]->ref_frame[0] ||
+ b0[0]->ref_frame[0] <= INTRA_FRAME);
+ if (different_ref) return;
+
+ const int different_mode =
+ (b0[0]->mode != b1[0]->mode || b0[0]->mode != b2[0]->mode ||
+ b0[0]->mode != b3[0]->mode);
+ if (different_mode) return;
+
+ const int unsupported_mode =
+ (b0[0]->mode != NEARESTMV && b0[0]->mode != GLOBALMV);
+ if (unsupported_mode) return;
+
+ const int different_mv = (b0[0]->mv[0].as_int != b1[0]->mv[0].as_int ||
+ b0[0]->mv[0].as_int != b2[0]->mv[0].as_int ||
+ b0[0]->mv[0].as_int != b3[0]->mv[0].as_int);
+ if (different_mv) return;
+
+ const int unsupported_motion_mode =
+ (b0[0]->motion_mode != b1[0]->motion_mode ||
+ b0[0]->motion_mode != b2[0]->motion_mode ||
+ b0[0]->motion_mode != b3[0]->motion_mode ||
+ b0[0]->motion_mode != SIMPLE_TRANSLATION);
+ if (unsupported_motion_mode) return;
+
+  const int different_filter =
+ (b0[0]->interp_filters.as_int != b1[0]->interp_filters.as_int ||
+ b0[0]->interp_filters.as_int != b2[0]->interp_filters.as_int ||
+ b0[0]->interp_filters.as_int != b3[0]->interp_filters.as_int);
+  if (different_filter) return;
+
+ const int different_seg = (b0[0]->segment_id != b1[0]->segment_id ||
+ b0[0]->segment_id != b2[0]->segment_id ||
+ b0[0]->segment_id != b3[0]->segment_id);
+ if (different_seg) return;
+
+ // Evaluate the ref_mv.
+ MB_MODE_INFO **this_mi = mib;
+ BLOCK_SIZE orig_bsize = this_mi[0]->bsize;
+ const PARTITION_TYPE orig_partition = this_mi[0]->partition;
+
+ this_mi[0]->bsize = bsize;
+ this_mi[0]->partition = PARTITION_NONE;
+ this_mi[0]->skip_txfm = 1;
+
+ // TODO(yunqing): functions called below can be optimized by
+ // removing unrelated operations.
+ av1_set_offsets_without_segment_id(cpi, &tile_data->tile_info, x, mi_row,
+ mi_col, bsize);
+
+ const MV_REFERENCE_FRAME ref_frame = this_mi[0]->ref_frame[0];
+ int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES];
+ struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
+ int force_skip_low_temp_var = 0;
+ int skip_pred_mv = 0;
+
+ for (int i = 0; i < MB_MODE_COUNT; ++i) {
+ for (int j = 0; j < REF_FRAMES; ++j) {
+ frame_mv[i][j].as_int = INVALID_MV;
+ }
+ }
+ x->color_sensitivity[0] = x->color_sensitivity_sb[0];
+ x->color_sensitivity[1] = x->color_sensitivity_sb[1];
+ skip_pred_mv = (x->nonrd_prune_ref_frame_search > 2 &&
+ x->color_sensitivity[0] != 2 && x->color_sensitivity[1] != 2);
+
+ find_predictors(cpi, x, ref_frame, frame_mv, tile_data, yv12_mb, bsize,
+ force_skip_low_temp_var, skip_pred_mv);
+
+ int continue_merging = 1;
+ if (frame_mv[NEARESTMV][ref_frame].as_mv.row != b0[0]->mv[0].as_mv.row ||
+ frame_mv[NEARESTMV][ref_frame].as_mv.col != b0[0]->mv[0].as_mv.col)
+ continue_merging = 0;
+
+ if (!continue_merging) {
+ this_mi[0]->bsize = orig_bsize;
+ this_mi[0]->partition = orig_partition;
+
+ // TODO(yunqing): Store the results and restore here instead of
+ // calling find_predictors() again.
+ av1_set_offsets_without_segment_id(cpi, &tile_data->tile_info, x, mi_row,
+ mi_col, this_mi[0]->bsize);
+ find_predictors(cpi, x, ref_frame, frame_mv, tile_data, yv12_mb,
+ this_mi[0]->bsize, force_skip_low_temp_var, skip_pred_mv);
+ } else {
+ struct scale_factors *sf = get_ref_scale_factors(cm, ref_frame);
+ const int is_scaled = av1_is_scaled(sf);
+ const int is_y_subpel_mv = (abs(this_mi[0]->mv[0].as_mv.row) % 8) ||
+ (abs(this_mi[0]->mv[0].as_mv.col) % 8);
+ const int is_uv_subpel_mv = (abs(this_mi[0]->mv[0].as_mv.row) % 16) ||
+ (abs(this_mi[0]->mv[0].as_mv.col) % 16);
+
+ if (cpi->ppi->use_svc || is_scaled || is_y_subpel_mv || is_uv_subpel_mv) {
+ const int num_planes = av1_num_planes(cm);
+ set_ref_ptrs(cm, xd, ref_frame, this_mi[0]->ref_frame[1]);
+ const YV12_BUFFER_CONFIG *cfg = get_ref_frame_yv12_buf(cm, ref_frame);
+ av1_setup_pre_planes(xd, 0, cfg, mi_row, mi_col,
+ xd->block_ref_scale_factors[0], num_planes);
+
+ if (!cpi->ppi->use_svc && !is_scaled && !is_y_subpel_mv) {
+ assert(is_uv_subpel_mv == 1);
+ av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 1,
+ num_planes - 1);
+ } else {
+ av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
+ num_planes - 1);
+ }
+ }
+
+ // Copy out mbmi_ext information.
+ MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
+ MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame = x->mbmi_ext_frame;
+ av1_copy_mbmi_ext_to_mbmi_ext_frame(
+ mbmi_ext_frame, mbmi_ext, av1_ref_frame_type(this_mi[0]->ref_frame));
+
+ const BLOCK_SIZE this_subsize =
+ get_partition_subsize(bsize, this_mi[0]->partition);
+ // Update partition contexts.
+ update_ext_partition_context(xd, mi_row, mi_col, this_subsize, bsize,
+ this_mi[0]->partition);
+
+ const int num_planes = av1_num_planes(cm);
+ av1_reset_entropy_context(xd, bsize, num_planes);
+
+ // Note: use x->txfm_search_params.tx_mode_search_type instead of
+ // cm->features.tx_mode here.
+ TX_SIZE tx_size =
+ tx_size_from_tx_mode(bsize, x->txfm_search_params.tx_mode_search_type);
+ if (xd->lossless[this_mi[0]->segment_id]) tx_size = TX_4X4;
+ this_mi[0]->tx_size = tx_size;
+ memset(this_mi[0]->inter_tx_size, this_mi[0]->tx_size,
+ sizeof(this_mi[0]->inter_tx_size));
+
+ // Update txfm contexts.
+ xd->above_txfm_context =
+ cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
+ xd->left_txfm_context =
+ xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
+ set_txfm_ctxs(this_mi[0]->tx_size, xd->width, xd->height,
+ this_mi[0]->skip_txfm && is_inter_block(this_mi[0]), xd);
+
+ // Update mi for this partition block.
+ for (int y = 0; y < bs; y++) {
+ for (int x_idx = 0; x_idx < bs; x_idx++) {
+ this_mi[x_idx + y * mi_params->mi_stride] = this_mi[0];
+ }
+ }
+ }
+}
+
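The refactored direct_partition_merging above bails out unless all four sub-blocks coded identically: skip flag, single reference, mode (NEARESTMV or GLOBALMV only), motion vector, motion mode, interpolation filters, and segment. A compact sketch of that screening over a simplified stand-in struct, not libaom's MB_MODE_INFO:

#include <stdbool.h>
#include <stdint.h>

struct SubBlockInfo {
  bool skip_txfm;
  int ref_frame;    /* single reference only; compound is rejected earlier */
  int mode;
  int32_t mv;       /* packed MV, mirroring mv[0].as_int */
  int motion_mode;
  uint32_t filters; /* packed filters, mirroring interp_filters.as_int */
  int segment_id;
};

static bool quadrants_mergeable(const struct SubBlockInfo b[4]) {
  for (int i = 0; i < 4; ++i)
    if (!b[i].skip_txfm) return false; /* all four must be skip blocks */
  for (int i = 1; i < 4; ++i) {
    if (b[i].ref_frame != b[0].ref_frame || b[i].mode != b[0].mode ||
        b[i].mv != b[0].mv || b[i].motion_mode != b[0].motion_mode ||
        b[i].filters != b[0].filters || b[i].segment_id != b[0].segment_id)
      return false;
  }
  return true;
}

Pulling these checks into a helper lets each condition return early instead of break-ing out of the surrounding switch, which is why the removed copy below used break where the new function uses return.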
/*!\brief AV1 block partition application (minimal RD search).
*
* \ingroup partition_search
@@ -2295,7 +2541,7 @@ MI_SIZE
* \param[in] pc_tree Pointer to the PC_TREE node holding the picked
partitions and mode info for the current block
*
-* \return Nothing is returned. The pc_tree struct is modified to store the
+* \remark Nothing is returned. The pc_tree struct is modified to store the
* picked partition and modes.
*/
void av1_nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
@@ -2338,9 +2584,14 @@ void av1_nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
// Initialize default mode evaluation params
set_mode_eval_params(cpi, x, DEFAULT_EVAL);
+ x->reuse_inter_pred = cpi->sf.rt_sf.reuse_inter_pred_nonrd;
switch (partition) {
case PARTITION_NONE:
- pc_tree->none = av1_alloc_pmc(cpi, bsize, &td->shared_coeff_buf);
+ if (!pc_tree->none) {
+ pc_tree->none = av1_alloc_pmc(cpi, bsize, &td->shared_coeff_buf);
+ } else {
+ av1_reset_pmc(pc_tree->none);
+ }
if (cpi->sf.rt_sf.nonrd_check_partition_split && do_split_check(bsize) &&
!frame_is_intra_only(cm)) {
RD_STATS split_rdc, none_rdc, block_rdc;
@@ -2411,8 +2662,12 @@ void av1_nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
break;
case PARTITION_VERT:
for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) {
- pc_tree->vertical[i] =
- av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf);
+ if (!pc_tree->vertical[i]) {
+ pc_tree->vertical[i] =
+ av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf);
+ } else {
+ av1_reset_pmc(pc_tree->vertical[i]);
+ }
}
pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &dummy_cost,
subsize, pc_tree->vertical[0]);
@@ -2427,8 +2682,12 @@ void av1_nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
break;
case PARTITION_HORZ:
for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) {
- pc_tree->horizontal[i] =
- av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf);
+ if (!pc_tree->horizontal[i]) {
+ pc_tree->horizontal[i] =
+ av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf);
+ } else {
+ av1_reset_pmc(pc_tree->horizontal[i]);
+ }
}
pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &dummy_cost,
subsize, pc_tree->horizontal[0]);
@@ -2444,9 +2703,12 @@ void av1_nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
break;
case PARTITION_SPLIT:
for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) {
- pc_tree->split[i] = av1_alloc_pc_tree_node(subsize);
+ if (!pc_tree->split[i]) {
+ pc_tree->split[i] = av1_alloc_pc_tree_node(subsize);
+ }
pc_tree->split[i]->index = i;
}
+ bool do_split = false;
if (cpi->sf.rt_sf.nonrd_check_partition_merge_mode &&
av1_is_leaf_split_partition(cm, mi_row, mi_col, bsize) &&
!frame_is_intra_only(cm) && bsize <= BLOCK_64X64) {
@@ -2460,7 +2722,11 @@ void av1_nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
xd->left_txfm_context =
xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
pc_tree->partitioning = PARTITION_NONE;
- pc_tree->none = av1_alloc_pmc(cpi, bsize, &td->shared_coeff_buf);
+ if (!pc_tree->none) {
+ pc_tree->none = av1_alloc_pmc(cpi, bsize, &td->shared_coeff_buf);
+ } else {
+ av1_reset_pmc(pc_tree->none);
+ }
pick_sb_modes_nonrd(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize,
pc_tree->none);
none_rdc.rate += mode_costs->partition_cost[pl][PARTITION_NONE];
@@ -2469,11 +2735,10 @@ void av1_nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
if (cpi->sf.rt_sf.nonrd_check_partition_merge_mode < 2 ||
none_rdc.skip_txfm != 1 || pc_tree->none->mic.mode == NEWMV) {
const int is_larger_qindex = cm->quant_params.base_qindex > 100;
- const int do_split =
- (cpi->sf.rt_sf.nonrd_check_partition_merge_mode == 3)
- ? (bsize <= BLOCK_32X32 ||
- (is_larger_qindex && bsize <= BLOCK_64X64))
- : 1;
+ do_split = (cpi->sf.rt_sf.nonrd_check_partition_merge_mode == 3)
+ ? (bsize <= BLOCK_32X32 ||
+ (is_larger_qindex && bsize <= BLOCK_64X64))
+ : 1;
if (do_split) {
av1_init_rd_stats(&split_rdc);
split_rdc.rate += mode_costs->partition_cost[pl][PARTITION_SPLIT];
@@ -2489,9 +2754,12 @@ void av1_nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
cm->above_contexts.txfm[tile_info->tile_row] + mi_col + x_idx;
xd->left_txfm_context = xd->left_txfm_context_buffer +
((mi_row + y_idx) & MAX_MIB_MASK);
- if (pc_tree->split[i]->none == NULL)
+ if (!pc_tree->split[i]->none) {
pc_tree->split[i]->none =
av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf);
+ } else {
+ av1_reset_pmc(pc_tree->split[i]->none);
+ }
pc_tree->split[i]->partitioning = PARTITION_NONE;
pick_sb_modes_nonrd(cpi, tile_data, x, mi_row + y_idx,
mi_col + x_idx, &block_rdc, subsize,
@@ -2517,6 +2785,11 @@ void av1_nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
}
}
if (none_rdc.rdcost < split_rdc.rdcost) {
+          /* Predicted samples cannot be reused for PARTITION_NONE since the
+           * same buffer is being used to store the reconstructed samples of
+           * the PARTITION_SPLIT block. */
+ if (do_split) x->reuse_inter_pred = false;
+
mib[0]->bsize = bsize;
pc_tree->partitioning = PARTITION_NONE;
encode_b_nonrd(cpi, tile_data, td, tp, mi_row, mi_col, 0, bsize,
@@ -2524,6 +2797,12 @@ void av1_nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
} else {
mib[0]->bsize = subsize;
pc_tree->partitioning = PARTITION_SPLIT;
+          /* Predicted samples cannot be reused for PARTITION_SPLIT since the
+           * same buffer is being used to write the reconstructed samples. */
+ // TODO(Cherma): Store and reuse predicted samples generated by
+ // encode_b_nonrd() in DRY_RUN_NORMAL mode.
+ x->reuse_inter_pred = false;
+
for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) {
int x_idx = (i & 1) * hbs;
int y_idx = (i >> 1) * hbs;
@@ -2531,9 +2810,14 @@ void av1_nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
(mi_col + x_idx >= mi_params->mi_cols))
continue;
- if (pc_tree->split[i]->none == NULL)
+ // Note: We don't reset pc_tree->split[i]->none here because it
+ // could contain results from the additional check. Instead, it is
+ // reset before we enter the nonrd_check_partition_merge_mode
+ // condition.
+ if (!pc_tree->split[i]->none) {
pc_tree->split[i]->none =
av1_alloc_pmc(cpi, subsize, &td->shared_coeff_buf);
+ }
encode_b_nonrd(cpi, tile_data, td, tp, mi_row + y_idx,
mi_col + x_idx, 0, subsize, PARTITION_NONE,
pc_tree->split[i]->none, NULL);
@@ -2553,8 +2837,6 @@ void av1_nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
mi_row + y_idx, mi_col + x_idx, subsize, pc_tree->split[i]);
}
- // TODO(yunqing): Add this to PARTITION_HORZ and PARTITION_VERT. Make
- // this work with nonrd_check_partition_merge_mode feature.
// Note: Palette, cfl are not supported.
if (!frame_is_intra_only(cm) && !tile_data->allow_update_cdf &&
cpi->sf.rt_sf.partition_direct_merging &&
@@ -2562,191 +2844,8 @@ void av1_nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
mode_costs->partition_cost[pl][PARTITION_SPLIT] &&
(mi_row + bs <= mi_params->mi_rows) &&
(mi_col + bs <= mi_params->mi_cols)) {
- MB_MODE_INFO **b0 = mib;
- MB_MODE_INFO **b1 = mib + hbs;
- MB_MODE_INFO **b2 = mib + hbs * mi_params->mi_stride;
- MB_MODE_INFO **b3 = mib + hbs * mi_params->mi_stride + hbs;
-
- // Check if the following conditions are met. This can be updated
- // later with more support added.
- const int further_split =
- b0[0]->bsize < subsize || b1[0]->bsize < subsize ||
- b2[0]->bsize < subsize || b3[0]->bsize < subsize;
- if (further_split) break;
-
- const int no_skip = !b0[0]->skip_txfm || !b1[0]->skip_txfm ||
- !b2[0]->skip_txfm || !b3[0]->skip_txfm;
- if (no_skip) break;
-
- const int compound = (b0[0]->ref_frame[1] != b1[0]->ref_frame[1] ||
- b0[0]->ref_frame[1] != b2[0]->ref_frame[1] ||
- b0[0]->ref_frame[1] != b3[0]->ref_frame[1] ||
- b0[0]->ref_frame[1] > NONE_FRAME);
- if (compound) break;
-
- // Intra modes aren't considered here.
- const int different_ref =
- (b0[0]->ref_frame[0] != b1[0]->ref_frame[0] ||
- b0[0]->ref_frame[0] != b2[0]->ref_frame[0] ||
- b0[0]->ref_frame[0] != b3[0]->ref_frame[0] ||
- b0[0]->ref_frame[0] <= INTRA_FRAME);
- if (different_ref) break;
-
- const int different_mode =
- (b0[0]->mode != b1[0]->mode || b0[0]->mode != b2[0]->mode ||
- b0[0]->mode != b3[0]->mode);
- if (different_mode) break;
-
- const int unsupported_mode =
- (b0[0]->mode != NEARESTMV && b0[0]->mode != GLOBALMV);
- if (unsupported_mode) break;
-
- const int different_mv =
- (b0[0]->mv[0].as_int != b1[0]->mv[0].as_int ||
- b0[0]->mv[0].as_int != b2[0]->mv[0].as_int ||
- b0[0]->mv[0].as_int != b3[0]->mv[0].as_int);
- if (different_mv) break;
-
- const int unsupported_motion_mode =
- (b0[0]->motion_mode != b1[0]->motion_mode ||
- b0[0]->motion_mode != b2[0]->motion_mode ||
- b0[0]->motion_mode != b3[0]->motion_mode ||
- b0[0]->motion_mode != SIMPLE_TRANSLATION);
- if (unsupported_motion_mode) break;
-
- const int diffent_filter =
- (b0[0]->interp_filters.as_int != b1[0]->interp_filters.as_int ||
- b0[0]->interp_filters.as_int != b2[0]->interp_filters.as_int ||
- b0[0]->interp_filters.as_int != b3[0]->interp_filters.as_int);
- if (diffent_filter) break;
-
- const int different_seg = (b0[0]->segment_id != b1[0]->segment_id ||
- b0[0]->segment_id != b2[0]->segment_id ||
- b0[0]->segment_id != b3[0]->segment_id);
- if (different_seg) break;
-
- // Evaluate the ref_mv.
- MB_MODE_INFO **this_mi = mib;
- BLOCK_SIZE orig_bsize = this_mi[0]->bsize;
- const PARTITION_TYPE orig_partition = this_mi[0]->partition;
-
- this_mi[0]->bsize = bsize;
- this_mi[0]->partition = PARTITION_NONE;
- this_mi[0]->skip_txfm = 1;
-
- // TODO(yunqing): functions called below can be optimized with
- // removing unrelated operations.
- av1_set_offsets_without_segment_id(cpi, &tile_data->tile_info, x,
- mi_row, mi_col, bsize);
-
- const MV_REFERENCE_FRAME ref_frame = this_mi[0]->ref_frame[0];
- int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES];
- struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
- int force_skip_low_temp_var = 0;
- int skip_pred_mv = 0;
-
- for (int i = 0; i < MB_MODE_COUNT; ++i) {
- for (int j = 0; j < REF_FRAMES; ++j) {
- frame_mv[i][j].as_int = INVALID_MV;
- }
- }
- x->color_sensitivity[0] = x->color_sensitivity_sb[0];
- x->color_sensitivity[1] = x->color_sensitivity_sb[1];
- skip_pred_mv =
- (x->nonrd_prune_ref_frame_search > 2 &&
- x->color_sensitivity[0] != 2 && x->color_sensitivity[1] != 2);
-
- find_predictors(cpi, x, ref_frame, frame_mv, tile_data, yv12_mb,
- bsize, force_skip_low_temp_var, skip_pred_mv);
-
- int continue_merging = 1;
- if (frame_mv[NEARESTMV][ref_frame].as_mv.row !=
- b0[0]->mv[0].as_mv.row ||
- frame_mv[NEARESTMV][ref_frame].as_mv.col !=
- b0[0]->mv[0].as_mv.col)
- continue_merging = 0;
-
- if (!continue_merging) {
- this_mi[0]->bsize = orig_bsize;
- this_mi[0]->partition = orig_partition;
-
- // TODO(yunqing): Store the results and restore here instead of
- // calling find_predictors() again.
- av1_set_offsets_without_segment_id(cpi, &tile_data->tile_info, x,
- mi_row, mi_col,
- this_mi[0]->bsize);
- find_predictors(cpi, x, ref_frame, frame_mv, tile_data, yv12_mb,
- this_mi[0]->bsize, force_skip_low_temp_var,
- skip_pred_mv);
- } else {
- struct scale_factors *sf = get_ref_scale_factors(cm, ref_frame);
- const int is_scaled = av1_is_scaled(sf);
- const int is_y_subpel_mv = (abs(this_mi[0]->mv[0].as_mv.row) % 8) ||
- (abs(this_mi[0]->mv[0].as_mv.col) % 8);
- const int is_uv_subpel_mv =
- (abs(this_mi[0]->mv[0].as_mv.row) % 16) ||
- (abs(this_mi[0]->mv[0].as_mv.col) % 16);
-
- if (cpi->ppi->use_svc || is_scaled || is_y_subpel_mv ||
- is_uv_subpel_mv) {
- const int num_planes = av1_num_planes(cm);
- set_ref_ptrs(cm, xd, ref_frame, this_mi[0]->ref_frame[1]);
- const YV12_BUFFER_CONFIG *cfg =
- get_ref_frame_yv12_buf(cm, ref_frame);
- av1_setup_pre_planes(xd, 0, cfg, mi_row, mi_col,
- xd->block_ref_scale_factors[0], num_planes);
-
- if (!cpi->ppi->use_svc && !is_scaled && !is_y_subpel_mv) {
- assert(is_uv_subpel_mv == 1);
- av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL,
- bsize, 1, num_planes - 1);
- } else {
- av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL,
- bsize, 0, num_planes - 1);
- }
- }
-
- // Copy out mbmi_ext information.
- MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
- MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame = x->mbmi_ext_frame;
- av1_copy_mbmi_ext_to_mbmi_ext_frame(
- mbmi_ext_frame, mbmi_ext,
- av1_ref_frame_type(this_mi[0]->ref_frame));
-
- const BLOCK_SIZE this_subsize =
- get_partition_subsize(bsize, this_mi[0]->partition);
- // Update partition contexts.
- update_ext_partition_context(xd, mi_row, mi_col, this_subsize,
- bsize, this_mi[0]->partition);
-
- const int num_planes = av1_num_planes(cm);
- av1_reset_entropy_context(xd, bsize, num_planes);
-
- // Note: use x->txfm_search_params.tx_mode_search_type instead of
- // cm->features.tx_mode here.
- TX_SIZE tx_size = tx_size_from_tx_mode(
- bsize, x->txfm_search_params.tx_mode_search_type);
- if (xd->lossless[this_mi[0]->segment_id]) tx_size = TX_4X4;
- this_mi[0]->tx_size = tx_size;
- memset(this_mi[0]->inter_tx_size, this_mi[0]->tx_size,
- sizeof(this_mi[0]->inter_tx_size));
-
- // Update txfm contexts.
- xd->above_txfm_context =
- cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
- xd->left_txfm_context =
- xd->left_txfm_context_buffer + ((mi_row)&MAX_MIB_MASK);
- set_txfm_ctxs(this_mi[0]->tx_size, xd->width, xd->height,
- this_mi[0]->skip_txfm && is_inter_block(this_mi[0]),
- xd);
-
- // Update mi for this partition block.
- for (int y = 0; y < bs; y++) {
- for (int x_idx = 0; x_idx < bs; x_idx++) {
- this_mi[x_idx + y * mi_params->mi_stride] = this_mi[0];
- }
- }
- }
+ direct_partition_merging(cpi, td, tile_data, mib, mi_row, mi_col,
+ bsize);
}
}
break;
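
The large inline block removed above now lives in direct_partition_merging(). Judging from the removed code, the helper begins by verifying that the four sub-blocks are mergeable before rewriting the partition as PARTITION_NONE. A condensed, self-contained sketch of those preconditions (the struct and constants below are simplified stand-ins, not the libaom definitions):

    #include <stdbool.h>
    #include <stdint.h>

    /* Simplified stand-in for the MB_MODE_INFO fields the removed block
     * compared; names and values are illustrative. */
    typedef struct {
      int bsize, skip_txfm, mode, motion_mode, segment_id;
      int ref_frame[2]; /* [1] <= NONE_FRAME_ means single reference */
      uint32_t mv, interp_filters;
    } SubBlk;

    enum { NONE_FRAME_ = -1, INTRA_FRAME_ = 0, NEARESTMV_ = 13,
           GLOBALMV_ = 15, SIMPLE_TRANSLATION_ = 0 };

    /* The four sub-blocks can merge back into PARTITION_NONE only if none
     * is further split, all are skip_txfm, single-reference inter with the
     * same ref/mode/MV/motion-mode/filters/segment, and the shared mode is
     * NEARESTMV or GLOBALMV with simple translation. */
    static bool can_merge(const SubBlk b[4], int subsize) {
      for (int i = 0; i < 4; i++) {
        if (b[i].bsize < subsize || !b[i].skip_txfm) return false;
        if (b[i].ref_frame[1] != b[0].ref_frame[1] ||
            b[0].ref_frame[1] > NONE_FRAME_) return false;   /* compound */
        if (b[i].ref_frame[0] != b[0].ref_frame[0] ||
            b[0].ref_frame[0] <= INTRA_FRAME_) return false; /* intra */
        if (b[i].mode != b[0].mode || b[i].mv != b[0].mv) return false;
        if (b[i].motion_mode != b[0].motion_mode ||
            b[i].interp_filters != b[0].interp_filters ||
            b[i].segment_id != b[0].segment_id) return false;
      }
      return (b[0].mode == NEARESTMV_ || b[0].mode == GLOBALMV_) &&
             b[0].motion_mode == SIMPLE_TRANSLATION_;
    }
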
@@ -4043,7 +4142,13 @@ static void split_partition_search(
!(partition_none_valid && partition_none_better);
}
}
- av1_restore_context(x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm));
+ // Restore the context in the following cases:
+ // 1) The current block size is not larger than the maximum partition
+ // size, as a dry-run encode happens for such blocks.
+ // 2) The current block size equals the superblock size, as the final
+ // encode happens for this block.
+ if (bsize <= x->sb_enc.max_partition_size || bsize == cm->seq_params->sb_size)
+ av1_restore_context(x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm));
}
// The max number of nodes in the partition tree.
@@ -4813,7 +4918,7 @@ bool av1_rd_partition_search(AV1_COMP *const cpi, ThreadData *td,
PC_TREE *const pc_tree = av1_alloc_pc_tree_node(bsize);
num_configs = read_partition_tree(cpi, pc_tree, i);
if (i == 0) {
- rdcost = aom_calloc(num_configs, sizeof(*rdcost));
+ CHECK_MEM_ERROR(cm, rdcost, aom_calloc(num_configs, sizeof(*rdcost)));
}
if (num_configs <= 0) {
av1_free_pc_tree_recursive(pc_tree, av1_num_planes(cm), 0, 0);
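
CHECK_MEM_ERROR replaces the unchecked aom_calloc() so that an allocation failure is routed through the codec's error handler instead of leaving rdcost NULL. A minimal model of the idiom the macro implements (hand-rolled names; the real macro and aom_internal_error() live in libaom's internal headers):

    #include <stdlib.h>

    /* Hypothetical stand-in for aom_internal_error(); assumed to long-jump
     * out of the encode call rather than return. */
    void report_mem_error(const char *msg);

    #define CHECK_MEM_ERROR_SKETCH(lval, expr)                        \
      do {                                                            \
        (lval) = (expr);                                              \
        if (!(lval)) report_mem_error("Failed to allocate " #lval);   \
      } while (0)

A caller then writes CHECK_MEM_ERROR_SKETCH(rdcost, calloc(n, sizeof(*rdcost))); and can rely on rdcost being non-NULL afterwards.
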
@@ -4852,6 +4957,22 @@ bool av1_rd_partition_search(AV1_COMP *const cpi, ThreadData *td,
return true;
}
+static AOM_INLINE bool should_do_dry_run_encode_for_current_block(
+ BLOCK_SIZE sb_size, BLOCK_SIZE max_partition_size, int curr_block_index,
+ BLOCK_SIZE bsize) {
+ if (bsize > max_partition_size) return false;
+
+ // Enable reconstruction with dry-run for the 4th sub-block only if its
+ // parent block's reconstruction with dry-run is skipped. If
+ // max_partition_size equals the immediate split of the superblock, avoid
+ // reconstructing the 4th sub-block, as this data is not consumed.
+ if (curr_block_index != 3) return true;
+
+ const BLOCK_SIZE sub_sb_size =
+ get_partition_subsize(sb_size, PARTITION_SPLIT);
+ return bsize == max_partition_size && sub_sb_size != max_partition_size;
+}
+
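
Tracing the new predicate: with a 128x128 superblock, the immediate split size is 64x64. If max_partition_size is 64x64, an index-3 64x64 block returns false (bsize == max_partition_size but sub_sb_size == max_partition_size), so the unconsumed 4th sub-block is not reconstructed. If max_partition_size is instead 32x32, an index-3 32x32 block returns true, because its 64x64 parent (larger than max_partition_size) was itself skipped and its data is needed.
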
static void log_sub_block_var(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
double *var_min, double *var_max) {
// This function returns the minimum and maximum log variances for 4x4
@@ -5158,13 +5279,8 @@ BEGIN_PARTITION_SEARCH:
if (pb_source_variance == UINT_MAX) {
av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);
- if (is_cur_buf_hbd(xd)) {
- pb_source_variance = av1_high_get_sby_perpixel_variance(
- cpi, &x->plane[0].src, bsize, xd->bd);
- } else {
- pb_source_variance =
- av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
- }
+ pb_source_variance = av1_get_perpixel_variance_facade(
+ cpi, xd, &x->plane[0].src, bsize, AOM_PLANE_Y);
}
assert(IMPLIES(!cpi->oxcf.part_cfg.enable_rect_partitions,
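
This hunk (and the matching hunks in partition_strategy.c below) folds the repeated is_cur_buf_hbd() branches into av1_get_perpixel_variance_facade(). The shape of such a facade, sketched with hypothetical stand-ins for the two variance routines the removed branches called:

    #include <stdbool.h>

    /* Hypothetical stand-ins for the high- and low-bitdepth per-pixel
     * variance routines. */
    unsigned int variance_lbd(const void *src, int bsize);
    unsigned int variance_hbd(const void *src, int bsize, int bit_depth);

    /* The facade hides the bitdepth dispatch from every call site. */
    static unsigned int perpixel_variance_facade(const void *src, int bsize,
                                                 bool is_hbd, int bit_depth) {
      return is_hbd ? variance_hbd(src, bsize, bit_depth)
                    : variance_lbd(src, bsize);
    }
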
@@ -5280,9 +5396,7 @@ BEGIN_PARTITION_SEARCH:
#if CONFIG_COLLECT_COMPONENT_TIMING
start_timing(cpi, encode_sb_time);
#endif
- // If a valid partition is found and reconstruction is required for future
- // sub-blocks in the same group.
- if (part_search_state.found_best_partition && pc_tree->index != 3) {
+ if (part_search_state.found_best_partition) {
if (bsize == cm->seq_params->sb_size) {
// Encode the superblock.
const int emit_output = multi_pass_mode != SB_DRY_PASS;
@@ -5300,7 +5414,9 @@ BEGIN_PARTITION_SEARCH:
// Dealloc the whole PC_TREE after a superblock is done.
av1_free_pc_tree_recursive(pc_tree, num_planes, 0, 0);
pc_tree_dealloc = 1;
- } else {
+ } else if (should_do_dry_run_encode_for_current_block(
+ cm->seq_params->sb_size, x->sb_enc.max_partition_size,
+ pc_tree->index, bsize)) {
// Encode the smaller blocks in DRY_RUN mode.
encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
pc_tree, NULL);
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/partition_strategy.c b/chromium/third_party/libaom/source/libaom/av1/encoder/partition_strategy.c
index ed1b43acb20..89c1a79568c 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/partition_strategy.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/partition_strategy.c
@@ -200,14 +200,22 @@ void av1_intra_mode_cnn_partition(const AV1_COMMON *const cm, MACROBLOCK *x,
CONVERT_TO_SHORTPTR(x->plane[AOM_PLANE_Y].src.buf) - stride - 1
};
- av1_cnn_predict_img_multi_out_highbd(image, width, height, stride,
- cnn_config, &thread_data, bit_depth,
- &output);
+ if (!av1_cnn_predict_img_multi_out_highbd(image, width, height, stride,
+ cnn_config, &thread_data,
+ bit_depth, &output)) {
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
+ "Error allocating CNN data");
+ return;
+ }
} else {
uint8_t *image[1] = { x->plane[AOM_PLANE_Y].src.buf - stride - 1 };
- av1_cnn_predict_img_multi_out(image, width, height, stride, cnn_config,
- &thread_data, &output);
+ if (!av1_cnn_predict_img_multi_out(image, width, height, stride,
+ cnn_config, &thread_data, &output)) {
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
+ "Error allocating CNN data");
+ return;
+ }
}
part_info->cnn_output_valid = 1;
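
The CNN prediction entry points now report allocation failure through a boolean return, and the caller turns that into a codec-level memory error before bailing out, leaving cnn_output_valid unset. The contract, sketched with hypothetical names:

    #include <stdbool.h>

    bool predict(const void *image);        /* false == allocation failed */
    void signal_mem_error(const char *msg); /* models aom_internal_error() */

    static void run_prediction(const void *image, int *output_valid) {
      if (!predict(image)) {
        signal_mem_error("Error allocating CNN data");
        return; /* output_valid stays 0: the result is unusable */
      }
      *output_valid = 1;
    }
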
@@ -936,14 +944,10 @@ BLOCK_SIZE av1_predict_max_partition(const AV1_COMP *const cpi,
} else if (cpi->sf.part_sf.auto_max_partition_based_on_simple_motion ==
ADAPT_PRED) {
const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size;
- const MACROBLOCKD *const xd = &x->e_mbd;
// TODO(debargha): x->source_variance is unavailable at this point,
// so compute. The redundant recomputation later can be removed.
- const unsigned int source_variance =
- is_cur_buf_hbd(xd)
- ? av1_high_get_sby_perpixel_variance(cpi, &x->plane[0].src, sb_size,
- xd->bd)
- : av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, sb_size);
+ const unsigned int source_variance = av1_get_perpixel_variance_facade(
+ cpi, &x->e_mbd, &x->plane[0].src, sb_size, AOM_PLANE_Y);
if (source_variance > 16) {
const double thresh = source_variance < 128 ? 0.05 : 0.1;
for (result = MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1; result >= 0;
@@ -1157,13 +1161,8 @@ void av1_ml_prune_rect_partition(AV1_COMP *const cpi, const MACROBLOCK *const x,
// Variance ratios
const MACROBLOCKD *const xd = &x->e_mbd;
int whole_block_variance;
- if (is_cur_buf_hbd(xd)) {
- whole_block_variance = av1_high_get_sby_perpixel_variance(
- cpi, &x->plane[0].src, bsize, xd->bd);
- } else {
- whole_block_variance =
- av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
- }
+ whole_block_variance = av1_get_perpixel_variance_facade(
+ cpi, xd, &x->plane[0].src, bsize, AOM_PLANE_Y);
whole_block_variance = AOMMAX(whole_block_variance, 1);
int split_variance[SUB_PARTITIONS_SPLIT];
@@ -1175,12 +1174,8 @@ void av1_ml_prune_rect_partition(AV1_COMP *const cpi, const MACROBLOCK *const x,
const int x_idx = (i & 1) * bw / 2;
const int y_idx = (i >> 1) * bw / 2;
buf.buf = x->plane[0].src.buf + x_idx + y_idx * buf.stride;
- if (is_cur_buf_hbd(xd)) {
- split_variance[i] =
- av1_high_get_sby_perpixel_variance(cpi, &buf, subsize, xd->bd);
- } else {
- split_variance[i] = av1_get_sby_perpixel_variance(cpi, &buf, subsize);
- }
+ split_variance[i] =
+ av1_get_perpixel_variance_facade(cpi, xd, &buf, subsize, AOM_PLANE_Y);
}
for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++)
@@ -1396,17 +1391,10 @@ void av1_ml_prune_4_partition(AV1_COMP *const cpi, MACROBLOCK *const x,
horz_4_src.buf = src + i * block_size_high[horz_4_bs] * src_stride;
vert_4_src.buf = src + i * block_size_wide[vert_4_bs];
- if (is_cur_buf_hbd(xd)) {
- horz_4_source_var[i] = av1_high_get_sby_perpixel_variance(
- cpi, &horz_4_src, horz_4_bs, xd->bd);
- vert_4_source_var[i] = av1_high_get_sby_perpixel_variance(
- cpi, &vert_4_src, vert_4_bs, xd->bd);
- } else {
- horz_4_source_var[i] =
- av1_get_sby_perpixel_variance(cpi, &horz_4_src, horz_4_bs);
- vert_4_source_var[i] =
- av1_get_sby_perpixel_variance(cpi, &vert_4_src, vert_4_bs);
- }
+ horz_4_source_var[i] = av1_get_perpixel_variance_facade(
+ cpi, xd, &horz_4_src, horz_4_bs, AOM_PLANE_Y);
+ vert_4_source_var[i] = av1_get_perpixel_variance_facade(
+ cpi, xd, &vert_4_src, vert_4_bs, AOM_PLANE_Y);
}
}
@@ -1779,11 +1767,10 @@ int evaluate_ab_partition_based_on_split(
// Threshold for number of winners
// Conservative pruning for high quantizers
const int num_win_thresh = AOMMIN(3 * (2 * (MAXQ - qindex) / MAXQ), 3);
- int sub_part_win = (rect_part_win_info == NULL)
- ? (pc_tree->partitioning == rect_part)
- : (rect_part == PARTITION_HORZ)
- ? rect_part_win_info->rect_part_win[HORZ]
- : rect_part_win_info->rect_part_win[VERT];
+ int sub_part_win =
+ (rect_part_win_info == NULL) ? (pc_tree->partitioning == rect_part)
+ : (rect_part == PARTITION_HORZ) ? rect_part_win_info->rect_part_win[HORZ]
+ : rect_part_win_info->rect_part_win[VERT];
num_win += (sub_part_win) ? 1 : 0;
if (pc_tree->split[split_idx1]) {
num_win +=
@@ -1995,17 +1982,10 @@ static void prepare_features_after_part_ab(
horz_4_src.buf = src + i * block_size_high[horz_4_bs] * src_stride;
vert_4_src.buf = src + i * block_size_wide[vert_4_bs];
- if (is_cur_buf_hbd(xd)) {
- horz_4_source_var[i] = av1_high_get_sby_perpixel_variance(
- cpi, &horz_4_src, horz_4_bs, xd->bd);
- vert_4_source_var[i] = av1_high_get_sby_perpixel_variance(
- cpi, &vert_4_src, vert_4_bs, xd->bd);
- } else {
- horz_4_source_var[i] =
- av1_get_sby_perpixel_variance(cpi, &horz_4_src, horz_4_bs);
- vert_4_source_var[i] =
- av1_get_sby_perpixel_variance(cpi, &vert_4_src, vert_4_bs);
- }
+ horz_4_source_var[i] = av1_get_perpixel_variance_facade(
+ cpi, xd, &horz_4_src, horz_4_bs, AOM_PLANE_Y);
+ vert_4_source_var[i] = av1_get_perpixel_variance_facade(
+ cpi, xd, &vert_4_src, vert_4_bs, AOM_PLANE_Y);
}
}
@@ -2423,6 +2403,13 @@ void av1_collect_motion_search_features_sb(AV1_COMP *const cpi, ThreadData *td,
const int num_blocks = col_steps * row_steps;
unsigned int *block_sse = aom_calloc(num_blocks, sizeof(*block_sse));
unsigned int *block_var = aom_calloc(num_blocks, sizeof(*block_var));
+ if (!(block_sse && block_var)) {
+ aom_free(sms_tree);
+ aom_free(block_sse);
+ aom_free(block_var);
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
+ "Error allocating block_sse & block_var");
+ }
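
Note the cleanup order here: all three buffers are freed before aom_internal_error() is signalled, and sms_tree is included even though its allocation succeeded, so nothing leaks on the error path.
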
int idx = 0;
for (int row = mi_row;
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/pass2_strategy.c b/chromium/third_party/libaom/source/libaom/av1/encoder/pass2_strategy.c
index 9e086eeafbf..bcba980ae89 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/pass2_strategy.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/pass2_strategy.c
@@ -173,7 +173,7 @@ static double calc_correction_factor(double err_per_mb, int q) {
// Based on history adjust expectations of bits per macroblock.
static void twopass_update_bpm_factor(AV1_COMP *cpi, int rate_err_tol) {
- TWO_PASS *twopass = &cpi->ppi->twopass;
+ TWO_PASS *const twopass = &cpi->ppi->twopass;
const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
// Based on recent history adjust expectations of bits per macroblock.
@@ -212,7 +212,7 @@ static void twopass_update_bpm_factor(AV1_COMP *cpi, int rate_err_tol) {
}
int err_estimate = p_rc->rate_error_estimate;
- int64_t bits_left = cpi->ppi->twopass.bits_left;
+ int64_t bits_left = twopass->bits_left;
int64_t total_actual_bits = p_rc->total_actual_bits;
int64_t bits_off_target = p_rc->vbr_bits_off_target;
double rolling_arf_group_actual_bits =
@@ -220,7 +220,7 @@ static void twopass_update_bpm_factor(AV1_COMP *cpi, int rate_err_tol) {
double rolling_arf_group_target_bits =
(double)twopass->rolling_arf_group_target_bits;
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
const int is_parallel_frame =
cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0 ? 1 : 0;
const int simulate_parallel_frame =
@@ -231,8 +231,8 @@ static void twopass_update_bpm_factor(AV1_COMP *cpi, int rate_err_tol) {
: p_rc->total_actual_bits;
bits_off_target = simulate_parallel_frame ? p_rc->temp_vbr_bits_off_target
: p_rc->vbr_bits_off_target;
- bits_left = simulate_parallel_frame ? p_rc->temp_bits_left
- : cpi->ppi->twopass.bits_left;
+ bits_left =
+ simulate_parallel_frame ? p_rc->temp_bits_left : twopass->bits_left;
rolling_arf_group_target_bits =
(double)(simulate_parallel_frame
? p_rc->temp_rolling_arf_group_target_bits
@@ -512,12 +512,12 @@ static void accumulate_this_frame_stats(const FIRSTPASS_STATS *stats,
gf_stats->gf_group_inactive_zone_rows += stats->inactive_zone_rows;
}
-static void accumulate_next_frame_stats(const FIRSTPASS_STATS *stats,
- const int flash_detected,
- const int frames_since_key,
- const int cur_idx,
- GF_GROUP_STATS *gf_stats, int f_w,
- int f_h) {
+void av1_accumulate_next_frame_stats(const FIRSTPASS_STATS *stats,
+ const int flash_detected,
+ const int frames_since_key,
+ const int cur_idx,
+ GF_GROUP_STATS *gf_stats, int f_w,
+ int f_h) {
accumulate_frame_motion_stats(stats, gf_stats, f_w, f_h);
// sum up the metric values of current gf group
gf_stats->avg_sr_coded_error += stats->sr_coded_error;
@@ -799,11 +799,10 @@ static int64_t calculate_total_gf_group_bits(AV1_COMP *cpi,
}
// Clamp odd edge cases.
- total_group_bits = (total_group_bits < 0)
- ? 0
- : (total_group_bits > twopass->kf_group_bits)
- ? twopass->kf_group_bits
- : total_group_bits;
+ total_group_bits = (total_group_bits < 0) ? 0
+ : (total_group_bits > twopass->kf_group_bits)
+ ? twopass->kf_group_bits
+ : total_group_bits;
// Clip based on user supplied data rate variability limit.
if (total_group_bits > (int64_t)max_bits * p_rc->baseline_gf_interval)
@@ -1753,12 +1752,15 @@ void av1_identify_regions(const FIRSTPASS_STATS *const stats_start,
if (total_frames <= 1) return;
// store the initial decisions
- REGIONS temp_regions[MAX_FIRSTPASS_ANALYSIS_FRAMES];
- av1_zero_array(temp_regions, MAX_FIRSTPASS_ANALYSIS_FRAMES);
+ REGIONS *temp_regions =
+ (REGIONS *)aom_malloc(total_frames * sizeof(temp_regions[0]));
+ av1_zero_array(temp_regions, total_frames);
// buffers for filtered stats
- double filt_intra_err[MAX_FIRSTPASS_ANALYSIS_FRAMES] = { 0 };
- double filt_coded_err[MAX_FIRSTPASS_ANALYSIS_FRAMES] = { 0 };
- double grad_coded[MAX_FIRSTPASS_ANALYSIS_FRAMES] = { 0 };
+ double *filt_intra_err =
+ (double *)aom_calloc(total_frames, sizeof(*filt_intra_err));
+ double *filt_coded_err =
+ (double *)aom_calloc(total_frames, sizeof(*filt_coded_err));
+ double *grad_coded = (double *)aom_calloc(total_frames, sizeof(*grad_coded));
int cur_region = 0, this_start = 0, this_last;
@@ -1847,6 +1849,11 @@ void av1_identify_regions(const FIRSTPASS_STATS *const stats_start,
regions[k].start += offset;
regions[k].last += offset;
}
+
+ aom_free(temp_regions);
+ aom_free(filt_coded_err);
+ aom_free(filt_intra_err);
+ aom_free(grad_coded);
}
static int find_regions_index(const REGIONS *regions, int num_regions,
@@ -1868,7 +1875,7 @@ static int find_regions_index(const REGIONS *regions, int num_regions,
* \param[in] max_gop_length Maximum length of the GF group
* \param[in] max_intervals Maximum number of intervals to decide
*
- * \return Nothing is returned. Instead, cpi->ppi->rc.gf_intervals is
+ * \remark Nothing is returned. Instead, cpi->ppi->rc.gf_intervals is
* changed to store the decided GF group lengths.
*/
static void calculate_gf_length(AV1_COMP *cpi, int max_gop_length,
@@ -1929,8 +1936,9 @@ static void calculate_gf_length(AV1_COMP *cpi, int max_gop_length,
flash_detected = detect_flash(twopass, &cpi->twopass_frame, 0);
// TODO(bohanli): remove redundant accumulations here, or unify
// this and the ones in define_gf_group
- accumulate_next_frame_stats(&next_frame, flash_detected,
- rc->frames_since_key, i, &gf_stats, f_w, f_h);
+ av1_accumulate_next_frame_stats(&next_frame, flash_detected,
+ rc->frames_since_key, i, &gf_stats, f_w,
+ f_h);
cut_here = detect_gf_cut(cpi, i, cur_start, flash_detected,
active_max_gf_interval, active_min_gf_interval,
@@ -2134,7 +2142,7 @@ static void correct_frames_to_key(AV1_COMP *cpi) {
*
* \param[in] cpi Top-level encoder structure
*
- * \return Nothing is returned. Instead, cpi->ppi->gf_group is changed.
+ * \remark Nothing is returned. Instead, cpi->ppi->gf_group is changed.
*/
static void define_gf_group_pass0(AV1_COMP *cpi) {
RATE_CONTROL *const rc = &cpi->rc;
@@ -2266,8 +2274,9 @@ static void accumulate_gop_stats(AV1_COMP *cpi, int is_intra_only, int f_w,
flash_detected = detect_flash(twopass, &cpi->twopass_frame, 0);
// accumulate stats for next frame
- accumulate_next_frame_stats(next_frame, flash_detected,
- rc->frames_since_key, i, gf_stats, f_w, f_h);
+ av1_accumulate_next_frame_stats(next_frame, flash_detected,
+ rc->frames_since_key, i, gf_stats, f_w,
+ f_h);
++i;
}
@@ -2355,7 +2364,7 @@ static void set_gop_bits_boost(AV1_COMP *cpi, int i, int is_intra_only,
#if GROUP_ADAPTIVE_MAXQ
// Calculate an estimate of the maxq needed for the group.
- // We are more agressive about correcting for sections
+ // We are more aggressive about correcting for sections
// where there could be significant overshoot than for easier
// sections where we do not wish to risk creating an overshoot
// of the allocated bit budget.
@@ -2422,7 +2431,7 @@ static void set_gop_bits_boost(AV1_COMP *cpi, int i, int is_intra_only,
* \param[in] is_final_pass Whether this is the final pass for the
* GF group, or a trial (non-zero)
*
- * \return Nothing is returned. Instead, cpi->ppi->gf_group is changed.
+ * \remark Nothing is returned. Instead, cpi->ppi->gf_group is changed.
*/
static void define_gf_group(AV1_COMP *cpi, EncodeFrameParams *frame_params,
int is_final_pass) {
@@ -3137,8 +3146,6 @@ static double get_kf_boost_score(AV1_COMP *cpi, double kf_raw_err,
*
* \param[in] cpi Top-level encoder structure
* \param[in] this_frame Pointer to first pass stats
- *
- * \return Nothing is returned.
*/
static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
RATE_CONTROL *const rc = &cpi->rc;
@@ -3430,13 +3437,13 @@ static void process_first_pass_stats(AV1_COMP *cpi,
if (cpi->oxcf.rc_cfg.mode != AOM_Q && current_frame->frame_number == 0 &&
cpi->gf_frame_index == 0 && total_stats &&
- cpi->ppi->twopass.stats_buf_ctx->total_left_stats) {
+ twopass->stats_buf_ctx->total_left_stats) {
if (cpi->ppi->lap_enabled) {
/*
* Accumulate total_stats using available limited number of stats,
* and assign it to total_left_stats.
*/
- *cpi->ppi->twopass.stats_buf_ctx->total_left_stats = *total_stats;
+ *twopass->stats_buf_ctx->total_left_stats = *total_stats;
}
// Special case code for first frame.
const int section_target_bandwidth = get_section_target_bandwidth(cpi);
@@ -3463,8 +3470,7 @@ static void process_first_pass_stats(AV1_COMP *cpi,
p_rc->avg_frame_qindex[KEY_FRAME] = p_rc->last_q[KEY_FRAME];
}
- if (cpi->twopass_frame.stats_in <
- cpi->ppi->twopass.stats_buf_ctx->stats_in_end) {
+ if (cpi->twopass_frame.stats_in < twopass->stats_buf_ctx->stats_in_end) {
*this_frame = *cpi->twopass_frame.stats_in;
++cpi->twopass_frame.stats_in;
}
@@ -3626,11 +3632,30 @@ void av1_get_second_pass_params(AV1_COMP *cpi,
GF_GROUP *const gf_group = &cpi->ppi->gf_group;
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
+ if (cpi->use_ducky_encode &&
+ cpi->ducky_encode_info.frame_info.gop_mode == DUCKY_ENCODE_GOP_MODE_RCL) {
+ frame_params->frame_type = gf_group->frame_type[cpi->gf_frame_index];
+ frame_params->show_frame =
+ !(gf_group->update_type[cpi->gf_frame_index] == ARF_UPDATE ||
+ gf_group->update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE);
+ return;
+ }
+
const FIRSTPASS_STATS *const start_pos = cpi->twopass_frame.stats_in;
int update_total_stats = 0;
if (is_stat_consumption_stage(cpi) && !cpi->twopass_frame.stats_in) return;
+ // Check forced key frames.
+ const int frames_to_next_forced_key = detect_app_forced_key(cpi);
+ if (frames_to_next_forced_key == 0) {
+ rc->frames_to_key = 0;
+ frame_flags &= FRAMEFLAGS_KEY;
+ } else if (frames_to_next_forced_key > 0 &&
+ frames_to_next_forced_key < rc->frames_to_key) {
+ rc->frames_to_key = frames_to_next_forced_key;
+ }
+
assert(cpi->twopass_frame.stats_in != NULL);
const int update_type = gf_group->update_type[cpi->gf_frame_index];
frame_params->frame_type = gf_group->frame_type[cpi->gf_frame_index];
@@ -3915,7 +3940,7 @@ void av1_init_second_pass(AV1_COMP *cpi) {
(int64_t)(stats->duration * oxcf->rc_cfg.target_bandwidth / 10000000.0);
#if CONFIG_BITRATE_ACCURACY
- av1_vbr_rc_init(&cpi->vbr_rc_info, cpi->ppi->twopass.bits_left,
+ av1_vbr_rc_init(&cpi->vbr_rc_info, twopass->bits_left,
(int)round(stats->count));
#endif
@@ -4005,11 +4030,15 @@ void av1_twopass_postencode_update(AV1_COMP *cpi) {
// Increment the stats_in pointer.
if (is_stat_consumption_stage(cpi) &&
+ !(cpi->use_ducky_encode && cpi->ducky_encode_info.frame_info.gop_mode ==
+ DUCKY_ENCODE_GOP_MODE_RCL) &&
(cpi->gf_frame_index < cpi->ppi->gf_group.size ||
rc->frames_to_key == 0)) {
const int update_type = cpi->ppi->gf_group.update_type[cpi->gf_frame_index];
if (update_type != ARF_UPDATE && update_type != INTNL_ARF_UPDATE) {
FIRSTPASS_STATS this_frame;
+ assert(cpi->twopass_frame.stats_in >
+ twopass->stats_buf_ctx->stats_in_start);
--cpi->twopass_frame.stats_in;
if (cpi->ppi->lap_enabled) {
input_stats_lap(twopass, &cpi->twopass_frame, &this_frame);
@@ -4017,8 +4046,7 @@ void av1_twopass_postencode_update(AV1_COMP *cpi) {
input_stats(twopass, &cpi->twopass_frame, &this_frame);
}
} else if (cpi->ppi->lap_enabled) {
- cpi->twopass_frame.stats_in =
- cpi->ppi->twopass.stats_buf_ctx->stats_in_start;
+ cpi->twopass_frame.stats_in = twopass->stats_buf_ctx->stats_in_start;
}
}
@@ -4049,7 +4077,7 @@ void av1_twopass_postencode_update(AV1_COMP *cpi) {
p_rc->rate_error_estimate = 0;
}
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
/* The variables temp_vbr_bits_off_target, temp_bits_left,
* temp_rolling_arf_group_target_bits, temp_rolling_arf_group_actual_bits
* temp_rate_error_estimate are introduced for quality simulation purpose,
@@ -4181,7 +4209,7 @@ void av1_twopass_postencode_update(AV1_COMP *cpi) {
}
}
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
if (cpi->do_frame_data_update && !show_existing_between_parallel_frames &&
simulate_parallel_frame) {
cpi->ppi->p_rc.temp_vbr_bits_off_target_fast =
@@ -4193,7 +4221,6 @@ void av1_twopass_postencode_update(AV1_COMP *cpi) {
#endif
}
-#if CONFIG_FRAME_PARALLEL_ENCODE
// Update the frame probabilities obtained from parallel encode frames
FrameProbInfo *const frame_probs = &cpi->ppi->frame_probs;
#if CONFIG_FPMT_TEST
@@ -4372,5 +4399,4 @@ void av1_twopass_postencode_update(AV1_COMP *cpi) {
cpi->do_frame_data_update && simulate_parallel_frame)
cpi->temp_framerate = cpi->framerate;
#endif
-#endif
}
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/pass2_strategy.h b/chromium/third_party/libaom/source/libaom/av1/encoder/pass2_strategy.h
index c54b8c4561f..87e08bb8a39 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/pass2_strategy.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/pass2_strategy.h
@@ -21,7 +21,6 @@ struct EncodeFrameParams;
#include "av1/encoder/encoder.h"
-/*!\endcond */
/*!
* \brief accumulated stats and features in a gf group
*/
@@ -60,7 +59,7 @@ typedef struct {
double frame_sr_coded_error;
/*!\endcond */
} GF_FRAME_STATS;
-/*!cond */
+/*!\cond */
void av1_init_second_pass(struct AV1_COMP *cpi);
@@ -83,7 +82,7 @@ void av1_init_single_pass_lap(AV1_COMP *cpi);
* \param[in] frame_params Per frame encoding parameters
* \param[in] frame_flags Frame type and coding flags
*
- * \return No return but analyses first pass stats and assigns a target
+ * \remark No return but analyses first pass stats and assigns a target
* number of bits to the current frame and a target Q range.
*/
void av1_get_second_pass_params(struct AV1_COMP *cpi,
@@ -99,7 +98,7 @@ void av1_get_second_pass_params(struct AV1_COMP *cpi,
*
* \param[in] cpi Top - level encoder instance structure
*
- * \return No return value but this function updates various rate control
+ * \remark No return value but this function updates various rate control
* related data structures that for example track overshoot and
* undershoot.
*/
@@ -121,7 +120,7 @@ void av1_twopass_postencode_update(struct AV1_COMP *cpi);
* uni-directional group.
* \param[in] gf_group_bits Bits available to be allocated.
*
- * \return No return but updates the rate control and group data structures
+ * \remark No return but updates the rate control and group data structures
* to reflect the allocation of bits.
*/
void av1_gop_bit_allocation(const AV1_COMP *cpi, RATE_CONTROL *const rc,
@@ -135,6 +134,12 @@ int av1_calc_arf_boost(const TWO_PASS *twopass,
int *num_fpstats_used, int *num_fpstats_required,
int project_gfu_boost);
+void av1_accumulate_next_frame_stats(const FIRSTPASS_STATS *stats,
+ const int flash_detected,
+ const int frames_since_key,
+ const int cur_idx,
+ GF_GROUP_STATS *gf_stats, int f_w,
+ int f_h);
// Identify stable and unstable regions from first pass stats.
// stats_start points to the first frame to analyze.
// |offset| is the offset from the current frame to the frame stats_start is
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/pickcdef.c b/chromium/third_party/libaom/source/libaom/av1/encoder/pickcdef.c
index 7f7d0c9640d..5cd2f7ecfaf 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/pickcdef.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/pickcdef.c
@@ -10,6 +10,7 @@
*/
#include <math.h>
+#include <stdbool.h>
#include <string.h>
#include "config/aom_dsp_rtcd.h"
@@ -266,6 +267,22 @@ static uint64_t compute_cdef_dist_highbd(void *dst, int dstride, uint16_t *src,
return sum >> 2 * coeff_shift;
}
#endif
+
+// Checks whether dual or quad block processing is applicable for block widths
+// 8 and 4, respectively.
+static INLINE int is_dual_or_quad_applicable(cdef_list *dlist, int width,
+ int cdef_count, int bi, int iter) {
+ assert(width == 8 || width == 4);
+ const int blk_offset = (width == 8) ? 1 : 3;
+ if ((iter + blk_offset) >= cdef_count) return 0;
+
+ if (dlist[bi].by == dlist[bi + blk_offset].by &&
+ dlist[bi].bx + blk_offset == dlist[bi + blk_offset].bx)
+ return 1;
+
+ return 0;
+}
+
static uint64_t compute_cdef_dist(void *dst, int dstride, uint16_t *src,
cdef_list *dlist, int cdef_count,
BLOCK_SIZE bsize, int coeff_shift, int row,
@@ -274,21 +291,95 @@ static uint64_t compute_cdef_dist(void *dst, int dstride, uint16_t *src,
bsize == BLOCK_8X8);
uint64_t sum = 0;
int bi, bx, by;
+ int iter = 0;
+ int inc = 1;
uint8_t *dst8 = (uint8_t *)dst;
uint8_t *dst_buff = &dst8[row * dstride + col];
int src_stride, width, height, width_log2, height_log2;
init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
bsize);
- for (bi = 0; bi < cdef_count; bi++) {
+
+ const int num_blks = 16 / width;
+ for (bi = 0; bi < cdef_count; bi += inc) {
by = dlist[bi].by;
bx = dlist[bi].bx;
- sum += aom_mse_wxh_16bit(
- &dst_buff[(by << height_log2) * dstride + (bx << width_log2)], dstride,
- &src[bi << (height_log2 + width_log2)], src_stride, width, height);
+ uint16_t *src_tmp = &src[bi << (height_log2 + width_log2)];
+ uint8_t *dst_tmp =
+ &dst_buff[(by << height_log2) * dstride + (bx << width_log2)];
+
+ if (is_dual_or_quad_applicable(dlist, width, cdef_count, bi, iter)) {
+ sum += aom_mse_16xh_16bit(dst_tmp, dstride, src_tmp, width, height);
+ iter += num_blks;
+ inc = num_blks;
+ } else {
+ sum += aom_mse_wxh_16bit(dst_tmp, dstride, src_tmp, src_stride, width,
+ height);
+ iter += 1;
+ inc = 1;
+ }
}
+
return sum >> 2 * coeff_shift;
}
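
The distortion loop now advances by num_blks = 16 / width whenever is_dual_or_quad_applicable() finds a contiguous run on the same row, so one aom_mse_16xh_16bit() call covers two 8-wide or four 4-wide blocks. A self-contained sketch of that batching (mse_wxh and mse_16xh are hypothetical stand-ins for the SIMD kernels):

    #include <stdint.h>

    uint64_t mse_wxh(const uint8_t *dst, int dstride, const uint16_t *src,
                     int src_stride, int w, int h);
    uint64_t mse_16xh(const uint8_t *dst, int dstride, const uint16_t *src,
                      int w, int h);

    static uint64_t accumulate_mse(const uint8_t *dst, int dstride,
                                   const uint16_t *src, int src_stride,
                                   const int *bx, const int *by, int count,
                                   int width, int height,
                                   int width_log2, int height_log2) {
      const int num_blks = 16 / width; /* 2 for 8-wide, 4 for 4-wide */
      const int off = num_blks - 1;
      uint64_t sum = 0;
      for (int i = 0; i < count;) {
        const uint16_t *s = &src[i << (height_log2 + width_log2)];
        const uint8_t *d = &dst[(by[i] << height_log2) * dstride +
                                (bx[i] << width_log2)];
        /* Batch when the last block of the group sits on the same row,
         * exactly off columns to the right (a contiguous run). */
        if (i + off < count && by[i] == by[i + off] &&
            bx[i] + off == bx[i + off]) {
          sum += mse_16xh(d, dstride, s, width, height);
          i += num_blks;
        } else {
          sum += mse_wxh(d, dstride, s, src_stride, width, height);
          i += 1;
        }
      }
      return sum;
    }
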
+// Fill the boundary regions of the block with CDEF_VERY_LARGE, but only if
+// the region lies outside the frame boundary.
+static INLINE void fill_borders_for_fbs_on_frame_boundary(
+ uint16_t *inbuf, int hfilt_size, int vfilt_size,
+ bool is_fb_on_frm_left_boundary, bool is_fb_on_frm_right_boundary,
+ bool is_fb_on_frm_top_boundary, bool is_fb_on_frm_bottom_boundary) {
+ if (!is_fb_on_frm_left_boundary && !is_fb_on_frm_right_boundary &&
+ !is_fb_on_frm_top_boundary && !is_fb_on_frm_bottom_boundary)
+ return;
+ if (is_fb_on_frm_bottom_boundary) {
+ // Fill bottom region of the block
+ const int buf_offset =
+ (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + CDEF_HBORDER;
+ fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size,
+ CDEF_VERY_LARGE);
+ }
+ if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_left_boundary) {
+ const int buf_offset = (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE;
+ // Fill bottom-left region of the block
+ fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
+ CDEF_VERY_LARGE);
+ }
+ if (is_fb_on_frm_bottom_boundary || is_fb_on_frm_right_boundary) {
+ const int buf_offset =
+ (vfilt_size + CDEF_VBORDER) * CDEF_BSTRIDE + hfilt_size + CDEF_HBORDER;
+ // Fill bottom-right region of the block
+ fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
+ CDEF_VERY_LARGE);
+ }
+ if (is_fb_on_frm_top_boundary) {
+ // Fill top region of the block
+ fill_rect(&inbuf[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hfilt_size,
+ CDEF_VERY_LARGE);
+ }
+ if (is_fb_on_frm_top_boundary || is_fb_on_frm_left_boundary) {
+ // Fill top-left region of the block
+ fill_rect(inbuf, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
+ }
+ if (is_fb_on_frm_top_boundary || is_fb_on_frm_right_boundary) {
+ const int buf_offset = hfilt_size + CDEF_HBORDER;
+ // Fill top-right region of the block
+ fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
+ CDEF_VERY_LARGE);
+ }
+ if (is_fb_on_frm_left_boundary) {
+ const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE;
+ // Fill left region of the block
+ fill_rect(&inbuf[buf_offset], CDEF_BSTRIDE, vfilt_size, CDEF_HBORDER,
+ CDEF_VERY_LARGE);
+ }
+ if (is_fb_on_frm_right_boundary) {
+ const int buf_offset = CDEF_VBORDER * CDEF_BSTRIDE;
+ // Fill right region of the block
+ fill_rect(&inbuf[buf_offset + hfilt_size + CDEF_HBORDER], CDEF_BSTRIDE,
+ vfilt_size, CDEF_HBORDER, CDEF_VERY_LARGE);
+ }
+}
+
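
fill_borders_for_fbs_on_frame_boundary() replaces the old flood of the entire inbuf with CDEF_VERY_LARGE by targeted fills of only the regions that fall outside the frame. Its workhorse fill_rect() amounts to the following (a sketch; the real helper is part of libaom's CDEF code and may differ in signature):

    #include <stdint.h>

    /* Fill an h-by-w rectangle inside a strided 16-bit buffer with a
     * sentinel value (CDEF_VERY_LARGE in the real code). */
    static void fill_rect_sketch(uint16_t *dst, int dstride, int h, int w,
                                 uint16_t value) {
      for (int r = 0; r < h; r++)
        for (int c = 0; c < w; c++)
          dst[r * dstride + c] = value;
    }
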
// Calculates MSE at block level.
// Inputs:
// cdef_search_ctx: Pointer to the structure containing parameters related to
@@ -345,26 +436,35 @@ void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx, int fbr, int fbc,
const int cdef_count = av1_cdef_compute_sb_list(
mi_params, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, bs);
- const int yoff = CDEF_VBORDER * (fbr != 0);
- const int xoff = CDEF_HBORDER * (fbc != 0);
+ const bool is_fb_on_frm_left_boundary = (fbc == 0);
+ const bool is_fb_on_frm_right_boundary =
+ (fbc + hb_step == cdef_search_ctx->nhfb);
+ const bool is_fb_on_frm_top_boundary = (fbr == 0);
+ const bool is_fb_on_frm_bottom_boundary =
+ (fbr + vb_step == cdef_search_ctx->nvfb);
+ const int yoff = CDEF_VBORDER * (!is_fb_on_frm_top_boundary);
+ const int xoff = CDEF_HBORDER * (!is_fb_on_frm_left_boundary);
int dirinit = 0;
for (int pli = 0; pli < cdef_search_ctx->num_planes; pli++) {
- for (int i = 0; i < CDEF_INBUF_SIZE; i++) inbuf[i] = CDEF_VERY_LARGE;
/* We avoid filtering the pixels for which some of the pixels to
average are outside the frame. We could change the filter instead,
but it would add special cases for any future vectorization. */
- const int ysize = (nvb << mi_high_l2[pli]) +
- CDEF_VBORDER * (fbr + vb_step < cdef_search_ctx->nvfb) +
- yoff;
- const int xsize = (nhb << mi_wide_l2[pli]) +
- CDEF_HBORDER * (fbc + hb_step < cdef_search_ctx->nhfb) +
- xoff;
+ const int hfilt_size = (nhb << mi_wide_l2[pli]);
+ const int vfilt_size = (nvb << mi_high_l2[pli]);
+ const int ysize =
+ vfilt_size + CDEF_VBORDER * (!is_fb_on_frm_bottom_boundary) + yoff;
+ const int xsize =
+ hfilt_size + CDEF_HBORDER * (!is_fb_on_frm_right_boundary) + xoff;
const int row = fbr * MI_SIZE_64X64 << mi_high_l2[pli];
const int col = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
struct macroblockd_plane pd = cdef_search_ctx->plane[pli];
cdef_search_ctx->copy_fn(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
pd.dst.buf, row - yoff, col - xoff, pd.dst.stride,
ysize, xsize);
+ fill_borders_for_fbs_on_frame_boundary(
+ inbuf, hfilt_size, vfilt_size, is_fb_on_frm_left_boundary,
+ is_fb_on_frm_right_boundary, is_fb_on_frm_top_boundary,
+ is_fb_on_frm_bottom_boundary);
for (int gi = 0; gi < cdef_search_ctx->total_strengths; gi++) {
int pri_strength, sec_strength;
get_cdef_filter_strengths(cdef_search_ctx->pick_method, &pri_strength,
@@ -413,7 +513,7 @@ static void cdef_mse_calc_frame(CdefSearchCtx *cdef_search_ctx) {
// related to CDEF search context.
// Returns:
-// Nothing will be returned. Contents of cdef_search_ctx will be modified.
+// True if the allocations succeed, false otherwise. Contents of
+// cdef_search_ctx will be modified.
-static AOM_INLINE void cdef_alloc_data(CdefSearchCtx *cdef_search_ctx) {
+static AOM_INLINE bool cdef_alloc_data(CdefSearchCtx *cdef_search_ctx) {
const int nvfb = cdef_search_ctx->nvfb;
const int nhfb = cdef_search_ctx->nhfb;
cdef_search_ctx->sb_index =
@@ -423,6 +523,14 @@ static AOM_INLINE void cdef_alloc_data(CdefSearchCtx *cdef_search_ctx) {
aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb);
cdef_search_ctx->mse[1] =
aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb);
+ if (!(cdef_search_ctx->sb_index && cdef_search_ctx->mse[0] &&
+ cdef_search_ctx->mse[1])) {
+ aom_free(cdef_search_ctx->sb_index);
+ aom_free(cdef_search_ctx->mse[0]);
+ aom_free(cdef_search_ctx->mse[1]);
+ return false;
+ }
+ return true;
}
// Deallocates the memory allocated for members of CdefSearchCtx.
@@ -499,7 +607,7 @@ static AOM_INLINE void cdef_params_init(const YV12_BUFFER_CONFIG *frame,
}
static void pick_cdef_from_qp(AV1_COMMON *const cm, int skip_cdef,
- int frames_since_key) {
+ int is_screen_content) {
const int bd = cm->seq_params->bit_depth;
const int q =
av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, bd) >> (bd - 8);
@@ -518,42 +626,61 @@ static void pick_cdef_from_qp(AV1_COMMON *const cm, int skip_cdef,
int predicted_y_f2 = 0;
int predicted_uv_f1 = 0;
int predicted_uv_f2 = 0;
- if (!frame_is_intra_only(cm)) {
- predicted_y_f1 = clamp((int)roundf(q * q * -0.0000023593946f +
- q * 0.0068615186f + 0.02709886f),
- 0, 15);
- predicted_y_f2 = clamp((int)roundf(q * q * -0.00000057629734f +
- q * 0.0013993345f + 0.03831067f),
- 0, 3);
- predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000007095069f +
- q * 0.0034628846f + 0.00887099f),
- 0, 15);
- predicted_uv_f2 = clamp((int)roundf(q * q * 0.00000023874085f +
- q * 0.00028223585f + 0.05576307f),
- 0, 3);
+ if (is_screen_content) {
+ predicted_y_f1 =
+ (int)(5.88217781e-06 * q * q + 6.10391455e-03 * q + 9.95043102e-02);
+ predicted_y_f2 =
+ (int)(-7.79934857e-06 * q * q + 6.58957830e-03 * q + 8.81045025e-01);
+ predicted_uv_f1 =
+ (int)(-6.79500136e-06 * q * q + 1.02695586e-02 * q + 1.36126802e-01);
+ predicted_uv_f2 =
+ (int)(-9.99613695e-08 * q * q - 1.79361339e-05 * q + 1.17022324e+0);
+ predicted_y_f1 = clamp(predicted_y_f1, 0, 15);
+ predicted_y_f2 = clamp(predicted_y_f2, 0, 3);
+ predicted_uv_f1 = clamp(predicted_uv_f1, 0, 15);
+ predicted_uv_f2 = clamp(predicted_uv_f2, 0, 3);
} else {
- predicted_y_f1 = clamp(
- (int)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f),
- 0, 15);
- predicted_y_f2 = clamp(
- (int)roundf(q * q * 0.0000029167343f + q * 0.0027798624f + 0.0079405f),
- 0, 3);
- predicted_uv_f1 = clamp(
- (int)roundf(q * q * -0.0000130790995f + q * 0.012892405f - 0.00748388f),
- 0, 15);
- predicted_uv_f2 = clamp((int)roundf(q * q * 0.0000032651783f +
- q * 0.00035520183f + 0.00228092f),
- 0, 3);
+ if (!frame_is_intra_only(cm)) {
+ predicted_y_f1 = clamp((int)roundf(q * q * -0.0000023593946f +
+ q * 0.0068615186f + 0.02709886f),
+ 0, 15);
+ predicted_y_f2 = clamp((int)roundf(q * q * -0.00000057629734f +
+ q * 0.0013993345f + 0.03831067f),
+ 0, 3);
+ predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000007095069f +
+ q * 0.0034628846f + 0.00887099f),
+ 0, 15);
+ predicted_uv_f2 = clamp((int)roundf(q * q * 0.00000023874085f +
+ q * 0.00028223585f + 0.05576307f),
+ 0, 3);
+ } else {
+ predicted_y_f1 = clamp(
+ (int)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f),
+ 0, 15);
+ predicted_y_f2 = clamp((int)roundf(q * q * 0.0000029167343f +
+ q * 0.0027798624f + 0.0079405f),
+ 0, 3);
+ predicted_uv_f1 = clamp((int)roundf(q * q * -0.0000130790995f +
+ q * 0.012892405f - 0.00748388f),
+ 0, 15);
+ predicted_uv_f2 = clamp((int)roundf(q * q * 0.0000032651783f +
+ q * 0.00035520183f + 0.00228092f),
+ 0, 3);
+ }
}
cdef_info->cdef_strengths[0] =
predicted_y_f1 * CDEF_SEC_STRENGTHS + predicted_y_f2;
cdef_info->cdef_uv_strengths[0] =
predicted_uv_f1 * CDEF_SEC_STRENGTHS + predicted_uv_f2;
+ // mbmi->cdef_strength is already set in the encoding stage. We don't need to
+ // set it again here.
if (skip_cdef) {
cdef_info->cdef_strengths[1] = 0;
cdef_info->cdef_uv_strengths[1] = 0;
+ return;
}
+
const CommonModeInfoParams *const mi_params = &cm->mi_params;
const int nvfb = (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
const int nhfb = (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
@@ -562,10 +689,6 @@ static void pick_cdef_from_qp(AV1_COMMON *const cm, int skip_cdef,
for (int c = 0; c < nhfb; ++c) {
MB_MODE_INFO *current_mbmi = mbmi[MI_SIZE_64X64 * c];
current_mbmi->cdef_strength = 0;
- if (skip_cdef && current_mbmi->skip_cdef_curr_sb &&
- frames_since_key > 10) {
- current_mbmi->cdef_strength = 1;
- }
}
mbmi += MI_SIZE_64X64 * mi_params->mi_stride;
}
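
For screen content, pick_cdef_from_qp() now predicts the four strength components directly from the quantizer with the quadratic fits above, then combines them as f1 * CDEF_SEC_STRENGTHS + f2 in the cdef_strengths[0] assignments. The fits, pulled out into a self-contained function (coefficients copied verbatim from the patch; the clamp is re-derived locally):

    static int clamp_int(int v, int lo, int hi) {
      return v < lo ? lo : (v > hi ? hi : v);
    }

    /* Quadratic q -> strength fits for screen content, per the patch. */
    static void predict_screen_cdef_strengths(int q, int *y_f1, int *y_f2,
                                              int *uv_f1, int *uv_f2) {
      *y_f1 = clamp_int(
          (int)(5.88217781e-06 * q * q + 6.10391455e-03 * q + 9.95043102e-02),
          0, 15);
      *y_f2 = clamp_int(
          (int)(-7.79934857e-06 * q * q + 6.58957830e-03 * q + 8.81045025e-01),
          0, 3);
      *uv_f1 = clamp_int(
          (int)(-6.79500136e-06 * q * q + 1.02695586e-02 * q + 1.36126802e-01),
          0, 15);
      *uv_f2 = clamp_int(
          (int)(-9.99613695e-08 * q * q - 1.79361339e-05 * q + 1.17022324e+0),
          0, 3);
    }
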
@@ -574,8 +697,8 @@ static void pick_cdef_from_qp(AV1_COMMON *const cm, int skip_cdef,
void av1_cdef_search(MultiThreadInfo *mt_info, const YV12_BUFFER_CONFIG *frame,
const YV12_BUFFER_CONFIG *ref, AV1_COMMON *cm,
MACROBLOCKD *xd, CDEF_PICK_METHOD pick_method, int rdmult,
- int skip_cdef_feature, int frames_since_key,
- CDEF_CONTROL cdef_control, int non_reference_frame) {
+ int skip_cdef_feature, CDEF_CONTROL cdef_control,
+ const int is_screen_content, int non_reference_frame) {
assert(cdef_control != CDEF_NONE);
if (cdef_control == CDEF_REFERENCE && non_reference_frame) {
CdefInfo *const cdef_info = &cm->cdef_info;
@@ -587,7 +710,7 @@ void av1_cdef_search(MultiThreadInfo *mt_info, const YV12_BUFFER_CONFIG *frame,
}
if (pick_method == CDEF_PICK_FROM_Q) {
- pick_cdef_from_qp(cm, skip_cdef_feature, frames_since_key);
+ pick_cdef_from_qp(cm, skip_cdef_feature, is_screen_content);
return;
}
const CommonModeInfoParams *const mi_params = &cm->mi_params;
@@ -599,7 +722,14 @@ void av1_cdef_search(MultiThreadInfo *mt_info, const YV12_BUFFER_CONFIG *frame,
// Initialize parameters related to CDEF search context.
cdef_params_init(frame, ref, cm, xd, &cdef_search_ctx, pick_method);
// Allocate CDEF search context buffers.
- cdef_alloc_data(&cdef_search_ctx);
+ if (!cdef_alloc_data(&cdef_search_ctx)) {
+ CdefInfo *const cdef_info = &cm->cdef_info;
+ cdef_info->nb_cdef_strengths = 0;
+ cdef_info->cdef_bits = 0;
+ cdef_info->cdef_strengths[0] = 0;
+ cdef_info->cdef_uv_strengths[0] = 0;
+ return;
+ }
// Frame level mse calculation.
if (mt_info->num_workers > 1) {
av1_cdef_mse_calc_frame_mt(cm, mt_info, &cdef_search_ctx);
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/pickcdef.h b/chromium/third_party/libaom/source/libaom/av1/encoder/pickcdef.h
index d52cb4bc668..077edcbffa5 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/pickcdef.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/pickcdef.h
@@ -226,12 +226,12 @@ void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx, int fbr, int fbc,
* \param[in] pick_method The method used to select params
* \param[in] rdmult rd multiplier to use in making param choices
* \param[in] skip_cdef_feature Speed feature to skip cdef
- * \param[in] frames_since_key Number of frames since key frame
* \param[in] cdef_control Parameter that controls CDEF application
+ * \param[in] is_screen_content Whether it is screen content type
* \param[in] non_reference_frame Indicates if current frame is
* non-reference
*
- * \return Nothing is returned. Instead, optimal CDEF parameters are stored
+ * \remark Nothing is returned. Instead, optimal CDEF parameters are stored
* in the \c cdef_info structure of type \ref CdefInfo inside \c cm:
* \arg \c cdef_bits: Bits of strength parameters
* \arg \c nb_cdef_strengths: Number of strength parameters
@@ -246,8 +246,8 @@ void av1_cdef_search(struct MultiThreadInfo *mt_info,
const YV12_BUFFER_CONFIG *frame,
const YV12_BUFFER_CONFIG *ref, AV1_COMMON *cm,
MACROBLOCKD *xd, CDEF_PICK_METHOD pick_method, int rdmult,
- int skip_cdef_feature, int frames_since_key,
- CDEF_CONTROL cdef_control, int non_reference_frame);
+ int skip_cdef_feature, CDEF_CONTROL cdef_control,
+ const int is_screen_content, int non_reference_frame);
#ifdef __cplusplus
} // extern "C"
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/picklpf.c b/chromium/third_party/libaom/source/libaom/av1/encoder/picklpf.c
index 925c2691b66..47d007e69f8 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/picklpf.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/picklpf.c
@@ -69,9 +69,12 @@ static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
case 2: cm->lf.filter_level_v = filter_level[0]; break;
}
+ // lpf_opt_level = 1 : Enables dual/quad loop-filtering.
+ int lpf_opt_level = is_inter_tx_size_search_level_one(&cpi->sf.tx_sf);
+
av1_loop_filter_frame_mt(&cm->cur_frame->buf, cm, &cpi->td.mb.e_mbd, plane,
plane + 1, partial_frame, mt_info->workers,
- num_workers, &mt_info->lf_row_sync, 0);
+ num_workers, &mt_info->lf_row_sync, lpf_opt_level);
filt_err = aom_get_sse_plane(sd, &cm->cur_frame->buf, plane,
cm->seq_params->use_highbitdepth);
@@ -209,7 +212,7 @@ void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
if (disable_filter_rt_screen ||
cpi->oxcf.algo_cfg.loopfilter_control == LOOPFILTER_NONE ||
(cpi->oxcf.algo_cfg.loopfilter_control == LOOPFILTER_REFERENCE &&
- cpi->svc.non_reference_frame)) {
+ cpi->rtc_ref.non_reference_frame)) {
lf->filter_level[0] = 0;
lf->filter_level[1] = 0;
return;
@@ -268,7 +271,7 @@ void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
lf->filter_level_u = clamp(filt_guess, min_filter_level, max_filter_level);
lf->filter_level_v = clamp(filt_guess, min_filter_level, max_filter_level);
if (cpi->oxcf.algo_cfg.loopfilter_control == LOOPFILTER_SELECTIVELY &&
- !frame_is_intra_only(cm)) {
+ !frame_is_intra_only(cm) && !cpi->rc.high_source_sad) {
if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
lf->filter_level[0] = 0;
lf->filter_level[1] = 0;
@@ -298,7 +301,7 @@ void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
&cpi->last_frame_uf, cm->width, cm->height,
seq_params->subsampling_x, seq_params->subsampling_y,
seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
- cm->features.byte_alignment, NULL, NULL, NULL, 0))
+ cm->features.byte_alignment, NULL, NULL, NULL, 0, 0))
aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate last frame buffer");
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/picklpf.h b/chromium/third_party/libaom/source/libaom/av1/encoder/picklpf.h
index 727335517b5..f567937c327 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/picklpf.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/picklpf.h
@@ -43,7 +43,7 @@ int av1_get_max_filter_level(const AV1_COMP *cpi);
* \arg \c LPF_PICK_MINIMAL_LPF: Pick 0 to disable LPF if LPF was enabled last
* frame
*
- * \return Nothing is returned. Instead, filter levels below are stored in the
+ * \remark Nothing is returned. Instead, filter levels below are stored in the
* "loopfilter" structure inside "cpi":
* \arg \c filter_level[0]: the vertical filter level for Y plane
* \arg \c filter_level[1]: the horizontal filter level for Y plane
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/pickrst.c b/chromium/third_party/libaom/source/libaom/av1/encoder/pickrst.c
index 467fb1eb7a1..008c469be5e 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/pickrst.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/pickrst.c
@@ -1081,7 +1081,7 @@ static INLINE int wrap_index(int i, int wiener_win) {
// Solve linear equations to find Wiener filter tap values
// Taps are output scaled by WIENER_FILT_STEP
static int linsolve_wiener(int n, int64_t *A, int stride, int64_t *b,
- int32_t *x) {
+ int64_t *x) {
for (int k = 0; k < n - 1; k++) {
// Partial pivoting: bring the row with the largest pivot to the top
for (int i = n - 1; i > k; i--) {
@@ -1116,7 +1116,7 @@ static int linsolve_wiener(int n, int64_t *A, int stride, int64_t *b,
c += A[i * stride + j] * x[j] / WIENER_TAP_SCALE_FACTOR;
}
// Store filter taps x in scaled form.
- x[i] = (int32_t)(WIENER_TAP_SCALE_FACTOR * (b[i] - c) / A[i * stride + i]);
+ x[i] = WIENER_TAP_SCALE_FACTOR * (b[i] - c) / A[i * stride + i];
}
return 1;
@@ -1126,7 +1126,7 @@ static int linsolve_wiener(int n, int64_t *A, int stride, int64_t *b,
static AOM_INLINE void update_a_sep_sym(int wiener_win, int64_t **Mc,
int64_t **Hc, int32_t *a, int32_t *b) {
int i, j;
- int32_t S[WIENER_WIN];
+ int64_t S[WIENER_WIN];
int64_t A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
const int wiener_win2 = wiener_win * wiener_win;
const int wiener_halfwin1 = (wiener_win >> 1) + 1;
@@ -1174,7 +1174,10 @@ static AOM_INLINE void update_a_sep_sym(int wiener_win, int64_t **Mc,
S[i] = S[wiener_win - 1 - i];
S[wiener_halfwin1 - 1] -= 2 * S[i];
}
- memcpy(a, S, wiener_win * sizeof(*a));
+ for (i = 0; i < wiener_win; ++i) {
+ a[i] = (int32_t)CLIP(S[i], -(1 << (WIENER_FILT_BITS - 1)),
+ (1 << (WIENER_FILT_BITS - 1)) - 1);
+ }
}
}
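
Widening S[] (and the linsolve_wiener() output) from int32_t to int64_t protects the scaled tap WIENER_TAP_SCALE_FACTOR * (b[i] - c) / A[i * stride + i], which can exceed 32 bits before the new explicit CLIP into WIENER_FILT_BITS range. A small runnable illustration of the wraparound the old narrowing cast allowed (magnitudes are illustrative, not traced from a real bitstream):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      /* Illustrative magnitudes: a 16-bit tap scale and a large residual
       * push the scaled tap past INT32_MAX before clipping. */
      const int64_t scale = 1 << 16;     /* ~WIENER_TAP_SCALE_FACTOR */
      const int64_t residual = 40000000; /* b[i] - c, illustrative */
      const int64_t pivot = 1000;        /* diagonal A element */

      const int64_t wide = scale * residual / pivot; /* 2621440000 */
      const int32_t narrowed = (int32_t)wide;        /* wraps negative */
      printf("wide=%lld narrowed=%d\n", (long long)wide, narrowed);
      return 0;
    }

Keeping the intermediate in int64_t defers narrowing to the explicit CLIP into [-(1 << (WIENER_FILT_BITS - 1)), (1 << (WIENER_FILT_BITS - 1)) - 1].
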
@@ -1182,7 +1185,7 @@ static AOM_INLINE void update_a_sep_sym(int wiener_win, int64_t **Mc,
static AOM_INLINE void update_b_sep_sym(int wiener_win, int64_t **Mc,
int64_t **Hc, int32_t *a, int32_t *b) {
int i, j;
- int32_t S[WIENER_WIN];
+ int64_t S[WIENER_WIN];
int64_t A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1];
const int wiener_win2 = wiener_win * wiener_win;
const int wiener_halfwin1 = (wiener_win >> 1) + 1;
@@ -1231,7 +1234,10 @@ static AOM_INLINE void update_b_sep_sym(int wiener_win, int64_t **Mc,
S[i] = S[wiener_win - 1 - i];
S[wiener_halfwin1 - 1] -= 2 * S[i];
}
- memcpy(b, S, wiener_win * sizeof(*b));
+ for (i = 0; i < wiener_win; ++i) {
+ b[i] = (int32_t)CLIP(S[i], -(1 << (WIENER_FILT_BITS - 1)),
+ (1 << (WIENER_FILT_BITS - 1)) - 1);
+ }
}
}
@@ -1750,8 +1756,10 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi) {
ntiles[is_uv] = rest_tiles_in_plane(cm, is_uv);
assert(ntiles[1] <= ntiles[0]);
- RestUnitSearchInfo *rusi =
- (RestUnitSearchInfo *)aom_memalign(16, sizeof(*rusi) * ntiles[0]);
+ RestUnitSearchInfo *rusi;
+ CHECK_MEM_ERROR(
+ cm, rusi,
+ (RestUnitSearchInfo *)aom_memalign(16, sizeof(*rusi) * ntiles[0]));
// If the restoration unit dimensions are not multiples of
// rsi->restoration_unit_size then some elements of the rusi array may be
@@ -1768,7 +1776,7 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi) {
cm->superres_upscaled_height, seq_params->subsampling_x,
seq_params->subsampling_y, seq_params->use_highbitdepth,
AOM_RESTORATION_FRAME_BORDER, cm->features.byte_alignment, NULL, NULL,
- NULL, 0))
+ NULL, 0, 0))
aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate trial restored frame buffer");
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/pickrst.h b/chromium/third_party/libaom/source/libaom/av1/encoder/pickrst.h
index 46a4b48f2cd..94a6932dee6 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/pickrst.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/pickrst.h
@@ -65,7 +65,7 @@ static INLINE uint16_t find_average_highbd(const uint16_t *src, int h_start,
* \param[in] sd Source frame buffer
* \param[in,out] cpi Top-level encoder structure
*
- * \return Nothing is returned. Instead, chosen restoration filter
+ * \remark Nothing is returned. Instead, chosen restoration filter
* types and parameters are stored per plane in the \c rst_info structure
* of type \ref RestorationInfo inside \c cpi->common:
* \arg \c rst_info[ \c 0 ]: Chosen parameters for Y plane
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/ransac.c b/chromium/third_party/libaom/source/libaom/av1/encoder/ransac.c
index ff00a46e2b5..f878fce6a7b 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/ransac.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/ransac.c
@@ -225,6 +225,7 @@ static int find_translation(int np, double *pts1, double *pts2, double *mat) {
static int find_rotzoom(int np, double *pts1, double *pts2, double *mat) {
const int np2 = np * 2;
double *a = (double *)aom_malloc(sizeof(*a) * (np2 * 5 + 20));
+ if (a == NULL) return 1;
double *b = a + np2 * 4;
double *temp = b + np2;
int i;
@@ -411,26 +412,29 @@ static int ransac(const int *matched_points, int npoints,
corners1 = (double *)aom_malloc(sizeof(*corners1) * npoints * 2);
corners2 = (double *)aom_malloc(sizeof(*corners2) * npoints * 2);
image1_coord = (double *)aom_malloc(sizeof(*image1_coord) * npoints * 2);
-
motions =
- (RANSAC_MOTION *)aom_malloc(sizeof(RANSAC_MOTION) * num_desired_motions);
+ (RANSAC_MOTION *)aom_calloc(num_desired_motions, sizeof(RANSAC_MOTION));
+ current_motion.inlier_indices =
+ (int *)aom_malloc(sizeof(*current_motion.inlier_indices) * npoints);
+ if (!(points1 && points2 && corners1 && corners2 && image1_coord && motions &&
+ current_motion.inlier_indices)) {
+ ret_val = 1;
+ goto finish_ransac;
+ }
+
for (i = 0; i < num_desired_motions; ++i) {
motions[i].inlier_indices =
(int *)aom_malloc(sizeof(*motions->inlier_indices) * npoints);
+ if (!motions[i].inlier_indices) {
+ ret_val = 1;
+ goto finish_ransac;
+ }
clear_motion(motions + i, npoints);
}
- current_motion.inlier_indices =
- (int *)aom_malloc(sizeof(*current_motion.inlier_indices) * npoints);
clear_motion(&current_motion, npoints);
worst_kept_motion = motions;
- if (!(points1 && points2 && corners1 && corners2 && image1_coord && motions &&
- current_motion.inlier_indices)) {
- ret_val = 1;
- goto finish_ransac;
- }
-
cnp1 = corners1;
cnp2 = corners2;
for (i = 0; i < npoints; ++i) {
@@ -541,10 +545,12 @@ finish_ransac:
aom_free(corners2);
aom_free(image1_coord);
aom_free(current_motion.inlier_indices);
- for (i = 0; i < num_desired_motions; ++i) {
- aom_free(motions[i].inlier_indices);
+ if (motions) {
+ for (i = 0; i < num_desired_motions; ++i) {
+ aom_free(motions[i].inlier_indices);
+ }
+ aom_free(motions);
}
- aom_free(motions);
return ret_val;
}
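
The reordering in both ransac variants fixes two failure paths: the aggregate NULL check now runs before any motions[i].inlier_indices store (so a failed motions allocation is never indexed), and the cleanup loop only walks motions[i] when motions itself is non-NULL. Switching to aom_calloc() also guarantees the per-motion pointers start zeroed, so freeing a partially initialized array is safe. The general shape of this goto-cleanup idiom, as a stand-alone sketch:

    #include <stdlib.h>

    typedef struct { int *inlier_indices; } Motion;

    static int run(int npoints, int num_motions) {
      int ret = 0;
      /* calloc, not malloc: inlier_indices pointers start NULL, so the
       * cleanup loop below is safe even after a mid-loop failure. */
      Motion *motions = calloc(num_motions, sizeof(*motions));
      if (!motions) { ret = 1; goto cleanup; }

      for (int i = 0; i < num_motions; i++) {
        motions[i].inlier_indices = malloc(sizeof(int) * npoints);
        if (!motions[i].inlier_indices) { ret = 1; goto cleanup; }
      }

      /* ... estimation work elided ... */

    cleanup:
      if (motions) {
        for (int i = 0; i < num_motions; i++) free(motions[i].inlier_indices);
        free(motions);
      }
      return ret;
    }
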
@@ -592,26 +598,29 @@ static int ransac_double_prec(const double *matched_points, int npoints,
corners1 = (double *)aom_malloc(sizeof(*corners1) * npoints * 2);
corners2 = (double *)aom_malloc(sizeof(*corners2) * npoints * 2);
image1_coord = (double *)aom_malloc(sizeof(*image1_coord) * npoints * 2);
-
motions =
- (RANSAC_MOTION *)aom_malloc(sizeof(RANSAC_MOTION) * num_desired_motions);
+ (RANSAC_MOTION *)aom_calloc(num_desired_motions, sizeof(RANSAC_MOTION));
+ current_motion.inlier_indices =
+ (int *)aom_malloc(sizeof(*current_motion.inlier_indices) * npoints);
+ if (!(points1 && points2 && corners1 && corners2 && image1_coord && motions &&
+ current_motion.inlier_indices)) {
+ ret_val = 1;
+ goto finish_ransac;
+ }
+
for (i = 0; i < num_desired_motions; ++i) {
motions[i].inlier_indices =
(int *)aom_malloc(sizeof(*motions->inlier_indices) * npoints);
+ if (!motions[i].inlier_indices) {
+ ret_val = 1;
+ goto finish_ransac;
+ }
clear_motion(motions + i, npoints);
}
- current_motion.inlier_indices =
- (int *)aom_malloc(sizeof(*current_motion.inlier_indices) * npoints);
clear_motion(&current_motion, npoints);
worst_kept_motion = motions;
- if (!(points1 && points2 && corners1 && corners2 && image1_coord && motions &&
- current_motion.inlier_indices)) {
- ret_val = 1;
- goto finish_ransac;
- }
-
cnp1 = corners1;
cnp2 = corners2;
for (i = 0; i < npoints; ++i) {
@@ -720,10 +729,12 @@ finish_ransac:
aom_free(corners2);
aom_free(image1_coord);
aom_free(current_motion.inlier_indices);
- for (i = 0; i < num_desired_motions; ++i) {
- aom_free(motions[i].inlier_indices);
+ if (motions) {
+ for (i = 0; i < num_desired_motions; ++i) {
+ aom_free(motions[i].inlier_indices);
+ }
+ aom_free(motions);
}
- aom_free(motions);
return ret_val;
}
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/ratectrl.c b/chromium/third_party/libaom/source/libaom/av1/encoder/ratectrl.c
index 5a0c6f52660..e6bb18486f6 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/ratectrl.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/ratectrl.c
@@ -269,7 +269,7 @@ static void update_buffer_level(AV1_COMP *cpi, int encoded_frame_size) {
if (cpi->ppi->use_svc)
update_layer_buffer_level(&cpi->svc, encoded_frame_size);
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
  /* The variable temp_buffer_level is introduced for quality
   * simulation purposes; it retains the value prior to the parallel
   * encode frames. The variable is updated based on the update flag.
@@ -400,7 +400,7 @@ int av1_rc_drop_frame(AV1_COMP *cpi) {
const AV1EncoderConfig *oxcf = &cpi->oxcf;
RATE_CONTROL *const rc = &cpi->rc;
PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
const int simulate_parallel_frame =
cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0 &&
cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE;
@@ -447,8 +447,9 @@ static int adjust_q_cbr(const AV1_COMP *cpi, int q, int active_worst_quality) {
const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
const AV1_COMMON *const cm = &cpi->common;
const RefreshFrameInfo *const refresh_frame = &cpi->refresh_frame;
- const int max_delta_down =
- (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) ? 8 : 16;
+ const int max_delta_down = (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN)
+ ? AOMMIN(8, AOMMAX(1, rc->q_1_frame / 16))
+ : AOMMIN(16, AOMMAX(1, rc->q_1_frame / 8));
const int max_delta_up = 20;
const int change_avg_frame_bandwidth =
abs(rc->avg_frame_bandwidth - rc->prev_avg_frame_bandwidth) >
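
The new bound ties the maximum downward Q step to the previous frame's Q rather than a fixed 8 or 16, so frames already at low Q move in smaller steps. A tiny sketch of the clamp, with local min/max macros mirroring AOMMIN/AOMMAX:

#define MIN_SKETCH(a, b) ((a) < (b) ? (a) : (b))
#define MAX_SKETCH(a, b) ((a) > (b) ? (a) : (b))

static int max_delta_down_sketch(int q_1_frame, int is_screen) {
  return is_screen ? MIN_SKETCH(8, MAX_SKETCH(1, q_1_frame / 16))
                   : MIN_SKETCH(16, MAX_SKETCH(1, q_1_frame / 8));
}
/* e.g. screen content with q_1_frame = 40: MIN(8, MAX(1, 2)) = 2,
 * versus the previous fixed cap of 8. */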
@@ -556,7 +557,7 @@ static double get_rate_correction_factor(const AV1_COMP *cpi, int width,
double rate_correction_factors_kfstd;
double rate_correction_factors_gfarfstd;
double rate_correction_factors_internormal;
-#if CONFIG_FRAME_PARALLEL_ENCODE
+
rate_correction_factors_kfstd =
(cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0)
? rc->frame_level_rate_correction_factors[KF_STD]
@@ -569,27 +570,16 @@ static double get_rate_correction_factor(const AV1_COMP *cpi, int width,
(cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0)
? rc->frame_level_rate_correction_factors[INTER_NORMAL]
: p_rc->rate_correction_factors[INTER_NORMAL];
-#else
- rate_correction_factors_kfstd = p_rc->rate_correction_factors[KF_STD];
- rate_correction_factors_gfarfstd = p_rc->rate_correction_factors[GF_ARF_STD];
- rate_correction_factors_internormal =
- p_rc->rate_correction_factors[INTER_NORMAL];
-#endif
if (cpi->common.current_frame.frame_type == KEY_FRAME) {
rcf = rate_correction_factors_kfstd;
} else if (is_stat_consumption_stage(cpi)) {
const RATE_FACTOR_LEVEL rf_lvl =
get_rate_factor_level(&cpi->ppi->gf_group, cpi->gf_frame_index);
- double rate_correction_factors_rflvl;
-#if CONFIG_FRAME_PARALLEL_ENCODE
- rate_correction_factors_rflvl =
+ double rate_correction_factors_rflvl =
(cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0)
? rc->frame_level_rate_correction_factors[rf_lvl]
: p_rc->rate_correction_factors[rf_lvl];
-#else
- rate_correction_factors_rflvl = p_rc->rate_correction_factors[rf_lvl];
-#endif
rcf = rate_correction_factors_rflvl;
} else {
if ((refresh_frame->alt_ref_frame || refresh_frame->golden_frame) &&
@@ -617,7 +607,7 @@ static double get_rate_correction_factor(const AV1_COMP *cpi, int width,
* \param[in] width Frame width
* \param[in] height Frame height
*
- * \return None but updates the rate correction factor for the
+ * \remark Updates the rate correction factor for the
* current frame type in cpi->rc.
*/
static void set_rate_correction_factor(AV1_COMP *cpi, int is_encode_stage,
@@ -625,9 +615,6 @@ static void set_rate_correction_factor(AV1_COMP *cpi, int is_encode_stage,
RATE_CONTROL *const rc = &cpi->rc;
PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
const RefreshFrameInfo *const refresh_frame = &cpi->refresh_frame;
-#if !CONFIG_FRAME_PARALLEL_ENCODE
- (void)is_encode_stage;
-#endif
int update_default_rcf = 1;
// Normalize RCF to account for the size-dependent scaling factor.
factor /= resize_rate_factor(&cpi->oxcf.frm_dim_cfg, width, height);
@@ -639,13 +626,11 @@ static void set_rate_correction_factor(AV1_COMP *cpi, int is_encode_stage,
} else if (is_stat_consumption_stage(cpi)) {
const RATE_FACTOR_LEVEL rf_lvl =
get_rate_factor_level(&cpi->ppi->gf_group, cpi->gf_frame_index);
-#if CONFIG_FRAME_PARALLEL_ENCODE
if (is_encode_stage &&
cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
rc->frame_level_rate_correction_factors[rf_lvl] = factor;
update_default_rcf = 0;
}
-#endif
if (update_default_rcf) p_rc->rate_correction_factors[rf_lvl] = factor;
} else {
if ((refresh_frame->alt_ref_frame || refresh_frame->golden_frame) &&
@@ -654,13 +639,11 @@ static void set_rate_correction_factor(AV1_COMP *cpi, int is_encode_stage,
cpi->oxcf.rc_cfg.gf_cbr_boost_pct > 20)) {
p_rc->rate_correction_factors[GF_ARF_STD] = factor;
} else {
-#if CONFIG_FRAME_PARALLEL_ENCODE
if (is_encode_stage &&
cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0) {
rc->frame_level_rate_correction_factors[INTER_NORMAL] = factor;
update_default_rcf = 0;
}
-#endif
if (update_default_rcf)
p_rc->rate_correction_factors[INTER_NORMAL] = factor;
}
@@ -670,17 +653,14 @@ static void set_rate_correction_factor(AV1_COMP *cpi, int is_encode_stage,
void av1_rc_update_rate_correction_factors(AV1_COMP *cpi, int is_encode_stage,
int width, int height) {
const AV1_COMMON *const cm = &cpi->common;
- int correction_factor = 100;
+ double correction_factor = 1.0;
double rate_correction_factor =
get_rate_correction_factor(cpi, width, height);
double adjustment_limit;
const int MBs = av1_get_MBs(width, height);
-
-#if !CONFIG_FRAME_PARALLEL_ENCODE
- (void)is_encode_stage;
-#endif
-
int projected_size_based_on_q = 0;
+ int cyclic_refresh_active =
+ cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && cpi->common.seg.enabled;
// Do not update the rate factors for arf overlay frames.
if (cpi->rc.is_src_frame_alt_ref) return;
@@ -690,7 +670,7 @@ void av1_rc_update_rate_correction_factors(AV1_COMP *cpi, int is_encode_stage,
// Work out how big we would have expected the frame to be at this Q given
// the current correction factor.
// Stay in double to avoid int overflow when values are large
- if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && cpi->common.seg.enabled) {
+ if (cyclic_refresh_active) {
projected_size_based_on_q =
av1_cyclic_refresh_estimate_bits_at_q(cpi, rate_correction_factor);
} else {
@@ -701,41 +681,67 @@ void av1_rc_update_rate_correction_factors(AV1_COMP *cpi, int is_encode_stage,
}
// Work out a size correction factor.
if (projected_size_based_on_q > FRAME_OVERHEAD_BITS)
- correction_factor = (int)((100 * (int64_t)cpi->rc.projected_frame_size) /
- projected_size_based_on_q);
-
- // More heavily damped adjustment used if we have been oscillating either side
- // of target.
- if (correction_factor > 0) {
- adjustment_limit =
- 0.25 + 0.5 * AOMMIN(1, fabs(log10(0.01 * correction_factor)));
- } else {
- adjustment_limit = 0.75;
- }
+ correction_factor = (double)cpi->rc.projected_frame_size /
+ (double)projected_size_based_on_q;
+
+ // Clamp correction factor to prevent anything too extreme
+ correction_factor = AOMMAX(correction_factor, 0.25);
cpi->rc.q_2_frame = cpi->rc.q_1_frame;
cpi->rc.q_1_frame = cm->quant_params.base_qindex;
cpi->rc.rc_2_frame = cpi->rc.rc_1_frame;
- if (correction_factor > 110)
+ if (correction_factor > 1.1)
cpi->rc.rc_1_frame = -1;
- else if (correction_factor < 90)
+ else if (correction_factor < 0.9)
cpi->rc.rc_1_frame = 1;
else
cpi->rc.rc_1_frame = 0;
- if (correction_factor > 102) {
+ // Decide how heavily to dampen the adjustment
+ if (correction_factor > 0.0) {
+ if (cpi->is_screen_content_type) {
+ adjustment_limit =
+ 0.25 + 0.5 * AOMMIN(0.5, fabs(log10(correction_factor)));
+ } else {
+ adjustment_limit =
+ 0.25 + 0.75 * AOMMIN(0.5, fabs(log10(correction_factor)));
+ }
+ } else {
+ adjustment_limit = 0.75;
+ }
+
+  // Adjustment to delta Q and number of blocks updated in cyclic refresh
+  // based on overshoot or undershoot of target in current frame.
+ if (cyclic_refresh_active && (cpi->rc.this_frame_target > 0) &&
+ !cpi->ppi->use_svc) {
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ if (correction_factor > 1.25) {
+ cr->percent_refresh_adjustment =
+ AOMMAX(cr->percent_refresh_adjustment - 1, -5);
+ cr->rate_ratio_qdelta_adjustment =
+ AOMMAX(cr->rate_ratio_qdelta_adjustment - 0.05, -0.0);
+ } else if (correction_factor < 0.5) {
+ cr->percent_refresh_adjustment =
+ AOMMIN(cr->percent_refresh_adjustment + 1, 5);
+ cr->rate_ratio_qdelta_adjustment =
+ AOMMIN(cr->rate_ratio_qdelta_adjustment + 0.05, 0.25);
+ }
+ }
+
+ if (correction_factor > 1.01) {
// We are not already at the worst allowable quality
- correction_factor =
- (int)(100 + ((correction_factor - 100) * adjustment_limit));
- rate_correction_factor = (rate_correction_factor * correction_factor) / 100;
+ correction_factor = (1.0 + ((correction_factor - 1.0) * adjustment_limit));
+ rate_correction_factor = rate_correction_factor * correction_factor;
// Keep rate_correction_factor within limits
if (rate_correction_factor > MAX_BPB_FACTOR)
rate_correction_factor = MAX_BPB_FACTOR;
- } else if (correction_factor < 99) {
+ } else if (correction_factor < 0.99) {
// We are not already at the best allowable quality
- correction_factor =
- (int)(100 - ((100 - correction_factor) * adjustment_limit));
- rate_correction_factor = (rate_correction_factor * correction_factor) / 100;
+ correction_factor = 1.0 / correction_factor;
+ correction_factor = (1.0 + ((correction_factor - 1.0) * adjustment_limit));
+ correction_factor = 1.0 / correction_factor;
+
+ rate_correction_factor = rate_correction_factor * correction_factor;
// Keep rate_correction_factor within limits
if (rate_correction_factor < MIN_BPB_FACTOR)
@@ -896,7 +902,7 @@ static int calc_active_worst_quality_no_stats_vbr(const AV1_COMP *cpi) {
int active_worst_quality;
int last_q_key_frame;
int last_q_inter_frame;
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
const int simulate_parallel_frame =
cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0 &&
cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE;
@@ -1060,6 +1066,30 @@ static int calc_active_best_quality_no_stats_cbr(const AV1_COMP *cpi,
return active_best_quality;
}
+#if RT_PASSIVE_STRATEGY
+static int get_q_passive_strategy(const AV1_COMP *const cpi,
+ const int q_candidate, const int threshold) {
+ const AV1_COMMON *const cm = &cpi->common;
+ const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
+ const CurrentFrame *const current_frame = &cm->current_frame;
+ int sum = 0;
+ int count = 0;
+ int i = 1;
+ while (i < MAX_Q_HISTORY) {
+ int frame_id = current_frame->frame_number - i;
+ if (frame_id <= 0) break;
+ sum += p_rc->q_history[frame_id % MAX_Q_HISTORY];
+ ++count;
+ ++i;
+ }
+ if (count > 0) {
+ const int avg_q = sum / count;
+ if (abs(avg_q - q_candidate) <= threshold) return avg_q;
+ }
+ return q_candidate;
+}
+#endif // RT_PASSIVE_STRATEGY
+
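get_q_passive_strategy pairs with the q_history ring buffer written in av1_rc_postencode_update: qindex values are stored modulo MAX_Q_HISTORY, and the candidate Q is snapped to the running average whenever they differ by no more than the threshold (50 at the call site below). A compact sketch of both sides of the same indexing, with hypothetical names:

#include <stdlib.h>

#define MAX_Q_HISTORY_SKETCH 1000

/* Post-encode write side: store this frame's qindex in the ring. */
static void record_q_sketch(int *q_history, int frame_number, int qindex) {
  q_history[frame_number % MAX_Q_HISTORY_SKETCH] = qindex;
}

/* Read side: average the available history; snap when close enough. */
static int passive_q_sketch(const int *q_history, int frame_number,
                            int q_candidate, int threshold) {
  int sum = 0, count = 0, i;
  for (i = 1; i < MAX_Q_HISTORY_SKETCH; ++i) {
    const int frame_id = frame_number - i;
    if (frame_id <= 0) break;
    sum += q_history[frame_id % MAX_Q_HISTORY_SKETCH];
    ++count;
  }
  if (count > 0) {
    const int avg_q = sum / count;
    if (abs(avg_q - q_candidate) <= threshold) return avg_q;
  }
  return q_candidate;
}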
/*!\brief Picks q and q bounds given CBR rate control parameters in \c cpi->rc.
*
* Handles the special case when using:
@@ -1116,6 +1146,12 @@ static int rc_pick_q_and_bounds_no_stats_cbr(const AV1_COMP *cpi, int width,
} else {
q = av1_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality,
active_worst_quality, width, height);
+#if RT_PASSIVE_STRATEGY
+ if (current_frame->frame_type != KEY_FRAME &&
+ cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
+ q = get_q_passive_strategy(cpi, q, 50);
+ }
+#endif // RT_PASSIVE_STRATEGY
if (q > *top_index) {
// Special case when we are targeting the max allowed rate
if (rc->this_frame_target >= rc->max_frame_bandwidth)
@@ -1230,7 +1266,7 @@ static int rc_pick_q_and_bounds_no_stats(const AV1_COMP *cpi, int width,
av1_compute_qdelta(rc, q_val, q_val * 0.25, bit_depth);
active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
} else if (p_rc->this_key_frame_forced) {
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
const int simulate_parallel_frame =
cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0 &&
cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE;
@@ -1345,7 +1381,7 @@ static int rc_pick_q_and_bounds_no_stats(const AV1_COMP *cpi, int width,
// Special case code to try and match quality with forced key frames
} else if ((current_frame->frame_type == KEY_FRAME) &&
p_rc->this_key_frame_forced) {
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
const int simulate_parallel_frame =
cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0 &&
cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE;
@@ -1441,7 +1477,7 @@ static void get_intra_q_and_bounds(const AV1_COMP *cpi, int width, int height,
double last_boosted_q;
int delta_qindex;
int qindex;
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
const int simulate_parallel_frame =
cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0 &&
cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE;
@@ -1472,7 +1508,7 @@ static void get_intra_q_and_bounds(const AV1_COMP *cpi, int width, int height,
double q_adj_factor = 1.0;
double q_val;
- // Baseline value derived from cpi->active_worst_quality and kf boost.
+ // Baseline value derived from active_worst_quality and kf boost.
active_best_quality =
get_kf_active_quality(p_rc, active_worst_quality, bit_depth);
if (cpi->is_screen_content_type) {
@@ -1528,7 +1564,7 @@ static void adjust_active_best_and_worst_quality(const AV1_COMP *cpi,
const int bit_depth = cpi->common.seq_params->bit_depth;
int active_best_quality = *active_best;
int active_worst_quality = *active_worst;
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
const int simulate_parallel_frame =
cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0 &&
cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE;
@@ -1547,7 +1583,7 @@ static void adjust_active_best_and_worst_quality(const AV1_COMP *cpi,
(!rc->is_src_frame_alt_ref &&
(refresh_frame->golden_frame || is_intrl_arf_boost ||
refresh_frame->alt_ref_frame))) {
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
active_best_quality -= (extend_minq + extend_minq_fast);
active_worst_quality += (extend_maxq / 2);
#else
@@ -1556,7 +1592,7 @@ static void adjust_active_best_and_worst_quality(const AV1_COMP *cpi,
active_worst_quality += (cpi->ppi->twopass.extend_maxq / 2);
#endif
} else {
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
active_best_quality -= (extend_minq + extend_minq_fast) / 2;
active_worst_quality += extend_maxq;
#else
@@ -1618,7 +1654,7 @@ static int get_q(const AV1_COMP *cpi, const int width, const int height,
const RATE_CONTROL *const rc = &cpi->rc;
const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
int q;
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
const int simulate_parallel_frame =
cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0 &&
cpi->ppi->fpmt_unit_test_cfg;
@@ -1865,7 +1901,7 @@ static int rc_pick_q_and_bounds(const AV1_COMP *cpi, int width, int height,
active_best_quality = get_active_best_quality(cpi, active_worst_quality,
cq_level, gf_index);
} else {
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
const int simulate_parallel_frame =
cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0 &&
cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE;
@@ -1920,8 +1956,8 @@ static int rc_pick_q_and_bounds(const AV1_COMP *cpi, int width, int height,
return q;
}
-int av1_rc_pick_q_and_bounds(const AV1_COMP *cpi, int width, int height,
- int gf_index, int *bottom_index, int *top_index) {
+int av1_rc_pick_q_and_bounds(AV1_COMP *cpi, int width, int height, int gf_index,
+ int *bottom_index, int *top_index) {
PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
int q;
// TODO(sarahparker) merge no-stats vbr and altref q computation
@@ -1933,6 +1969,9 @@ int av1_rc_pick_q_and_bounds(const AV1_COMP *cpi, int width, int height,
if (cpi->oxcf.rc_cfg.mode == AOM_CBR) {
q = rc_pick_q_and_bounds_no_stats_cbr(cpi, width, height, bottom_index,
top_index);
+    // Preserve a copy of the active worst quality selected.
+ cpi->rc.active_worst_quality = *top_index;
+
#if USE_UNRESTRICTED_Q_IN_CQ_MODE
} else if (cpi->oxcf.rc_cfg.mode == AOM_CQ) {
q = rc_pick_q_and_bounds_no_stats_cq(cpi, width, height, bottom_index,
@@ -2017,6 +2056,11 @@ void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) {
const int qindex = cm->quant_params.base_qindex;
+#if RT_PASSIVE_STRATEGY
+ const int frame_number = current_frame->frame_number % MAX_Q_HISTORY;
+ p_rc->q_history[frame_number] = qindex;
+#endif // RT_PASSIVE_STRATEGY
+
// Update rate control heuristics
rc->projected_frame_size = (int)(bytes_used << 3);
@@ -2089,7 +2133,7 @@ void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) {
// Update the Golden frame stats as appropriate.
update_golden_frame_stats(cpi);
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
  /* The variables temp_avg_frame_qindex, temp_last_q, temp_avg_q,
   * temp_last_boosted_qindex are introduced only for quality simulation
   * purposes; they retain the values prior to the parallel encode frames. The
@@ -2281,7 +2325,7 @@ void av1_rc_update_framerate(AV1_COMP *cpi, int width, int height) {
static void vbr_rate_correction(AV1_COMP *cpi, int *this_frame_target) {
RATE_CONTROL *const rc = &cpi->rc;
PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
const int simulate_parallel_frame =
cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0 &&
cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE;
@@ -2308,7 +2352,7 @@ static void vbr_rate_correction(AV1_COMP *cpi, int *this_frame_target) {
*this_frame_target += (vbr_bits_off_target >= 0) ? max_delta : -max_delta;
}
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
int64_t vbr_bits_off_target_fast =
simulate_parallel_frame ? cpi->ppi->p_rc.temp_vbr_bits_off_target_fast
: p_rc->vbr_bits_off_target_fast;
@@ -2316,7 +2360,7 @@ static void vbr_rate_correction(AV1_COMP *cpi, int *this_frame_target) {
// Fast redistribution of bits arising from massive local undershoot.
  // Don't do it for kf, arf, gf or overlay frames.
if (!frame_is_kf_gf_arf(cpi) &&
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
vbr_bits_off_target_fast &&
#else
p_rc->vbr_bits_off_target_fast &&
@@ -2324,7 +2368,7 @@ static void vbr_rate_correction(AV1_COMP *cpi, int *this_frame_target) {
!rc->is_src_frame_alt_ref) {
int one_frame_bits = AOMMAX(rc->avg_frame_bandwidth, *this_frame_target);
int fast_extra_bits;
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
fast_extra_bits = (int)AOMMIN(vbr_bits_off_target_fast, one_frame_bits);
fast_extra_bits =
(int)AOMMIN(fast_extra_bits,
@@ -2516,7 +2560,7 @@ static void set_baseline_gf_interval(AV1_COMP *cpi, FRAME_TYPE frame_type) {
void av1_adjust_gf_refresh_qp_one_pass_rt(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
- SVC *const svc = &cpi->svc;
+ RTC_REF *const rtc_ref = &cpi->rtc_ref;
const int resize_pending = is_frame_resize_pending(cpi);
if (!resize_pending && !rc->high_source_sad) {
// Check if we should disable GF refresh (if period is up),
@@ -2531,7 +2575,7 @@ void av1_adjust_gf_refresh_qp_one_pass_rt(AV1_COMP *cpi) {
if (rc->frames_till_gf_update_due == 1 &&
cm->quant_params.base_qindex > avg_qp) {
      // Disable GF refresh since QP is above the running average QP.
- svc->refresh[svc->gld_idx_1layer] = 0;
+ rtc_ref->refresh[rtc_ref->gld_idx_1layer] = 0;
gf_update_changed = 1;
cpi->refresh_frame.golden_frame = 0;
} else if (allow_gf_update &&
@@ -2539,7 +2583,7 @@ void av1_adjust_gf_refresh_qp_one_pass_rt(AV1_COMP *cpi) {
(rc->avg_frame_low_motion && rc->avg_frame_low_motion < 20))) {
// Force refresh since QP is well below average QP or this is a high
// motion frame.
- svc->refresh[svc->gld_idx_1layer] = 1;
+ rtc_ref->refresh[rtc_ref->gld_idx_1layer] = 1;
gf_update_changed = 1;
cpi->refresh_frame.golden_frame = 1;
}
@@ -2547,8 +2591,9 @@ void av1_adjust_gf_refresh_qp_one_pass_rt(AV1_COMP *cpi) {
set_baseline_gf_interval(cpi, INTER_FRAME);
int refresh_mask = 0;
for (unsigned int i = 0; i < INTER_REFS_PER_FRAME; i++) {
- int ref_frame_map_idx = svc->ref_idx[i];
- refresh_mask |= svc->refresh[ref_frame_map_idx] << ref_frame_map_idx;
+ int ref_frame_map_idx = rtc_ref->ref_idx[i];
+ refresh_mask |= rtc_ref->refresh[ref_frame_map_idx]
+ << ref_frame_map_idx;
}
cm->current_frame.refresh_frame_flags = refresh_mask;
}
@@ -2567,19 +2612,18 @@ void av1_adjust_gf_refresh_qp_one_pass_rt(AV1_COMP *cpi) {
* \param[in] cpi Top level encoder structure
* \param[in] gf_update Flag to indicate if GF is updated
*
- * \return Nothing is returned. Instead the settings for the prediction
+ * \remark Nothing is returned. Instead the settings for the prediction
 * structure are set in \c cpi->ext_flags; and the buffer slot index
* (for each of 7 references) and refresh flags (for each of the 8 slots)
* are set in \c cpi->svc.ref_idx[] and \c cpi->svc.refresh[].
*/
-void av1_set_reference_structure_one_pass_rt(AV1_COMP *cpi, int gf_update) {
+void av1_set_rtc_reference_structure_one_layer(AV1_COMP *cpi, int gf_update) {
AV1_COMMON *const cm = &cpi->common;
ExternalFlags *const ext_flags = &cpi->ext_flags;
RATE_CONTROL *const rc = &cpi->rc;
ExtRefreshFrameFlagsInfo *const ext_refresh_frame_flags =
&ext_flags->refresh_frame;
- SVC *const svc = &cpi->svc;
- const int gld_fixed_slot = 1;
+ RTC_REF *const rtc_ref = &cpi->rtc_ref;
unsigned int lag_alt = 4;
int last_idx = 0;
int last_idx_refresh = 0;
@@ -2587,7 +2631,6 @@ void av1_set_reference_structure_one_pass_rt(AV1_COMP *cpi, int gf_update) {
int alt_ref_idx = 0;
int last2_idx = 0;
ext_refresh_frame_flags->update_pending = 1;
- svc->set_ref_frame_config = 1;
ext_flags->ref_frame_flags = 0;
ext_refresh_frame_flags->last_frame = 1;
ext_refresh_frame_flags->golden_frame = 0;
@@ -2610,28 +2653,28 @@ void av1_set_reference_structure_one_pass_rt(AV1_COMP *cpi, int gf_update) {
else if (rc->avg_source_sad > th_frame_sad[th_idx][2])
lag_alt = 5;
}
- for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) svc->ref_idx[i] = 7;
- for (int i = 0; i < REF_FRAMES; ++i) svc->refresh[i] = 0;
+ // This defines the reference structure for 1 layer (non-svc) RTC encoding.
+ // To avoid the internal/default reference structure for non-realtime
+ // overwriting this behavior, we use the "svc" ref parameters from the
+ // external control SET_SVC_REF_FRAME_CONFIG.
+ // TODO(marpan): rename that control and the related internal parameters
+ // to rtc_ref.
+ for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) rtc_ref->ref_idx[i] = 7;
+ for (int i = 0; i < REF_FRAMES; ++i) rtc_ref->refresh[i] = 0;
// Set the reference frame flags.
ext_flags->ref_frame_flags ^= AOM_LAST_FLAG;
ext_flags->ref_frame_flags ^= AOM_ALT_FLAG;
ext_flags->ref_frame_flags ^= AOM_GOLD_FLAG;
if (cpi->sf.rt_sf.ref_frame_comp_nonrd[1])
ext_flags->ref_frame_flags ^= AOM_LAST2_FLAG;
- const int sh = 7 - gld_fixed_slot;
+ const int sh = 6;
// Moving index slot for last: 0 - (sh - 1).
if (cm->current_frame.frame_number > 1)
last_idx = ((cm->current_frame.frame_number - 1) % sh);
// Moving index for refresh of last: one ahead for next frame.
last_idx_refresh = (cm->current_frame.frame_number % sh);
gld_idx = 6;
- if (!gld_fixed_slot) {
- gld_idx = 7;
- const unsigned int lag_gld = 7; // Must be <= 7.
- // Moving index for gld_ref, lag behind current by gld_interval frames.
- if (cm->current_frame.frame_number > lag_gld)
- gld_idx = ((cm->current_frame.frame_number - lag_gld) % sh);
- }
+
// Moving index for alt_ref, lag behind LAST by lag_alt frames.
if (cm->current_frame.frame_number > lag_alt)
alt_ref_idx = ((cm->current_frame.frame_number - lag_alt) % sh);
@@ -2640,23 +2683,31 @@ void av1_set_reference_structure_one_pass_rt(AV1_COMP *cpi, int gf_update) {
if (cm->current_frame.frame_number > 2)
last2_idx = ((cm->current_frame.frame_number - 2) % sh);
}
- svc->ref_idx[0] = last_idx; // LAST
- svc->ref_idx[1] = last_idx_refresh; // LAST2 (for refresh of last).
+ rtc_ref->ref_idx[0] = last_idx; // LAST
+ rtc_ref->ref_idx[1] = last_idx_refresh; // LAST2 (for refresh of last).
if (cpi->sf.rt_sf.ref_frame_comp_nonrd[1]) {
- svc->ref_idx[1] = last2_idx; // LAST2
- svc->ref_idx[2] = last_idx_refresh; // LAST3 (for refresh of last).
+ rtc_ref->ref_idx[1] = last2_idx; // LAST2
+ rtc_ref->ref_idx[2] = last_idx_refresh; // LAST3 (for refresh of last).
}
- svc->ref_idx[3] = gld_idx; // GOLDEN
- svc->ref_idx[6] = alt_ref_idx; // ALT_REF
+ rtc_ref->ref_idx[3] = gld_idx; // GOLDEN
+ rtc_ref->ref_idx[6] = alt_ref_idx; // ALT_REF
// Refresh this slot, which will become LAST on next frame.
- svc->refresh[last_idx_refresh] = 1;
+ rtc_ref->refresh[last_idx_refresh] = 1;
// Update GOLDEN on period for fixed slot case.
- if (gld_fixed_slot && gf_update &&
- cm->current_frame.frame_type != KEY_FRAME) {
+ if (gf_update && cm->current_frame.frame_type != KEY_FRAME) {
ext_refresh_frame_flags->golden_frame = 1;
- svc->refresh[gld_idx] = 1;
- }
- svc->gld_idx_1layer = gld_idx;
+ rtc_ref->refresh[gld_idx] = 1;
+ }
+ rtc_ref->gld_idx_1layer = gld_idx;
+ // Set the flag to reduce the number of reference frame buffers used.
+ // This assumes that slot 7 is never used.
+ cpi->rt_reduce_num_ref_buffers = 1;
+ cpi->rt_reduce_num_ref_buffers &= (rtc_ref->ref_idx[0] < 7);
+ cpi->rt_reduce_num_ref_buffers &= (rtc_ref->ref_idx[1] < 7);
+ cpi->rt_reduce_num_ref_buffers &= (rtc_ref->ref_idx[3] < 7);
+ cpi->rt_reduce_num_ref_buffers &= (rtc_ref->ref_idx[6] < 7);
+ if (cpi->sf.rt_sf.ref_frame_comp_nonrd[1])
+ cpi->rt_reduce_num_ref_buffers &= (rtc_ref->ref_idx[2] < 7);
}
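
With GOLDEN pinned to slot 6, the rotating slots reduce to 0..5 (sh = 6): LAST tracks the frame counter, its refresh target runs one slot ahead, and ALT_REF lags LAST by lag_alt frames. A small sketch of just the slot arithmetic (hypothetical helper, not a libaom function):

/* Moving-slot indices for the 1-layer RTC structure (sh = 6). */
static void rtc_slots_sketch(unsigned int frame_number, unsigned int lag_alt,
                             int *last_idx, int *last_idx_refresh,
                             int *alt_ref_idx) {
  const unsigned int sh = 6; /* slots 0..5 rotate; slot 6 holds GOLDEN */
  *last_idx = frame_number > 1 ? (int)((frame_number - 1) % sh) : 0;
  *last_idx_refresh = (int)(frame_number % sh); /* LAST on the next frame */
  *alt_ref_idx =
      frame_number > lag_alt ? (int)((frame_number - lag_alt) % sh) : 0;
}
/* e.g. frame 8 with lag_alt = 4: LAST in slot 1, refresh slot 2,
 * ALT_REF in slot 4, GOLDEN fixed in slot 6. */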
/*!\brief Check for scene detection, for 1 pass real-time mode.
@@ -2667,15 +2718,17 @@ void av1_set_reference_structure_one_pass_rt(AV1_COMP *cpi, int gf_update) {
*
* \ingroup rate_control
* \param[in] cpi Top level encoder structure
+ * \param[in] frame_input Current and last input source frames
*
- * \return Nothing is returned. Instead the flag \c cpi->rc.high_source_sad
+ * \remark Nothing is returned. Instead the flag \c cpi->rc.high_source_sad
* is set if scene change is detected, and \c cpi->rc.avg_source_sad is updated.
*/
-static void rc_scene_detection_onepass_rt(AV1_COMP *cpi) {
+static void rc_scene_detection_onepass_rt(AV1_COMP *cpi,
+ const EncodeFrameInput *frame_input) {
AV1_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
- YV12_BUFFER_CONFIG const *unscaled_src = cpi->unscaled_source;
- YV12_BUFFER_CONFIG const *unscaled_last_src = cpi->unscaled_last_source;
+ YV12_BUFFER_CONFIG const *const unscaled_src = frame_input->source;
+ YV12_BUFFER_CONFIG const *const unscaled_last_src = frame_input->last_source;
uint8_t *src_y;
int src_ystride;
int src_width;
@@ -2685,14 +2738,14 @@ static void rc_scene_detection_onepass_rt(AV1_COMP *cpi) {
int last_src_width;
int last_src_height;
if (cm->spatial_layer_id != 0 || cm->width != cm->render_width ||
- cm->height != cm->render_height || cpi->unscaled_source == NULL ||
- cpi->unscaled_last_source == NULL) {
+ cm->height != cm->render_height || unscaled_src == NULL ||
+ unscaled_last_src == NULL) {
if (cpi->src_sad_blk_64x64) {
aom_free(cpi->src_sad_blk_64x64);
cpi->src_sad_blk_64x64 = NULL;
}
}
- if (cpi->unscaled_source == NULL || cpi->unscaled_last_source == NULL) return;
+ if (unscaled_src == NULL || unscaled_last_src == NULL) return;
src_y = unscaled_src->y_buffer;
src_ystride = unscaled_src->y_stride;
src_width = unscaled_src->y_width;
@@ -2709,7 +2762,8 @@ static void rc_scene_detection_onepass_rt(AV1_COMP *cpi) {
return;
}
rc->high_source_sad = 0;
- rc->high_num_blocks_with_motion = 0;
+ rc->percent_blocks_with_motion = 0;
+ rc->max_block_source_sad = 0;
rc->prev_avg_source_sad = rc->avg_source_sad;
if (src_width == last_src_width && src_height == last_src_height) {
const int num_mi_cols = cm->mi_params.mi_cols;
@@ -2718,7 +2772,6 @@ static void rc_scene_detection_onepass_rt(AV1_COMP *cpi) {
uint32_t min_thresh = 10000;
if (cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN) min_thresh = 100000;
const BLOCK_SIZE bsize = BLOCK_64X64;
- int full_sampling = (cm->width * cm->height < 640 * 360) ? 1 : 0;
// Loop over sub-sample of frame, compute average sad over 64x64 blocks.
uint64_t avg_sad = 0;
uint64_t tmp_sad = 0;
@@ -2736,42 +2789,37 @@ static void rc_scene_detection_onepass_rt(AV1_COMP *cpi) {
// Flag to check light change or not.
const int check_light_change = 0;
// Store blkwise SAD for later use
- if (cpi->sf.rt_sf.sad_based_comp_prune && (cm->spatial_layer_id == 0) &&
- (cm->width == cm->render_width) && (cm->height == cm->render_height)) {
- full_sampling = 1;
+ if ((cm->spatial_layer_id == 0) && (cm->width == cm->render_width) &&
+ (cm->height == cm->render_height)) {
if (cpi->src_sad_blk_64x64 == NULL) {
- cpi->src_sad_blk_64x64 = (uint64_t *)aom_malloc(
- (sb_cols * sb_rows) * sizeof(*cpi->src_sad_blk_64x64));
- memset(cpi->src_sad_blk_64x64, 0,
- (sb_cols * sb_rows) * sizeof(*cpi->src_sad_blk_64x64));
+ CHECK_MEM_ERROR(
+ cm, cpi->src_sad_blk_64x64,
+ (uint64_t *)aom_calloc(sb_cols * sb_rows,
+ sizeof(*cpi->src_sad_blk_64x64)));
}
}
for (int sbi_row = 0; sbi_row < sb_rows; ++sbi_row) {
for (int sbi_col = 0; sbi_col < sb_cols; ++sbi_col) {
- // Checker-board pattern, ignore boundary.
- if (full_sampling ||
- ((sbi_row > 0 && sbi_col > 0) &&
- (sbi_row < sb_rows - 1 && sbi_col < sb_cols - 1) &&
- ((sbi_row % 2 == 0 && sbi_col % 2 == 0) ||
- (sbi_row % 2 != 0 && sbi_col % 2 != 0)))) {
- tmp_sad = cpi->ppi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y,
- last_src_ystride);
- if (cpi->src_sad_blk_64x64 != NULL)
- cpi->src_sad_blk_64x64[sbi_col + sbi_row * sb_cols] = tmp_sad;
- if (check_light_change) {
- unsigned int sse, variance;
- variance = cpi->ppi->fn_ptr[bsize].vf(
- src_y, src_ystride, last_src_y, last_src_ystride, &sse);
- // Note: sse - variance = ((sum * sum) >> 12)
- // Detect large lighting change.
- if (variance < (sse >> 1) && (sse - variance) > sum_sq_thresh) {
- num_low_var_high_sumdiff++;
- }
+ tmp_sad = cpi->ppi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y,
+ last_src_ystride);
+ if (cpi->src_sad_blk_64x64 != NULL)
+ cpi->src_sad_blk_64x64[sbi_col + sbi_row * sb_cols] = tmp_sad;
+ if (check_light_change) {
+ unsigned int sse, variance;
+ variance = cpi->ppi->fn_ptr[bsize].vf(src_y, src_ystride, last_src_y,
+ last_src_ystride, &sse);
+ // Note: sse - variance = ((sum * sum) >> 12)
+ // Detect large lighting change.
+ if (variance < (sse >> 1) && (sse - variance) > sum_sq_thresh) {
+ num_low_var_high_sumdiff++;
}
- avg_sad += tmp_sad;
- num_samples++;
- if (tmp_sad == 0) num_zero_temp_sad++;
}
+ avg_sad += tmp_sad;
+ num_samples++;
+ if (tmp_sad == 0) num_zero_temp_sad++;
+ if (tmp_sad > rc->max_block_source_sad)
+ rc->max_block_source_sad = tmp_sad;
+
src_y += 64;
last_src_y += 64;
}
@@ -2796,9 +2844,9 @@ static void rc_scene_detection_onepass_rt(AV1_COMP *cpi) {
rc->high_source_sad = 0;
rc->avg_source_sad = (3 * rc->avg_source_sad + avg_sad) >> 2;
rc->frame_source_sad = avg_sad;
-
- if (num_zero_temp_sad < (3 * num_samples >> 2))
- rc->high_num_blocks_with_motion = 1;
+ if (num_samples > 0)
+ rc->percent_blocks_with_motion =
+ ((num_samples - num_zero_temp_sad) * 100) / num_samples;
}
cpi->svc.high_source_sad_superframe = rc->high_source_sad;
}
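
Dropping the checker-board subsampling means every 64x64 block is now measured, which is what makes the two new per-frame statistics meaningful: the maximum block SAD and the share of blocks with any motion, the latter replacing the old high_num_blocks_with_motion flag. A sketch of the summary step under those assumptions:

/* Percentage of measured 64x64 blocks whose temporal SAD is nonzero. */
static int percent_blocks_with_motion_sketch(int num_samples,
                                             int num_zero_temp_sad) {
  if (num_samples <= 0) return 0;
  return ((num_samples - num_zero_temp_sad) * 100) / num_samples;
}
/* The old code raised its flag when fewer than 3/4 of the sampled blocks
 * had zero SAD; callers can recover that test as
 * percent_blocks_with_motion_sketch(...) > 25. */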
@@ -2894,7 +2942,7 @@ static void resize_reset_rc(AV1_COMP *cpi, int resize_width, int resize_height,
* \ingroup rate_control
* \param[in] cpi Top level encoder structure
*
- * \return Return resized width/height in \c cpi->resize_pending_params,
+ * \remark Return resized width/height in \c cpi->resize_pending_params,
* and update some resize counters in \c rc.
*/
static void dynamic_resize_one_pass_cbr(AV1_COMP *cpi) {
@@ -3006,8 +3054,8 @@ static INLINE int set_key_frame(AV1_COMP *cpi, unsigned int frame_flags) {
return 0;
}
-void av1_get_one_pass_rt_params(AV1_COMP *cpi,
- EncodeFrameParams *const frame_params,
+void av1_get_one_pass_rt_params(AV1_COMP *cpi, FRAME_TYPE *const frame_type,
+ const EncodeFrameInput *frame_input,
unsigned int frame_flags) {
RATE_CONTROL *const rc = &cpi->rc;
PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
@@ -3028,7 +3076,7 @@ void av1_get_one_pass_rt_params(AV1_COMP *cpi,
}
// Set frame type.
if (set_key_frame(cpi, frame_flags)) {
- frame_params->frame_type = KEY_FRAME;
+ *frame_type = KEY_FRAME;
p_rc->this_key_frame_forced =
cm->current_frame.frame_number != 0 && rc->frames_to_key == 0;
rc->frames_to_key = cpi->oxcf.kf_cfg.key_freq_max;
@@ -3042,7 +3090,7 @@ void av1_get_one_pass_rt_params(AV1_COMP *cpi,
svc->layer_context[layer].is_key_frame = 1;
}
} else {
- frame_params->frame_type = INTER_FRAME;
+ *frame_type = INTER_FRAME;
gf_group->update_type[cpi->gf_frame_index] = LF_UPDATE;
gf_group->frame_type[cpi->gf_frame_index] = INTER_FRAME;
gf_group->refbuf_state[cpi->gf_frame_index] = REFBUF_UPDATE;
@@ -3052,12 +3100,13 @@ void av1_get_one_pass_rt_params(AV1_COMP *cpi,
svc->spatial_layer_id == 0
? 0
: svc->layer_context[svc->temporal_layer_id].is_key_frame;
- // If the user is setting the SVC pattern with set_ref_frame_config and
- // did not set any references, set the frame type to Intra-only.
- if (svc->set_ref_frame_config) {
+ // If the user is setting the reference structure with
+ // set_ref_frame_config and did not set any references, set the
+ // frame type to Intra-only.
+ if (cpi->rtc_ref.set_ref_frame_config) {
int no_references_set = 1;
for (int i = 0; i < INTER_REFS_PER_FRAME; i++) {
- if (svc->reference[i]) {
+ if (cpi->rtc_ref.reference[i]) {
no_references_set = 0;
break;
}
@@ -3066,13 +3115,13 @@ void av1_get_one_pass_rt_params(AV1_COMP *cpi,
// The stream can start decoding on INTRA_ONLY_FRAME so long as the
// layer with the intra_only_frame doesn't signal a reference to a slot
// that hasn't been set yet.
- if (no_references_set) frame_params->frame_type = INTRA_ONLY_FRAME;
+ if (no_references_set) *frame_type = INTRA_ONLY_FRAME;
}
}
}
// Check for scene change: for SVC check on base spatial layer only.
if (cpi->sf.rt_sf.check_scene_detection && svc->spatial_layer_id == 0)
- rc_scene_detection_onepass_rt(cpi);
+ rc_scene_detection_onepass_rt(cpi, frame_input);
// Check for dynamic resize, for single spatial layer for now.
// For temporal layers only check on base temporal layer.
if (cpi->oxcf.resize_cfg.resize_mode == RESIZE_DYNAMIC) {
@@ -3095,19 +3144,17 @@ void av1_get_one_pass_rt_params(AV1_COMP *cpi,
}
// Set the GF interval and update flag.
if (!rc->rtc_external_ratectrl)
- set_gf_interval_update_onepass_rt(cpi, frame_params->frame_type);
+ set_gf_interval_update_onepass_rt(cpi, *frame_type);
// Set target size.
if (cpi->oxcf.rc_cfg.mode == AOM_CBR) {
- if (frame_params->frame_type == KEY_FRAME ||
- frame_params->frame_type == INTRA_ONLY_FRAME) {
+ if (*frame_type == KEY_FRAME || *frame_type == INTRA_ONLY_FRAME) {
target = av1_calc_iframe_target_size_one_pass_cbr(cpi);
} else {
target = av1_calc_pframe_target_size_one_pass_cbr(
cpi, gf_group->update_type[cpi->gf_frame_index]);
}
} else {
- if (frame_params->frame_type == KEY_FRAME ||
- frame_params->frame_type == INTRA_ONLY_FRAME) {
+ if (*frame_type == KEY_FRAME || *frame_type == INTRA_ONLY_FRAME) {
target = av1_calc_iframe_target_size_one_pass_vbr(cpi);
} else {
target = av1_calc_pframe_target_size_one_pass_vbr(
@@ -3119,7 +3166,7 @@ void av1_get_one_pass_rt_params(AV1_COMP *cpi,
av1_rc_set_frame_target(cpi, target, cm->width, cm->height);
rc->base_frame_target = target;
- cm->current_frame.frame_type = frame_params->frame_type;
+ cm->current_frame.frame_type = *frame_type;
// For fixed mode SVC: if KSVC is enabled remove inter layer
// prediction on spatial enhancement layer frames for frames
// whose base is not KEY frame.
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/ratectrl.h b/chromium/third_party/libaom/source/libaom/av1/encoder/ratectrl.h
index 7f86b052746..df177b9c306 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/ratectrl.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/ratectrl.h
@@ -57,6 +57,16 @@ extern "C" {
#define DEFAULT_KF_BOOST_RT 2300
#define DEFAULT_GF_BOOST_RT 2000
+// A passive rate control strategy for screen content type in real-time mode.
+// When it is turned on, the compression performance is improved by
+// 7.8% (overall_psnr), 5.0% (VMAF) on average. Some clips see gains
+// of over 20% on these metrics.
+// The downside is that it does not guarantee frame size.
+// Since RT mode has a tight restriction on buffer overflow control, we
+// turn it off by default.
+#define RT_PASSIVE_STRATEGY 0
+#define MAX_Q_HISTORY 1000
+
typedef struct {
int resize_width;
int resize_height;
@@ -218,16 +228,20 @@ typedef struct {
/*!\endcond */
/*!
- * Proposed maximum alloed Q for current frame
+ * Proposed maximum allowed Q for current frame
*/
int active_worst_quality;
/*!\cond */
// Track amount of low motion in scene
int avg_frame_low_motion;
+ int cnt_zeromv;
// signals if number of blocks with motion is high
- int high_num_blocks_with_motion;
+ int percent_blocks_with_motion;
+
+ // Maximum value of source sad across all blocks of frame.
+ uint64_t max_block_source_sad;
// For dynamic resize, 1 pass cbr.
RESIZE_STATE resize_state;
@@ -240,9 +254,8 @@ typedef struct {
// Stores fast_extra_bits of the current frame.
int frame_level_fast_extra_bits;
-#if CONFIG_FRAME_PARALLEL_ENCODE
+
double frame_level_rate_correction_factors[RATE_FACTOR_LEVELS];
-#endif
/*!\endcond */
} RATE_CONTROL;
@@ -346,7 +359,7 @@ typedef struct {
*/
int avg_frame_qindex[FRAME_TYPES];
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
/*!
* Temporary variable used in simulating the delayed update of
* active_best_quality.
@@ -521,8 +534,16 @@ typedef struct {
* size.
*/
int rolling_actual_bits;
+
+ /*!
+ * The history of qindex for each frame.
+ * Only used when RT_PASSIVE_STRATEGY = 1.
+ */
+ int q_history[MAX_Q_HISTORY];
} PRIMARY_RATE_CONTROL;
+/*!\cond */
+
struct AV1_COMP;
struct AV1EncoderConfig;
struct GF_GROUP;
@@ -566,7 +587,7 @@ int av1_rc_get_default_max_gf_interval(double framerate, int min_gf_interval);
// Functions to set parameters for encoding before the actual
// encode_frame_to_data_rate() function.
-struct EncodeFrameParams;
+struct EncodeFrameInput;
// Post encode update of the rate control parameters based
// on bytes used
@@ -586,7 +607,7 @@ void av1_rc_postencode_update_drop_frame(struct AV1_COMP *cpi);
* \param[in] width Frame width
* \param[in] height Frame height
*
- * \return None but updates the relevant rate correction factor in cpi->rc
+ * \remark Updates the relevant rate correction factor in cpi->rc
*/
void av1_rc_update_rate_correction_factors(struct AV1_COMP *cpi,
int is_encode_stage, int width,
@@ -617,7 +638,7 @@ void av1_rc_compute_frame_size_bounds(const struct AV1_COMP *cpi,
* \return Returns selected q index to be used for encoding this frame.
* Also, updates \c rc->arf_q.
*/
-int av1_rc_pick_q_and_bounds(const struct AV1_COMP *cpi, int width, int height,
+int av1_rc_pick_q_and_bounds(struct AV1_COMP *cpi, int width, int height,
int gf_index, int *bottom_index, int *top_index);
/*!\brief Estimates q to achieve a target bits per frame
@@ -683,8 +704,8 @@ void av1_rc_set_frame_target(struct AV1_COMP *cpi, int target, int width,
void av1_adjust_gf_refresh_qp_one_pass_rt(struct AV1_COMP *cpi);
-void av1_set_reference_structure_one_pass_rt(struct AV1_COMP *cpi,
- int gf_update);
+void av1_set_rtc_reference_structure_one_layer(struct AV1_COMP *cpi,
+ int gf_update);
/*!\endcond */
/*!\brief Calculates how many bits to use for a P frame in one pass vbr
@@ -749,14 +770,17 @@ int av1_calc_iframe_target_size_one_pass_cbr(const struct AV1_COMP *cpi);
*
* \ingroup rate_control
* \param[in] cpi Top level encoder structure
- * \param[in] frame_params Encoder frame parameters
- * \param[in] frame_flags Emcoder frame flags
+ * \param[in] frame_type Encoder frame type
+ * \param[in] frame_input Current and last input source frames
+ * \param[in] frame_flags Encoder frame flags
*
- * \return Nothing is returned. Instead the settings computed in this
- * funtion are set in: \c frame_params, \c cpi->common, \c cpi->rc, \c cpi->svc.
+ * \remark Nothing is returned. Instead the settings computed in this
+ * function are set in: \c frame_type, \c cpi->common, \c cpi->rc,
+ * \c cpi->svc.
*/
void av1_get_one_pass_rt_params(struct AV1_COMP *cpi,
- struct EncodeFrameParams *const frame_params,
+ FRAME_TYPE *const frame_type,
+ const struct EncodeFrameInput *frame_input,
unsigned int frame_flags);
/*!\brief Increase q on expected encoder overshoot, for CBR mode.
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/rd.c b/chromium/third_party/libaom/source/libaom/av1/encoder/rd.c
index 17c79603934..98c4fbe18cc 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/rd.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/rd.c
@@ -14,8 +14,6 @@
#include <math.h>
#include <stdio.h>
-#include "config/av1_rtcd.h"
-
#include "aom_dsp/aom_dsp_common.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/bitops.h"
@@ -25,23 +23,18 @@
#include "av1/common/common.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
-#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/seg_common.h"
-#include "av1/encoder/av1_quantize.h"
#include "av1/encoder/cost.h"
-#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encoder.h"
-#include "av1/encoder/encodetxb.h"
-#include "av1/encoder/mcomp.h"
+#include "av1/encoder/nonrd_opt.h"
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/rd.h"
-#include "av1/encoder/tokenize.h"
#define RD_THRESH_POW 1.25
@@ -515,8 +508,26 @@ void av1_set_sad_per_bit(const AV1_COMP *cpi, int *sadperbit, int qindex) {
}
}
-static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd) {
+static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd,
+ int use_nonrd_pick_mode) {
int i, bsize, segment_id;
+ THR_MODES mode_indices[RTC_REFS * RTC_MODES] = { 0 };
+ int num_modes_count = use_nonrd_pick_mode ? 0 : MAX_MODES;
+
+ if (use_nonrd_pick_mode) {
+ for (int r_idx = 0; r_idx < RTC_REFS; r_idx++) {
+ const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
+ if (ref != INTRA_FRAME) {
+ for (i = 0; i < RTC_INTER_MODES; i++)
+ mode_indices[num_modes_count++] =
+ mode_idx[ref][mode_offset(inter_mode_list[i])];
+ } else {
+ for (i = 0; i < RTC_INTRA_MODES; i++)
+ mode_indices[num_modes_count++] =
+ mode_idx[ref][mode_offset(intra_mode_list[i])];
+ }
+ }
+ }
for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
const int qindex = clamp(
@@ -531,10 +542,13 @@ static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd) {
const int t = q * rd_thresh_block_size_factor[bsize];
const int thresh_max = INT_MAX / t;
- for (i = 0; i < MAX_MODES; ++i)
- rd->threshes[segment_id][bsize][i] = rd->thresh_mult[i] < thresh_max
- ? rd->thresh_mult[i] * t / 4
- : INT_MAX;
+ for (i = 0; i < num_modes_count; ++i) {
+ const int mode_index = use_nonrd_pick_mode ? mode_indices[i] : i;
+ rd->threshes[segment_id][bsize][mode_index] =
+ rd->thresh_mult[mode_index] < thresh_max
+ ? rd->thresh_mult[mode_index] * t / 4
+ : INT_MAX;
+ }
}
}
}
@@ -706,6 +720,34 @@ static INLINE int is_frame_level_cost_upd_freq_set(
cost_upd_level == INTERNAL_COST_UPD_TILE || fill_costs);
}
+// Decide whether we want to update the mode entropy cost for the current frame.
+// The logic is currently inherited from selective_disable_cdf_rtc.
+static AOM_INLINE int should_force_mode_cost_update(const AV1_COMP *cpi) {
+ const REAL_TIME_SPEED_FEATURES *const rt_sf = &cpi->sf.rt_sf;
+ if (!rt_sf->frame_level_mode_cost_update) {
+ return false;
+ }
+
+ if (cpi->oxcf.algo_cfg.cdf_update_mode == 2) {
+ return cpi->frames_since_last_update == 1;
+ } else if (cpi->oxcf.algo_cfg.cdf_update_mode == 1) {
+ if (cpi->svc.number_spatial_layers == 1 &&
+ cpi->svc.number_temporal_layers == 1) {
+ const AV1_COMMON *const cm = &cpi->common;
+ const RATE_CONTROL *const rc = &cpi->rc;
+
+ return frame_is_intra_only(cm) || is_frame_resize_pending(cpi) ||
+ rc->high_source_sad || rc->frames_since_key < 10 ||
+ cpi->cyclic_refresh->counter_encode_maxq_scene_change < 10 ||
+ cm->current_frame.frame_number % 8 == 0;
+ } else if (cpi->svc.number_temporal_layers > 1) {
+ return cpi->svc.temporal_layer_id != cpi->svc.number_temporal_layers - 1;
+ }
+ }
+
+ return false;
+}
+
void av1_initialize_rd_consts(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->td.mb;
@@ -728,7 +770,7 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) {
av1_set_error_per_bit(&x->errorperbit, rd->RDMULT);
- set_block_thresholds(cm, rd);
+ set_block_thresholds(cm, rd, cpi->sf.rt_sf.use_nonrd_pick_mode);
populate_unified_cost_update_freq(cpi->oxcf.cost_upd_freq, sf);
const INTER_MODE_SPEED_FEATURES *const inter_sf = &cpi->sf.inter_sf;
@@ -744,7 +786,8 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) {
av1_fill_coeff_costs(&x->coeff_costs, cm->fc, av1_num_planes(cm));
// Frame level mode cost update
- if (is_frame_level_cost_upd_freq_set(cm, inter_sf->mode_cost_upd_level,
+ if (should_force_mode_cost_update(cpi) ||
+ is_frame_level_cost_upd_freq_set(cm, inter_sf->mode_cost_upd_level,
use_nonrd_pick_mode, frames_since_key))
av1_fill_mode_rates(cm, &x->mode_costs, cm->fc);
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/rd.h b/chromium/third_party/libaom/source/libaom/av1/encoder/rd.h
index 8d0277e3bff..96d53c94716 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/rd.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/rd.h
@@ -56,6 +56,28 @@ extern "C" {
// Factor to weigh the rate for switchable interp filters.
#define SWITCHABLE_INTERP_RATE_FACTOR 1
+#define RTC_REFS 4
+static const MV_REFERENCE_FRAME real_time_ref_combos[RTC_REFS][2] = {
+ { LAST_FRAME, NONE_FRAME },
+ { ALTREF_FRAME, NONE_FRAME },
+ { GOLDEN_FRAME, NONE_FRAME },
+ { INTRA_FRAME, NONE_FRAME }
+};
+
+static INLINE int mode_offset(const PREDICTION_MODE mode) {
+ if (mode >= NEARESTMV) {
+ return INTER_OFFSET(mode);
+ } else {
+ switch (mode) {
+ case DC_PRED: return 0;
+ case V_PRED: return 1;
+ case H_PRED: return 2;
+ case SMOOTH_PRED: return 3;
+ default: assert(0); return -1;
+ }
+ }
+}
+
enum {
// Default initialization when we are not using winner mode framework. e.g.
// intrabc
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/rdopt.c b/chromium/third_party/libaom/source/libaom/av1/encoder/rdopt.c
index b9470430adf..c25db61b25f 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/rdopt.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/rdopt.c
@@ -1354,7 +1354,7 @@ static int64_t motion_mode_rd(
get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
int use_actual_frame_probs = 1;
int prune_obmc;
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
use_actual_frame_probs =
(cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
if (!use_actual_frame_probs) {
@@ -2533,7 +2533,7 @@ static AOM_INLINE int prune_zero_mv_with_sse(
* is currently only used by realtime mode as \ref
* av1_interpolation_filter_search is not called during realtime encoding.
*
- * This funciton only searches over two possible filters. EIGHTTAP_REGULAR is
+ * This function only searches over two possible filters. EIGHTTAP_REGULAR is
 * always searched. For lowres clips (<= 240p), MULTITAP_SHARP is also
 * searched. For higher res clips (>240p), EIGHTTAP_SMOOTH is also searched.
 *
@@ -2906,11 +2906,16 @@ static int64_t handle_inter_mode(
}
if (cpi->sf.rt_sf.skip_newmv_mode_based_on_sse) {
- const double scale_factor[11] = { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8,
- 0.8, 0.9, 0.9, 0.9, 0.9 };
- assert(num_pels_log2_lookup[bsize] >= 4);
- if (args->best_pred_sse <
- scale_factor[num_pels_log2_lookup[bsize] - 4] * this_sse)
+ const int th_idx = cpi->sf.rt_sf.skip_newmv_mode_based_on_sse - 1;
+ const int pix_idx = num_pels_log2_lookup[bsize] - 4;
+ const double scale_factor[3][11] = {
+ { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 0.9, 0.9, 0.9, 0.9 },
+ { 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 1, 1, 1, 1, 1 },
+ { 0.7, 0.7, 0.7, 0.7, 1, 1, 1, 1, 1, 1, 1 }
+ };
+ assert(pix_idx >= 0);
+ assert(th_idx <= 2);
+ if (args->best_pred_sse < scale_factor[th_idx][pix_idx] * this_sse)
continue;
}
}
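
skip_newmv_mode_based_on_sse is now a level (1..3) rather than a flag: higher levels select rows whose factors saturate at 1.0 on larger blocks, so NEWMV is pruned whenever the best prediction SSE is already below the NEWMV SSE. A sketch of the table lookup, with the constants copied from the hunk above and a hypothetical helper name:

static double newmv_prune_factor_sketch(int sf_level, int num_pels_log2) {
  static const double scale_factor[3][11] = {
    { 0.7, 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 0.9, 0.9, 0.9, 0.9 },
    { 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 1, 1, 1, 1, 1 },
    { 0.7, 0.7, 0.7, 0.7, 1, 1, 1, 1, 1, 1, 1 }
  };
  /* sf_level in 1..3 selects the row; the block's log2 pixel count
   * (4..14) selects the column. */
  return scale_factor[sf_level - 1][num_pels_log2 - 4];
}
/* e.g. level 3 on a 64x64 block (num_pels_log2 = 12) yields 1.0, so
 * NEWMV is skipped whenever best_pred_sse < this_sse. */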
@@ -3290,7 +3295,7 @@ void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
const int num_planes = av1_num_planes(cm);
TxfmSearchInfo *txfm_info = &x->txfm_search_info;
int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
- int y_skip_txfm = 0, uv_skip_txfm = 0;
+ uint8_t y_skip_txfm = 0, uv_skip_txfm = 0;
int64_t dist_y = 0, dist_uv = 0;
ctx->rd_stats.skip_txfm = 0;
@@ -3529,7 +3534,7 @@ static AOM_INLINE void refine_winner_mode_tx(
const int num_planes = av1_num_planes(cm);
if (!is_winner_mode_processing_enabled(cpi, x, best_mbmode,
- best_mbmode->mode))
+ rd_cost->skip_txfm))
return;
// Set params for winner mode evaluation
@@ -3538,16 +3543,6 @@ static AOM_INLINE void refine_winner_mode_tx(
// No best mode identified so far
if (*best_mode_index == THR_INVALID) return;
- int skip_winner_mode_eval =
- cpi->sf.winner_mode_sf.disable_winner_mode_eval_for_txskip;
- // Do not skip winner mode evaluation at low quantizers if normal mode's
- // transform search was too aggressive.
- if (cpi->sf.rd_sf.perform_coeff_opt >= 5 && x->qindex <= 70)
- skip_winner_mode_eval = 0;
-
- if (skip_winner_mode_eval && (best_mbmode->skip_txfm || rd_cost->skip_txfm))
- return;
-
best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
RD_STATS *winner_rd_stats = NULL;
@@ -3564,7 +3559,7 @@ static AOM_INLINE void refine_winner_mode_tx(
if (xd->lossless[winner_mbmi->segment_id] == 0 &&
winner_mode_index != THR_INVALID &&
is_winner_mode_processing_enabled(cpi, x, winner_mbmi,
- winner_mbmi->mode)) {
+ rd_cost->skip_txfm)) {
RD_STATS rd_stats = *winner_rd_stats;
int skip_blk = 0;
RD_STATS rd_stats_y, rd_stats_uv;
@@ -3708,13 +3703,6 @@ static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
{ ALTREF_FRAME, INTRA_FRAME }, { BWDREF_FRAME, INTRA_FRAME },
};
-static const MV_REFERENCE_FRAME real_time_ref_combos[][2] = {
- { LAST_FRAME, NONE_FRAME },
- { ALTREF_FRAME, NONE_FRAME },
- { GOLDEN_FRAME, NONE_FRAME },
- { INTRA_FRAME, NONE_FRAME }
-};
-
typedef enum { REF_SET_FULL, REF_SET_REDUCED, REF_SET_REALTIME } REF_SET;
static AOM_INLINE void default_skip_mask(mode_skip_mask_t *mask,
@@ -3846,8 +3834,8 @@ static AOM_INLINE void init_mode_skip_mask(mode_skip_mask_t *mask,
}
if (sf->inter_sf.alt_ref_search_fp) {
- if (!cm->show_frame && x->best_pred_mv_sad < INT_MAX) {
- int sad_thresh = x->best_pred_mv_sad + (x->best_pred_mv_sad >> 3);
+ if (!cm->show_frame && x->best_pred_mv_sad[0] < INT_MAX) {
+ int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 3);
// Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
// those are past frames
MV_REFERENCE_FRAME start_frame =
@@ -3872,8 +3860,8 @@ static AOM_INLINE void init_mode_skip_mask(mode_skip_mask_t *mask,
}
if (sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
- if (x->best_pred_mv_sad < INT_MAX) {
- int sad_thresh = x->best_pred_mv_sad + (x->best_pred_mv_sad >> 1);
+ if (x->best_pred_mv_sad[0] < INT_MAX) {
+ int sad_thresh = x->best_pred_mv_sad[0] + (x->best_pred_mv_sad[0] >> 1);
const int prune_ref_list[2] = { GOLDEN_FRAME, ALTREF_FRAME };
// Conservatively skip the modes w.r.t. GOLDEN and ALTREF references
@@ -3897,7 +3885,7 @@ static AOM_INLINE void init_mode_skip_mask(mode_skip_mask_t *mask,
}
mask->pred_modes[INTRA_FRAME] |=
- ~(sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]]);
+ ~(uint32_t)sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
}
static AOM_INLINE void init_neighbor_pred_buf(
@@ -3990,7 +3978,8 @@ static AOM_INLINE void set_params_rd_pick_inter_mode(
const int mi_row = xd->mi_row;
const int mi_col = xd->mi_col;
- x->best_pred_mv_sad = INT_MAX;
+ x->best_pred_mv_sad[0] = INT_MAX;
+ x->best_pred_mv_sad[1] = INT_MAX;
for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
++ref_frame) {
@@ -4008,12 +3997,18 @@ static AOM_INLINE void set_params_rd_pick_inter_mode(
assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
}
- // Store the best pred_mv_sad across all past frames
- if ((cpi->sf.inter_sf.alt_ref_search_fp ||
- cpi->sf.rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) &&
- cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] < 0)
- x->best_pred_mv_sad =
- AOMMIN(x->best_pred_mv_sad, x->pred_mv_sad[ref_frame]);
+ if (cpi->sf.inter_sf.alt_ref_search_fp ||
+ cpi->sf.rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad) {
+ // Store the best pred_mv_sad across all past frames
+ if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
+ 0)
+ x->best_pred_mv_sad[0] =
+ AOMMIN(x->best_pred_mv_sad[0], x->pred_mv_sad[ref_frame]);
+ else
+ // Store the best pred_mv_sad across all future frames
+ x->best_pred_mv_sad[1] =
+ AOMMIN(x->best_pred_mv_sad[1], x->pred_mv_sad[ref_frame]);
+ }
}
if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
@@ -4050,7 +4045,7 @@ static AOM_INLINE void set_params_rd_pick_inter_mode(
get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
int use_actual_frame_probs = 1;
int prune_obmc;
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
use_actual_frame_probs =
(cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
if (!use_actual_frame_probs) {
@@ -5324,19 +5319,11 @@ static void handle_winner_cand(
* InterModeSearchState::intra_search_state so it can be reused later by \ref
* av1_search_palette_mode.
*
- * \return Returns the rdcost of the current intra-mode if it's available,
- * otherwise returns INT64_MAX. The corresponding values in x->e_mbd.mi[0],
- * rd_stats, rd_stats_y/uv, and best_intra_rd are also updated. Moreover, in the
- * first evocation of the function, the chroma intra mode result is cached in
- * intra_search_state to be used in subsequent calls. In the first evaluation
- * with directional mode, a prune_mask computed with histogram of gradient is
- * also stored in intra_search_state.
- *
* \param[in,out] search_state Struct keep track of the prediction mode
* search state in interframe.
*
* \param[in] cpi Top-level encoder structure.
- * \param[in] x Pointer to struct holding all the data for
+ * \param[in,out] x Pointer to struct holding all the data for
* the current prediction block.
* \param[out] rd_cost Stores the best rd_cost among all the
* prediction modes searched.
@@ -5344,21 +5331,21 @@ static void handle_winner_cand(
* \param[in,out] ctx Structure to hold the number of 4x4 blks to
* copy the tx_type and txfm_skip arrays for only the Y plane.
- * \param[in,out] sf_args Stores the list of intra mode candidates
+ * \param[in] sf_args Stores the list of intra mode candidates
* to be searched.
* \param[in] intra_ref_frame_cost The entropy cost for signaling that the
* current ref frame is an intra frame.
* \param[in] yrd_threshold The rdcost threshold for luma intra mode to
* terminate chroma intra mode search.
*
- * \return Returns INT64_MAX if the determined motion mode is invalid and the
- * current motion mode being tested should be skipped. It returns 0 if the
- * motion mode search is a success.
+ * \remark If a new best mode is found, search_state and rd_cost are updated
+ * accordingly. While x is also modified, it is only used as a temporary
+ * buffer, and the final decisions are stored in search_state.
*/
static AOM_INLINE void search_intra_modes_in_interframe(
InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
- InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
+ const InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
int64_t yrd_threshold) {
const AV1_COMMON *const cm = &cpi->common;
const SPEED_FEATURES *const sf = &cpi->sf;
@@ -5387,7 +5374,8 @@ static AOM_INLINE void search_intra_modes_in_interframe(
if (sf->intra_sf.skip_intra_in_interframe &&
search_state->intra_search_state.skip_intra_modes)
break;
- set_y_mode_and_delta_angle(mode_idx, mbmi);
+ set_y_mode_and_delta_angle(
+ mode_idx, mbmi, sf->intra_sf.prune_luma_odd_delta_angles_in_intra);
assert(mbmi->mode < INTRA_MODE_END);
// Use intra_y_mode_mask speed feature to skip intra mode evaluation.
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/rdopt.h b/chromium/third_party/libaom/source/libaom/av1/encoder/rdopt.h
index cbc4bf343de..4d813a2bb85 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/rdopt.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/rdopt.h
@@ -54,7 +54,7 @@ struct RD_STATS;
during the mode picking process.
* \param[in] best_rd Best RD seen for this block so far.
*
- * \return Nothing is returned. Instead, the MB_MODE_INFO struct inside x
+ * \remark Nothing is returned. Instead, the MB_MODE_INFO struct inside x
* is modified to store information about the best mode computed
* in this function. The rd_cost struct is also updated with the RD stats
* corresponding to the best mode found.
@@ -85,7 +85,7 @@ void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
during the mode picking process
* \param[in] best_rd_so_far Best RD seen for this block so far
*
- * \return Nothing is returned. Instead, the MB_MODE_INFO struct inside x
+ * \remark Nothing is returned. Instead, the MB_MODE_INFO struct inside x
* is modified to store information about the best mode computed
* in this function. The rd_cost struct is also updated with the RD stats
* corresponding to the best mode found.
@@ -115,7 +115,7 @@ void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
* \param[in] ctx Structure to hold snapshot of coding context
during the mode picking process
*
- * \return Nothing is returned. Instead, the MB_MODE_INFO struct inside x
+ * \remark Nothing is returned. Instead, the MB_MODE_INFO struct inside x
* is modified to store information about the best mode computed
* in this function. The rd_cost struct is also updated with the RD stats
* corresponding to the best mode found.
@@ -147,7 +147,7 @@ void av1_nonrd_pick_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost,
* \param[in] ctx Structure to hold snapshot of coding context
during the mode picking process
*
- * \return Nothing is returned. Instead, the MB_MODE_INFO struct inside x
+ * \remark Nothing is returned. Instead, the MB_MODE_INFO struct inside x
* is modified to store information about the best mode computed
* in this function. The rd_cost struct is also updated with the RD stats
* corresponding to the best mode found.
@@ -213,6 +213,31 @@ static INLINE int prune_ref(const MV_REFERENCE_FRAME *const ref_frame,
return 0;
}
+static INLINE int has_closest_ref_frames(const MV_REFERENCE_FRAME *ref_frame,
+ int8_t closest_past_ref,
+ int8_t closest_future_ref) {
+ int has_closest_past_ref =
+ (ref_frame[0] == closest_past_ref) || (ref_frame[1] == closest_past_ref);
+ int has_closest_future_ref = (ref_frame[0] == closest_future_ref) ||
+ (ref_frame[1] == closest_future_ref);
+ return (has_closest_past_ref && has_closest_future_ref);
+}
+
+static INLINE int has_best_pred_mv_sad(const MV_REFERENCE_FRAME *ref_frame,
+ const MACROBLOCK *const x) {
+ int has_best_past_pred_mv_sad = 0;
+ int has_best_future_pred_mv_sad = 0;
+ if (x->best_pred_mv_sad[0] < INT_MAX && x->best_pred_mv_sad[1] < INT_MAX) {
+ has_best_past_pred_mv_sad =
+ (x->pred_mv_sad[ref_frame[0]] == x->best_pred_mv_sad[0]) ||
+ (x->pred_mv_sad[ref_frame[1]] == x->best_pred_mv_sad[0]);
+ has_best_future_pred_mv_sad =
+ (x->pred_mv_sad[ref_frame[0]] == x->best_pred_mv_sad[1]) ||
+ (x->pred_mv_sad[ref_frame[1]] == x->best_pred_mv_sad[1]);
+ }
+ return (has_best_past_pred_mv_sad && has_best_future_pred_mv_sad);
+}
+
static INLINE int prune_ref_by_selective_ref_frame(
const AV1_COMP *const cpi, const MACROBLOCK *const x,
const MV_REFERENCE_FRAME *const ref_frame,
@@ -230,11 +255,11 @@ static INLINE int prune_ref_by_selective_ref_frame(
// Disable pruning if either tpl suggests that we keep the frame or
// the pred_mv gives us the best sad
if (x->tpl_keep_ref_frame[LAST3_FRAME] ||
- x->pred_mv_sad[LAST3_FRAME] == x->best_pred_mv_sad) {
+ x->pred_mv_sad[LAST3_FRAME] == x->best_pred_mv_sad[0]) {
ref_frame_list[0] = NONE_FRAME;
}
if (x->tpl_keep_ref_frame[LAST2_FRAME] ||
- x->pred_mv_sad[LAST2_FRAME] == x->best_pred_mv_sad) {
+ x->pred_mv_sad[LAST2_FRAME] == x->best_pred_mv_sad[0]) {
ref_frame_list[1] = NONE_FRAME;
}
}
@@ -252,11 +277,11 @@ static INLINE int prune_ref_by_selective_ref_frame(
// Disable pruning if either tpl suggests that we keep the frame or
// the pred_mv gives us the best sad
if (x->tpl_keep_ref_frame[ALTREF2_FRAME] ||
- x->pred_mv_sad[ALTREF2_FRAME] == x->best_pred_mv_sad) {
+ x->pred_mv_sad[ALTREF2_FRAME] == x->best_pred_mv_sad[0]) {
ref_frame_list[0] = NONE_FRAME;
}
if (x->tpl_keep_ref_frame[BWDREF_FRAME] ||
- x->pred_mv_sad[BWDREF_FRAME] == x->best_pred_mv_sad) {
+ x->pred_mv_sad[BWDREF_FRAME] == x->best_pred_mv_sad[0]) {
ref_frame_list[1] = NONE_FRAME;
}
}
@@ -267,6 +292,21 @@ static INLINE int prune_ref_by_selective_ref_frame(
return 1;
}
+ if (x != NULL && sf->inter_sf.prune_comp_ref_frames && comp_pred) {
+ int closest_ref_frames = has_closest_ref_frames(
+ ref_frame, cpi->ref_frame_dist_info.nearest_past_ref,
+ cpi->ref_frame_dist_info.nearest_future_ref);
+ if (closest_ref_frames == 0) {
+ // Prune reference frames which are not the closest to the current frame.
+ if (sf->inter_sf.prune_comp_ref_frames >= 2) {
+ return 1;
+ } else if (sf->inter_sf.prune_comp_ref_frames == 1) {
+ // Prune reference frames with non-minimum pred_mv_sad.
+ if (has_best_pred_mv_sad(ref_frame, x) == 0) return 1;
+ }
+ }
+ }
+
return 0;
}
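
For orientation, a condensed restatement (hypothetical sketch, not part of the patch) of the compound-reference pruning that the two helpers added above implement:

/* prune_comp_ref_frames level 2+ keeps only pairs built from the nearest
 * past and future references; level 1 additionally keeps pairs that match
 * the best pred_mv_sad in both directions. */
static int should_prune_comp_pair(int prune_level, int is_closest_pair,
                                  int has_best_sad_pair) {
  if (prune_level == 0 || is_closest_pair) return 0;
  if (prune_level >= 2) return 1;
  return !has_best_sad_pair; /* prune_level == 1 */
}
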
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/rdopt_utils.h b/chromium/third_party/libaom/source/libaom/av1/encoder/rdopt_utils.h
index 7014c75659d..91823d8ffbd 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/rdopt_utils.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/rdopt_utils.h
@@ -395,20 +395,60 @@ static TX_MODE select_tx_mode(
return TX_MODE_SELECT;
}
}
-// Checks the conditions to enable winner mode processing
-static INLINE int is_winner_mode_processing_enabled(
- const struct AV1_COMP *cpi, const MACROBLOCK *const x,
- MB_MODE_INFO *const mbmi, const PREDICTION_MODE best_mode) {
- const SPEED_FEATURES *sf = &cpi->sf;
+
+// Checks the conditions to disable winner mode processing
+static INLINE int bypass_winner_mode_processing(const MACROBLOCK *const x,
+ const SPEED_FEATURES *sf,
+ int use_txfm_skip,
+ int actual_txfm_skip,
+ PREDICTION_MODE best_mode) {
+ const int prune_winner_mode_eval_level =
+ sf->winner_mode_sf.prune_winner_mode_eval_level;
// Disable winner mode processing for blocks with low source variance.
// The aggressiveness of this pruning logic reduces as qindex increases.
// The threshold decreases linearly from 64 as qindex varies from 0 to 255.
- if (sf->winner_mode_sf.prune_winner_mode_processing_using_src_var) {
+ if (prune_winner_mode_eval_level == 1) {
const unsigned int src_var_thresh = 64 - 48 * x->qindex / (MAXQ + 1);
- if (x->source_variance < src_var_thresh) return 0;
+ if (x->source_variance < src_var_thresh) return 1;
+ } else if (prune_winner_mode_eval_level == 2) {
+ // Skip winner mode processing of blocks whose transform turns out to be
+ // skipped based on the nature of eob alone, except for NEWMV mode.
+ if (!have_newmv_in_inter_mode(best_mode) && actual_txfm_skip) return 1;
+ } else if (prune_winner_mode_eval_level == 3) {
+ // Skip winner mode processing of blocks whose transform turns out to be
+ // skipped, except for NEWMV mode; the skip criterion depends on the
+ // quantizer.
+ // At high quantizers: take the conservative approach and treat the
+ // transform as skipped based on eob alone.
+ // At low quantizers: treat the transform as skipped based on either eob or
+ // the RD cost evaluation.
+ const int is_txfm_skip =
+ x->qindex > 127 ? actual_txfm_skip : actual_txfm_skip || use_txfm_skip;
+
+ if (!have_newmv_in_inter_mode(best_mode) && is_txfm_skip) return 1;
+ } else if (prune_winner_mode_eval_level >= 4) {
+ // Do not skip winner mode evaluation at low quantizers if normal mode's
+ // transform search was too aggressive.
+ if (sf->rd_sf.perform_coeff_opt >= 5 && x->qindex <= 70) return 0;
+
+ if (use_txfm_skip || actual_txfm_skip) return 1;
}
+ return 0;
+}
+
+// Checks the conditions to enable winner mode processing
+static INLINE int is_winner_mode_processing_enabled(const struct AV1_COMP *cpi,
+ const MACROBLOCK *const x,
+ MB_MODE_INFO *const mbmi,
+ int actual_txfm_skip) {
+ const SPEED_FEATURES *sf = &cpi->sf;
+ const PREDICTION_MODE best_mode = mbmi->mode;
+
+ if (bypass_winner_mode_processing(x, sf, mbmi->skip_txfm, actual_txfm_skip,
+ best_mode))
+ return 0;
+
// TODO(any): Move block independent condition checks to frame level
if (is_inter_block(mbmi)) {
if (is_inter_mode(best_mode) &&
@@ -588,10 +628,17 @@ static INLINE void set_mode_eval_params(const struct AV1_COMP *cpi,
set_tx_type_prune(sf, txfm_params,
sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning,
1);
- reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
break;
default: assert(0);
}
+
+ // Rd record collected at a specific mode evaluation stage can not be used
+ // across other evaluation stages as the transform parameters are different.
+ // Hence, reset mb rd record whenever mode evaluation stage type changes.
+ if (txfm_params->mode_eval_type != mode_eval_type)
+ reset_mb_rd_record(x->txfm_search_info.mb_rd_record);
+
+ txfm_params->mode_eval_type = mode_eval_type;
}
// Similar to store_cfl_required(), but for use during the RDO process,
@@ -693,13 +740,16 @@ static INLINE void store_winner_mode_stats(
AOMMIN(x->winner_mode_count + 1, max_winner_mode_count);
}
-unsigned int av1_get_sby_perpixel_variance(const struct AV1_COMP *cpi,
- const struct buf_2d *ref,
- BLOCK_SIZE bs);
+unsigned int av1_get_perpixel_variance(const AV1_COMP *cpi,
+ const MACROBLOCKD *xd,
+ const struct buf_2d *ref,
+ BLOCK_SIZE bsize, int plane,
+ int use_hbd);
-unsigned int av1_high_get_sby_perpixel_variance(const struct AV1_COMP *cpi,
- const struct buf_2d *ref,
- BLOCK_SIZE bs, int bd);
+unsigned int av1_get_perpixel_variance_facade(const struct AV1_COMP *cpi,
+ const MACROBLOCKD *xd,
+ const struct buf_2d *ref,
+ BLOCK_SIZE bsize, int plane);
static INLINE int is_mode_intra(PREDICTION_MODE mode) {
return mode < INTRA_MODE_END;
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/segmentation.c b/chromium/third_party/libaom/source/libaom/av1/encoder/segmentation.c
index d3158388558..4b4e78779cc 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/segmentation.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/segmentation.c
@@ -35,7 +35,7 @@ void av1_disable_segmentation(struct segmentation *seg) {
void av1_disable_segfeature(struct segmentation *seg, int segment_id,
SEG_LVL_FEATURES feature_id) {
- seg->feature_mask[segment_id] &= ~(1 << feature_id);
+ seg->feature_mask[segment_id] &= ~(1u << feature_id);
}
void av1_clear_segdata(struct segmentation *seg, int segment_id,
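
This hunk and the intra_y_mode_mask hunk earlier in the patch apply the same hardening: keep bit-mask arithmetic in unsigned types. A standalone illustration (hypothetical, not libaom code):

#include <stdint.h>

/* With a signed int, 1 << 31 is undefined behavior, and for smaller shift
 * counts ~(1 << bit) yields a negative int that is implicitly converted
 * when combined with an unsigned mask, tripping sign-conversion warnings.
 * An unsigned literal keeps every step well defined. */
static uint32_t clear_bit(uint32_t mask, int bit) {
  return mask & ~(1u << bit);
}
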
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/speed_features.c b/chromium/third_party/libaom/source/libaom/av1/encoder/speed_features.c
index 6167a482a1b..c4f95c00bf0 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/speed_features.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/speed_features.c
@@ -57,14 +57,20 @@ static unsigned int tx_domain_dist_thresholds[4][MODE_EVAL_TYPES] = {
{ 0, 0, 0 }
};
+// Number of different levels of aggressiveness in using transform domain
+// distortion during the R-D evaluation based on the speed feature
+// tx_domain_dist_level.
+#define TX_DOMAIN_DIST_LEVELS 4
+
// Transform domain distortion type to be used for default, mode and winner mode
// evaluation. Index 0: Default mode evaluation, winner mode processing is not
// applicable (e.g., IntraBc). Index 1: Mode evaluation. Index 2: Winner mode
// evaluation. Indices 1 and 2 are applicable when
// enable_winner_mode_for_use_tx_domain_dist speed feature is ON
-static unsigned int tx_domain_dist_types[3][MODE_EVAL_TYPES] = { { 0, 2, 0 },
- { 1, 2, 0 },
- { 2, 2, 0 } };
+static unsigned int
+ tx_domain_dist_types[TX_DOMAIN_DIST_LEVELS][MODE_EVAL_TYPES] = {
+ { 0, 2, 0 }, { 1, 2, 0 }, { 2, 2, 0 }, { 2, 2, 2 }
+ };
// Threshold values to be used for disabling coeff RD-optimization
// based on block MSE / qstep^2.
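
The extended table above is consumed verbatim later in this patch (see the memcpy guarded by the new TX_DOMAIN_DIST_LEVELS assert). A hypothetical lookup, assuming the surrounding definitions in this file, shows the indexing:

/* Row = sf->rd_sf.tx_domain_dist_level, column = mode evaluation stage.
 * The returned value is the distortion method (0: pixel domain, 1: mixed,
 * 2: transform domain) described in the comment above the table. */
static unsigned int tx_domain_dist_method(int level, int eval_type) {
  assert(level >= 0 && level < TX_DOMAIN_DIST_LEVELS);
  assert(eval_type >= 0 && eval_type < MODE_EVAL_TYPES);
  return tx_domain_dist_types[level][eval_type];
}
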
@@ -113,18 +119,23 @@ static unsigned int predict_skip_levels[3][MODE_EVAL_TYPES] = { { 0, 0, 0 },
{ 1, 1, 1 },
{ 1, 2, 1 } };
-// Predict DC block levels to be used for default, mode and winner mode
-// evaluation. Index 0: Default mode evaluation, Winner mode processing is not
-// applicable. Index 1: Mode evaluation, Index 2: Winner mode evaluation
-// Values indicate the aggressiveness of skip flag prediction.
-// 0 : no early DC block prediction
-// 1 : Early DC block prediction based on error variance
-static unsigned int predict_dc_levels[3][MODE_EVAL_TYPES] = { { 0, 0, 0 },
- { 1, 1, 0 },
- { 1, 1, 1 } };
-
-#if !CONFIG_FRAME_PARALLEL_ENCODE || \
- (CONFIG_FRAME_PARALLEL_ENCODE && !CONFIG_FPMT_TEST)
+// Predict skip or DC block level used during transform type search. It is
+// indexed using the following:
+// First index : Speed feature 'dc_blk_pred_level' (0 to 3)
+// Second index : Mode evaluation type (DEFAULT_EVAL, MODE_EVAL and
+// WINNER_MODE_EVAL).
+//
+// The values of predict_dc_levels[][] indicate the aggressiveness of predicting
+// a block as transform skip or DC only.
+// Type 0 : No skip block or DC only block prediction
+// Type 1 : Prediction of skip block based on residual mean and variance
+// Type 2 : Prediction of skip block or DC only block based on residual mean and
+// variance
+static unsigned int predict_dc_levels[4][MODE_EVAL_TYPES] = {
+ { 0, 0, 0 }, { 1, 1, 0 }, { 2, 2, 0 }, { 2, 2, 2 }
+};
+
+#if !CONFIG_FPMT_TEST
// This table holds the maximum number of reference frames for global motion.
// The table is indexed as per the speed feature 'gm_search_type'.
// 0 : All reference frames are allowed.
@@ -475,12 +486,14 @@ static void set_allintra_speed_features_framesize_independent(
}
if (speed >= 6) {
+ sf->intra_sf.prune_smooth_intra_mode_for_chroma = 1;
sf->intra_sf.prune_filter_intra_level = 2;
sf->intra_sf.chroma_intra_pruning_with_hog = 4;
sf->intra_sf.intra_pruning_with_hog = 4;
sf->intra_sf.cfl_search_range = 1;
sf->intra_sf.top_intra_model_count_allowed = 2;
sf->intra_sf.adapt_top_model_rd_count_using_neighbors = 1;
+ sf->intra_sf.prune_luma_odd_delta_angles_in_intra = 1;
sf->part_sf.prune_rectangular_split_based_on_qidx =
allow_screen_content_tools ? 0 : 2;
@@ -495,13 +508,17 @@ static void set_allintra_speed_features_framesize_independent(
sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 3;
sf->tx_sf.tx_type_search.prune_tx_type_est_rd = 0;
+ sf->tx_sf.prune_intra_tx_depths_using_nn = true;
sf->rd_sf.perform_coeff_opt = 6;
+ sf->rd_sf.tx_domain_dist_level = 3;
+
sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL4;
sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;
sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF;
- sf->winner_mode_sf.prune_winner_mode_processing_using_src_var = 1;
+ sf->winner_mode_sf.prune_winner_mode_eval_level = 1;
+ sf->winner_mode_sf.dc_blk_pred_level = 1;
}
// The following should make all-intra mode speed 7 approximately equal
// to real-time speed 6,
@@ -536,7 +553,18 @@ static void set_allintra_speed_features_framesize_independent(
sf->rt_sf.nonrd_check_partition_merge_mode = 0;
sf->rt_sf.hybrid_intra_pickmode = 0;
sf->rt_sf.var_part_split_threshold_shift = 9;
- }
+ sf->rt_sf.vbp_prune_16x16_split_using_min_max_sub_blk_var = true;
+ sf->rt_sf.prune_h_pred_using_best_mode_so_far = true;
+ }
+
+ // As the speed feature prune_chroma_modes_using_luma_winner already
+ // constrains the number of chroma directional mode evaluations to a maximum
+ // of 1, the HOG computation and the associated pruning logic do not seem to
+ // help speed up the chroma mode evaluations. Hence disable the speed feature
+ // chroma_intra_pruning_with_hog when prune_chroma_modes_using_luma_winner is
+ // enabled.
+ if (sf->intra_sf.prune_chroma_modes_using_luma_winner)
+ sf->intra_sf.chroma_intra_pruning_with_hog = 0;
}
static void set_good_speed_feature_framesize_dependent(
@@ -745,6 +773,12 @@ static void set_good_speed_feature_framesize_dependent(
sf->inter_sf.limit_txfm_eval_per_mode = boosted ? 0 : 2;
if (is_480p_or_lesser) sf->inter_sf.skip_newmv_in_drl = 3;
+ if (is_720p_or_larger) {
+ sf->inter_sf.prune_comp_ref_frames = 1;
+ } else if (is_480p_or_larger) {
+ sf->inter_sf.prune_comp_ref_frames = is_boosted_arf2_bwd_type ? 0 : 1;
+ }
+
if (is_720p_or_larger)
sf->hl_sf.recode_tolerance = 32;
else
@@ -764,6 +798,7 @@ static void set_good_speed_feature_framesize_dependent(
if (is_720p_or_larger) sf->hl_sf.recode_tolerance = 40;
sf->inter_sf.skip_newmv_in_drl = 4;
+ sf->inter_sf.prune_comp_ref_frames = 1;
if (!is_720p_or_larger) {
sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW_SET;
@@ -772,6 +807,7 @@ static void set_good_speed_feature_framesize_dependent(
if (!is_480p_or_larger) {
sf->tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh =
boosted ? INT_MAX : 250;
+ sf->part_sf.partition_search_breakout_dist_thr = (1 << 26);
}
if (is_480p_or_lesser) {
@@ -784,6 +820,7 @@ static void set_good_speed_feature_framesize_dependent(
if (speed >= 6) {
sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 4;
sf->inter_sf.prune_nearmv_using_neighbors = PRUNE_NEARMV_LEVEL3;
+ sf->inter_sf.prune_comp_ref_frames = 2;
if (is_720p_or_larger) {
sf->part_sf.auto_max_partition_based_on_simple_motion = NOT_IN_USE;
} else if (is_480p_or_larger) {
@@ -805,8 +842,10 @@ static void set_good_speed_feature_framesize_dependent(
if (is_720p_or_larger) {
sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32;
+ sf->part_sf.partition_search_breakout_dist_thr = (1 << 28);
} else {
sf->part_sf.use_square_partition_only_threshold = BLOCK_16X16;
+ sf->part_sf.partition_search_breakout_dist_thr = (1 << 26);
}
if (is_720p_or_larger) {
@@ -1067,11 +1106,10 @@ static void set_good_speed_features_framesize_independent(
sf->winner_mode_sf.enable_winner_mode_for_coeff_opt = 1;
sf->winner_mode_sf.enable_winner_mode_for_use_tx_domain_dist = 1;
sf->winner_mode_sf.motion_mode_for_winner_cand =
- boosted ? 0
- : gf_group->update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE
- ? 1
- : 2;
- sf->winner_mode_sf.disable_winner_mode_eval_for_txskip = boosted ? 0 : 1;
+ boosted ? 0
+ : gf_group->update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE ? 1
+ : 2;
+ sf->winner_mode_sf.prune_winner_mode_eval_level = boosted ? 0 : 4;
// For screen content, "prune_sgr_based_on_wiener = 2" cause large quality
// loss.
@@ -1103,7 +1141,6 @@ static void set_good_speed_features_framesize_independent(
sf->inter_sf.prune_ext_comp_using_neighbors = 2;
sf->inter_sf.prune_obmc_prob_thresh = INT_MAX;
sf->inter_sf.disable_interinter_wedge_var_thresh = UINT_MAX;
- sf->inter_sf.prune_nearest_near_mv_using_refmv_weight = boosted ? 0 : 1;
sf->interp_sf.cb_pred_filter_search = 1;
sf->interp_sf.skip_sharp_interp_filter_search = 1;
@@ -1132,7 +1169,7 @@ static void set_good_speed_features_framesize_independent(
sf->winner_mode_sf.multi_winner_mode_type =
frame_is_intra_only(&cpi->common) ? MULTI_WINNER_MODE_DEFAULT
: MULTI_WINNER_MODE_OFF;
- sf->winner_mode_sf.dc_blk_pred_level = boosted ? 0 : 1;
+ sf->winner_mode_sf.dc_blk_pred_level = boosted ? 0 : 2;
sf->lpf_sf.lpf_pick = LPF_PICK_FROM_FULL_IMAGE_NON_DUAL;
}
@@ -1167,7 +1204,7 @@ static void set_good_speed_features_framesize_independent(
sf->tpl_sf.subpel_force_stop = FULL_PEL;
sf->tpl_sf.gop_length_decision_method = 2;
- sf->winner_mode_sf.dc_blk_pred_level = 1;
+ sf->winner_mode_sf.dc_blk_pred_level = 2;
sf->fp_sf.disable_recon = 1;
}
@@ -1192,8 +1229,9 @@ static void set_good_speed_features_framesize_independent(
sf->part_sf.prune_rectangular_split_based_on_qidx =
boosted || allow_screen_content_tools ? 0 : 2;
sf->part_sf.prune_sub_8x8_partition_level =
- allow_screen_content_tools ? 0
- : frame_is_intra_only(&cpi->common) ? 1 : 2;
+ allow_screen_content_tools ? 0
+ : frame_is_intra_only(&cpi->common) ? 1
+ : 2;
sf->part_sf.prune_part4_search = 3;
sf->mv_sf.simple_motion_subpel_force_stop = FULL_PEL;
@@ -1205,7 +1243,7 @@ static void set_good_speed_features_framesize_independent(
sf->rd_sf.perform_coeff_opt = is_boosted_arf2_bwd_type ? 6 : 8;
- sf->winner_mode_sf.dc_blk_pred_level = 2;
+ sf->winner_mode_sf.dc_blk_pred_level = 3;
sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF;
sf->fp_sf.skip_zeromv_motion_search = 1;
@@ -1217,6 +1255,7 @@ static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi,
int speed) {
const AV1_COMMON *const cm = &cpi->common;
const int boosted = frame_is_boosted(cpi);
+ const int is_1080p_or_larger = AOMMIN(cm->width, cm->height) >= 1080;
const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720;
const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480;
const int is_360p_or_larger = AOMMIN(cm->width, cm->height) >= 360;
@@ -1224,15 +1263,31 @@ static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi,
if (!is_360p_or_larger) {
sf->rt_sf.prune_intra_mode_based_on_mv_range = 1;
sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad = 1;
- if (speed >= 7) sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
+ if (speed >= 6)
+ sf->winner_mode_sf.prune_winner_mode_eval_level = boosted ? 0 : 2;
+ if (speed == 7) sf->rt_sf.prefer_large_partition_blocks = 2;
+ if (speed >= 7) {
+ sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
+ sf->rt_sf.check_only_zero_zeromv_on_large_blocks = true;
+ sf->rt_sf.use_rtc_tf = 2;
+ }
+ if (speed == 8) sf->rt_sf.prefer_large_partition_blocks = 1;
if (speed >= 8) {
sf->rt_sf.use_nonrd_filter_search = 0;
sf->rt_sf.tx_size_level_based_on_qstep = 1;
}
if (speed >= 9) {
sf->rt_sf.use_comp_ref_nonrd = 0;
- sf->rt_sf.nonrd_agressive_skip = 1;
+ sf->rt_sf.nonrd_aggressive_skip = 1;
sf->rt_sf.skip_intra_pred = 1;
+ // Only turn on enable_ref_short_signaling for low resolution when only
+ // LAST and GOLDEN ref frames are used.
+ sf->rt_sf.enable_ref_short_signaling =
+ (!sf->rt_sf.use_nonrd_altref_frame &&
+ (!sf->rt_sf.use_comp_ref_nonrd ||
+ (!sf->rt_sf.ref_frame_comp_nonrd[1] &&
+ !sf->rt_sf.ref_frame_comp_nonrd[2])));
+
// TODO(kyslov) Re-enable when AV1 models are trained
#if 0
#if CONFIG_RT_ML_PARTITIONING
@@ -1246,6 +1301,8 @@ static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi,
if (speed >= 10) {
sf->rt_sf.skip_intra_pred = 2;
sf->rt_sf.hybrid_intra_pickmode = 3;
+ sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 1;
+ sf->rt_sf.reduce_mv_pel_precision_highmotion = 2;
}
} else {
sf->rt_sf.prune_intra_mode_based_on_mv_range = 2;
@@ -1253,6 +1310,13 @@ static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi,
if (speed <= 5) {
sf->tx_sf.tx_type_search.fast_inter_tx_type_prob_thresh =
boosted ? INT_MAX : 350;
+ sf->winner_mode_sf.prune_winner_mode_eval_level = boosted ? 0 : 2;
+ }
+ if (speed == 6) sf->part_sf.disable_8x8_part_based_on_qidx = 1;
+ if (speed >= 6) sf->rt_sf.skip_newmv_mode_based_on_sse = 2;
+ if (speed == 7) sf->rt_sf.prefer_large_partition_blocks = 1;
+ if (speed >= 7) {
+ sf->rt_sf.use_rtc_tf = 1;
}
if (speed == 8 && !cpi->ppi->use_svc) {
sf->rt_sf.short_circuit_low_temp_var = 0;
@@ -1263,14 +1327,13 @@ static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi,
sf->rt_sf.gf_length_lvl = 1;
sf->rt_sf.skip_cdef_sb = 1;
sf->rt_sf.sad_based_adp_altref_lag = 2;
+ sf->rt_sf.reduce_mv_pel_precision_highmotion = 2;
}
-
if (speed >= 10) {
- // TODO(yunqing): extend this sf to other speeds and/or other resolutions.
- sf->rt_sf.use_rtc_tf = 1;
sf->rt_sf.hybrid_intra_pickmode = 2;
sf->rt_sf.sad_based_adp_altref_lag = 4;
sf->rt_sf.tx_size_level_based_on_qstep = 0;
+ sf->rt_sf.reduce_mv_pel_precision_highmotion = 3;
}
}
if (!is_480p_or_larger) {
@@ -1278,11 +1341,9 @@ static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi,
sf->rt_sf.nonrd_check_partition_merge_mode = 2;
}
if (speed >= 8) {
- sf->mv_sf.subpel_search_method = SUBPEL_TREE;
sf->rt_sf.estimate_motion_for_var_based_partition = 1;
}
if (speed >= 9) {
- sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;
sf->rt_sf.estimate_motion_for_var_based_partition = 0;
}
}
@@ -1291,48 +1352,123 @@ static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi,
sf->rt_sf.force_large_partition_blocks_intra = 1;
}
} else {
+ if (speed >= 6) sf->rt_sf.skip_newmv_mode_based_on_sse = 3;
+ if (speed == 7) sf->rt_sf.prefer_large_partition_blocks = 0;
+ if (speed >= 7) {
+ sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 2;
+ sf->rt_sf.reduce_mv_pel_precision_highmotion = 1;
+ }
if (speed >= 9) {
sf->rt_sf.sad_based_adp_altref_lag = 1;
- sf->rt_sf.sad_based_comp_prune = 1;
+ sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 0;
+ sf->rt_sf.reduce_mv_pel_precision_highmotion = 2;
}
if (speed >= 10) {
sf->rt_sf.sad_based_adp_altref_lag = 3;
- sf->rt_sf.sad_based_comp_prune = 2;
+ sf->rt_sf.reduce_mv_pel_precision_highmotion = 3;
}
}
- if (cpi->ppi->use_svc) {
- if (cpi->svc.ref_frame_comp[0] || cpi->svc.ref_frame_comp[1] ||
- cpi->svc.ref_frame_comp[2]) {
+ // TODO(Any): Check/Tune settings of other sfs for 1080p.
+ if (is_1080p_or_larger) {
+ if (speed >= 7) sf->rt_sf.reduce_mv_pel_precision_highmotion = 0;
+ } else {
+ if (speed >= 9) sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
+ if (speed >= 10) sf->rt_sf.nonrd_aggressive_skip = 1;
+ }
+
+ // Setting for SVC, or when the ref_frame_config control is
+ // used to set the reference structure.
+ if (cpi->ppi->use_svc || cpi->rtc_ref.set_ref_frame_config) {
+ const RTC_REF *const rtc_ref = &cpi->rtc_ref;
+ // For SVC: for greater than 2 temporal layers, use better mv search on
+ // base temporal layers, and only on base spatial layer if highest
+ // resolution is above 640x360.
+ if (cpi->svc.number_temporal_layers > 2 &&
+ cpi->svc.temporal_layer_id == 0 &&
+ (cpi->svc.spatial_layer_id == 0 ||
+ cpi->oxcf.frm_dim_cfg.width * cpi->oxcf.frm_dim_cfg.height <=
+ 640 * 360)) {
+ sf->mv_sf.search_method = NSTEP;
+ sf->mv_sf.subpel_search_method = SUBPEL_TREE;
+ sf->rt_sf.fullpel_search_step_param = 6;
+ sf->rt_sf.reduce_mv_pel_precision_highmotion = 0;
+ }
+ if (speed >= 8) {
+ sf->rt_sf.disable_cdf_update_non_reference_frame = true;
+ sf->rt_sf.reduce_mv_pel_precision_highmotion = 3;
+ if (rtc_ref->non_reference_frame) {
+ sf->rt_sf.nonrd_aggressive_skip = 1;
+ sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
+ }
+ }
+ if (speed <= 9 && cpi->svc.number_temporal_layers > 2 &&
+ cpi->svc.temporal_layer_id == 0)
+ sf->rt_sf.check_only_zero_zeromv_on_large_blocks = false;
+ else
+ sf->rt_sf.check_only_zero_zeromv_on_large_blocks = true;
+ if (cpi->svc.number_temporal_layers > 1 && cpi->svc.temporal_layer_id == 0)
+ sf->rt_sf.source_metrics_sb_nonrd = 0;
+ // Compound mode enabling.
+ if (rtc_ref->ref_frame_comp[0] || rtc_ref->ref_frame_comp[1] ||
+ rtc_ref->ref_frame_comp[2]) {
sf->rt_sf.use_comp_ref_nonrd = 1;
sf->rt_sf.ref_frame_comp_nonrd[0] =
- cpi->svc.ref_frame_comp[0] && cpi->svc.reference[GOLDEN_FRAME - 1];
+ rtc_ref->ref_frame_comp[0] && rtc_ref->reference[GOLDEN_FRAME - 1];
sf->rt_sf.ref_frame_comp_nonrd[1] =
- cpi->svc.ref_frame_comp[1] && cpi->svc.reference[LAST2_FRAME - 1];
+ rtc_ref->ref_frame_comp[1] && rtc_ref->reference[LAST2_FRAME - 1];
sf->rt_sf.ref_frame_comp_nonrd[2] =
- cpi->svc.ref_frame_comp[2] && cpi->svc.reference[ALTREF_FRAME - 1];
+ rtc_ref->ref_frame_comp[2] && rtc_ref->reference[ALTREF_FRAME - 1];
} else {
sf->rt_sf.use_comp_ref_nonrd = 0;
- sf->rt_sf.sad_based_comp_prune = 0;
}
}
+ // Screen settings.
if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) {
- if (speed >= 10) {
+ // TODO(marpan): Check settings for speed 7 and 8.
+ if (speed >= 7) sf->rt_sf.reduce_mv_pel_precision_highmotion = 1;
+ if (speed >= 8) {
+ sf->rt_sf.nonrd_check_partition_merge_mode = 3;
+ sf->rt_sf.nonrd_prune_ref_frame_search = 1;
+ }
+ if (speed >= 9) {
sf->rt_sf.prune_idtx_nonrd = 1;
- sf->rt_sf.part_early_exit_zeromv = 1;
+ sf->rt_sf.part_early_exit_zeromv = 2;
+ sf->rt_sf.skip_lf_screen = 1;
sf->rt_sf.use_nonrd_filter_search = 0;
+ sf->rt_sf.nonrd_prune_ref_frame_search = 3;
+ sf->rt_sf.var_part_split_threshold_shift = 10;
+ sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
+ sf->rt_sf.reduce_mv_pel_precision_highmotion = 3;
+ sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 1;
+ sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
+ sf->rt_sf.nonrd_check_partition_merge_mode = 0;
+ }
+ if (speed >= 10) {
+ if (cm->width * cm->height > 1920 * 1080)
+ sf->part_sf.disable_8x8_part_based_on_qidx = 1;
+ sf->rt_sf.set_zeromv_skip_based_on_source_sad = 2;
+ sf->rt_sf.screen_content_cdef_filter_qindex_thresh = 80;
+ sf->rt_sf.part_early_exit_zeromv = 1;
+ sf->rt_sf.nonrd_aggressive_skip = 1;
}
- if (speed >= 9) sf->rt_sf.skip_lf_screen = 1;
+ sf->rt_sf.use_nonrd_altref_frame = 0;
sf->rt_sf.skip_cdef_sb = 1;
sf->rt_sf.use_rtc_tf = 0;
sf->rt_sf.use_comp_ref_nonrd = 0;
- sf->rt_sf.sad_based_comp_prune = 0;
sf->rt_sf.source_metrics_sb_nonrd = 1;
if (cpi->rc.high_source_sad == 1) {
- sf->rt_sf.force_large_partition_blocks = 0;
- for (int i = 0; i < BLOCK_SIZES; ++i)
- sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC_H_V;
+ sf->rt_sf.prefer_large_partition_blocks = 0;
+ sf->part_sf.max_intra_bsize = BLOCK_128X128;
+ for (int i = 0; i < BLOCK_SIZES; ++i) {
+ if (i > BLOCK_32X32)
+ sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
+ else
+ sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC_H_V;
+ }
}
- if (cpi->rc.high_num_blocks_with_motion && speed >= 6) {
+ if (cpi->rc.max_block_source_sad > 20000 &&
+ cpi->rc.frame_source_sad > 100 &&
+ cpi->rc.percent_blocks_with_motion > 1 && speed >= 6) {
sf->mv_sf.search_method = NSTEP;
sf->rt_sf.fullpel_search_step_param = 2;
}
@@ -1353,7 +1489,7 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
// Currently, rt speed 0, 1, 2, 3, 4, 5 are the same.
// The following set of speed features does not impact the encoder's decisions
// as the relevant tools are disabled by default.
- sf->gm_sf.gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2;
+ sf->gm_sf.gm_search_type = GM_DISABLE_SEARCH;
sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF;
sf->inter_sf.reuse_inter_intra_mode = 1;
sf->inter_sf.prune_compound_using_single_ref = 0;
@@ -1376,6 +1512,7 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
sf->rt_sf.skip_interp_filter_search = 1;
sf->intra_sf.prune_palette_search_level = 2;
sf->intra_sf.prune_luma_palette_size_search_level = 2;
+ sf->intra_sf.early_term_chroma_palette_size_search = 1;
// End of set
@@ -1461,7 +1598,7 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL4;
sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;
- sf->winner_mode_sf.dc_blk_pred_level = frame_is_intra_only(cm) ? 0 : 2;
+ sf->winner_mode_sf.dc_blk_pred_level = frame_is_intra_only(cm) ? 0 : 3;
sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = 1;
sf->winner_mode_sf.tx_size_search_level = 1;
sf->winner_mode_sf.winner_mode_ifs = 1;
@@ -1502,18 +1639,7 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR |
FLAG_EARLY_TERMINATE;
sf->rt_sf.var_part_split_threshold_shift = 5;
-
- // For SVC: use better mv search on base temporal layers, and only
- // on base spatial layer if highest resolution is above 640x360.
- if (cpi->svc.number_temporal_layers > 1 &&
- cpi->svc.temporal_layer_id < cpi->svc.number_temporal_layers - 1 &&
- (cpi->svc.spatial_layer_id == 0 ||
- cpi->oxcf.frm_dim_cfg.width * cpi->oxcf.frm_dim_cfg.height <=
- 640 * 360)) {
- sf->mv_sf.search_method = NSTEP;
- sf->mv_sf.subpel_search_method = SUBPEL_TREE;
- sf->rt_sf.fullpel_search_step_param = 6;
- }
+ if (!frame_is_intra_only(&cpi->common)) sf->rt_sf.var_part_based_on_qidx = 1;
if (speed >= 6) {
sf->mv_sf.use_fullpel_costlist = 1;
@@ -1528,6 +1654,10 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
sf->rt_sf.gf_refresh_based_on_qp = 1;
sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad = 1;
sf->rt_sf.var_part_split_threshold_shift = 7;
+ if (!frame_is_intra_only(&cpi->common))
+ sf->rt_sf.var_part_based_on_qidx = 2;
+
+ sf->winner_mode_sf.prune_winner_mode_eval_level = boosted ? 0 : 3;
}
if (speed >= 7) {
@@ -1538,11 +1668,8 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
sf->part_sf.partition_search_type = VAR_BASED_PARTITION;
sf->part_sf.max_intra_bsize = BLOCK_32X32;
- sf->gm_sf.gm_search_type = GM_DISABLE_SEARCH;
-
sf->mv_sf.search_method = FAST_DIAMOND;
sf->mv_sf.subpel_force_stop = QUARTER_PEL;
- sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;
sf->inter_sf.inter_mode_rd_model_estimation = 2;
// This sf is not applicable in non-rd path.
@@ -1584,20 +1711,6 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
sf->rt_sf.nonrd_check_partition_merge_mode = 3;
sf->rt_sf.skip_intra_pred = 1;
sf->rt_sf.source_metrics_sb_nonrd = 1;
- // For SVC: use better mv search on base temporal layers, and only
- // on base spatial layer if highest resolution is above 640x360.
- if (cpi->svc.number_temporal_layers > 1 &&
- cpi->svc.temporal_layer_id < cpi->svc.number_temporal_layers - 1 &&
- (cpi->svc.spatial_layer_id == 0 ||
- cpi->oxcf.frm_dim_cfg.width * cpi->oxcf.frm_dim_cfg.height <=
- 640 * 360)) {
- sf->mv_sf.search_method = NSTEP;
- sf->mv_sf.subpel_search_method = SUBPEL_TREE;
- sf->rt_sf.fullpel_search_step_param = 6;
- } else if (cpi->svc.non_reference_frame) {
- sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
- sf->rt_sf.fullpel_search_step_param = 10;
- }
// Set mask for intra modes.
for (int i = 0; i < BLOCK_SIZES; ++i)
if (i >= BLOCK_32X32)
@@ -1607,7 +1720,11 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC_H_V;
sf->winner_mode_sf.dc_blk_pred_level = 0;
- sf->rt_sf.var_part_based_on_qidx = 1;
+ sf->rt_sf.var_part_based_on_qidx = 3;
+ sf->rt_sf.prune_compoundmode_with_singlecompound_var = true;
+ sf->rt_sf.prune_compoundmode_with_singlemode_var = true;
+ sf->rt_sf.skip_compound_based_on_var = true;
+ sf->rt_sf.use_adaptive_subpel_search = true;
}
if (speed >= 8) {
@@ -1620,27 +1737,29 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
sf->rt_sf.nonrd_check_partition_merge_mode = 0;
sf->rt_sf.var_part_split_threshold_shift = 8;
sf->interp_sf.cb_pred_filter_search = 1;
- sf->rt_sf.var_part_based_on_qidx = 2;
+ sf->rt_sf.var_part_based_on_qidx = 4;
sf->rt_sf.partition_direct_merging = 1;
+ sf->rt_sf.prune_compoundmode_with_singlemode_var = false;
}
if (speed >= 9) {
sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_3;
- sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
sf->rt_sf.estimate_motion_for_var_based_partition = 0;
- sf->rt_sf.force_large_partition_blocks = 1;
+ sf->rt_sf.prefer_large_partition_blocks = 3;
sf->rt_sf.skip_intra_pred = 2;
sf->rt_sf.var_part_split_threshold_shift = 9;
for (int i = 0; i < BLOCK_SIZES; ++i)
sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
sf->rt_sf.var_part_based_on_qidx = 0;
+ sf->rt_sf.frame_level_mode_cost_update = true;
+ sf->rt_sf.check_only_zero_zeromv_on_large_blocks = true;
+ sf->rt_sf.reduce_mv_pel_precision_highmotion = 0;
+ sf->rt_sf.use_adaptive_subpel_search = false;
}
if (speed >= 10) {
sf->rt_sf.sse_early_term_inter_search = EARLY_TERM_IDX_4;
- sf->rt_sf.nonrd_agressive_skip = 1;
sf->rt_sf.nonrd_prune_ref_frame_search = 3;
sf->rt_sf.var_part_split_threshold_shift = 10;
sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
- sf->rt_sf.force_half_pel_block = 1;
}
}
@@ -1726,6 +1845,7 @@ static AOM_INLINE void init_part_sf(PARTITION_SPEED_FEATURES *part_sf) {
part_sf->reuse_best_prediction_for_part_ab = 0;
part_sf->use_best_rd_for_pruning = 0;
part_sf->skip_non_sq_part_based_on_none = 0;
+ part_sf->disable_8x8_part_based_on_qidx = 0;
}
static AOM_INLINE void init_mv_sf(MV_SPEED_FEATURES *mv_sf) {
@@ -1754,6 +1874,7 @@ static AOM_INLINE void init_inter_sf(INTER_MODE_SPEED_FEATURES *inter_sf) {
inter_sf->model_based_post_interp_filter_breakout = 0;
inter_sf->reduce_inter_modes = 0;
inter_sf->alt_ref_search_fp = 0;
+ inter_sf->prune_comp_ref_frames = 0;
inter_sf->selective_ref_frame = 0;
inter_sf->prune_ref_frame_for_rect_partitions = 0;
inter_sf->fast_wedge_sign_estimate = 0;
@@ -1820,6 +1941,7 @@ static AOM_INLINE void init_intra_sf(INTRA_MODE_SPEED_FEATURES *intra_sf) {
intra_sf->intra_uv_mode_mask[i] = UV_INTRA_ALL;
}
intra_sf->disable_smooth_intra = 0;
+ intra_sf->prune_smooth_intra_mode_for_chroma = 0;
intra_sf->prune_filter_intra_level = 0;
intra_sf->prune_chroma_modes_using_luma_winner = 0;
intra_sf->cfl_search_range = 3;
@@ -1827,6 +1949,7 @@ static AOM_INLINE void init_intra_sf(INTRA_MODE_SPEED_FEATURES *intra_sf) {
intra_sf->adapt_top_model_rd_count_using_neighbors = 0;
intra_sf->early_term_chroma_palette_size_search = 0;
intra_sf->skip_filter_intra_in_inter_frames = 0;
+ intra_sf->prune_luma_odd_delta_angles_in_intra = 0;
}
static AOM_INLINE void init_tx_sf(TX_SPEED_FEATURES *tx_sf) {
@@ -1850,6 +1973,7 @@ static AOM_INLINE void init_tx_sf(TX_SPEED_FEATURES *tx_sf) {
tx_sf->adaptive_txb_search_level = 0;
tx_sf->refine_fast_tx_search_results = 1;
tx_sf->prune_tx_size_level = 0;
+ tx_sf->prune_intra_tx_depths_using_nn = false;
}
static AOM_INLINE void init_rd_sf(RD_CALC_SPEED_FEATURES *rd_sf,
@@ -1892,8 +2016,7 @@ static AOM_INLINE void init_winner_mode_sf(
winner_mode_sf->multi_winner_mode_type = 0;
winner_mode_sf->dc_blk_pred_level = 0;
winner_mode_sf->winner_mode_ifs = 0;
- winner_mode_sf->prune_winner_mode_processing_using_src_var = 0;
- winner_mode_sf->disable_winner_mode_eval_for_txskip = 0;
+ winner_mode_sf->prune_winner_mode_eval_level = 0;
}
static AOM_INLINE void init_lpf_sf(LOOP_FILTER_SPEED_FEATURES *lpf_sf) {
@@ -1925,7 +2048,6 @@ static AOM_INLINE void init_rt_sf(REAL_TIME_SPEED_FEATURES *rt_sf) {
rt_sf->use_comp_ref_nonrd = 0;
rt_sf->use_real_time_ref_set = 0;
rt_sf->short_circuit_low_temp_var = 0;
- rt_sf->use_modeled_non_rd_cost = 0;
rt_sf->reuse_inter_pred_nonrd = 0;
rt_sf->num_inter_modes_for_tx_search = INT_MAX;
rt_sf->use_nonrd_filter_search = 0;
@@ -1935,12 +2057,12 @@ static AOM_INLINE void init_rt_sf(REAL_TIME_SPEED_FEATURES *rt_sf) {
rt_sf->source_metrics_sb_nonrd = 0;
rt_sf->overshoot_detection_cbr = NO_DETECTION;
rt_sf->check_scene_detection = 0;
- rt_sf->force_large_partition_blocks = 0;
+ rt_sf->prefer_large_partition_blocks = 0;
rt_sf->use_temporal_noise_estimate = 0;
rt_sf->fullpel_search_step_param = 0;
for (int i = 0; i < BLOCK_SIZES; ++i)
rt_sf->intra_y_mode_bsize_mask_nrd[i] = INTRA_ALL;
- rt_sf->nonrd_agressive_skip = 0;
+ rt_sf->nonrd_aggressive_skip = 0;
rt_sf->skip_cdef_sb = 0;
rt_sf->force_large_partition_blocks_intra = 0;
rt_sf->skip_tx_no_split_var_based_partition = 0;
@@ -1949,7 +2071,8 @@ static AOM_INLINE void init_rt_sf(REAL_TIME_SPEED_FEATURES *rt_sf) {
rt_sf->prune_inter_modes_with_golden_ref = 0;
rt_sf->prune_inter_modes_wrt_gf_arf_based_on_sad = 0;
rt_sf->prune_inter_modes_using_temp_var = 0;
- rt_sf->force_half_pel_block = 0;
+ rt_sf->reduce_mv_pel_precision_highmotion = 0;
+ rt_sf->reduce_mv_pel_precision_lowcomplex = 0;
rt_sf->prune_intra_mode_based_on_mv_range = 0;
rt_sf->var_part_split_threshold_shift = 7;
rt_sf->gf_refresh_based_on_qp = 0;
@@ -1961,8 +2084,45 @@ static AOM_INLINE void init_rt_sf(REAL_TIME_SPEED_FEATURES *rt_sf) {
rt_sf->sad_based_adp_altref_lag = 0;
rt_sf->partition_direct_merging = 0;
rt_sf->var_part_based_on_qidx = 0;
- rt_sf->sad_based_comp_prune = 0;
rt_sf->tx_size_level_based_on_qstep = 0;
+ rt_sf->vbp_prune_16x16_split_using_min_max_sub_blk_var = false;
+ rt_sf->prune_compoundmode_with_singlecompound_var = false;
+ rt_sf->frame_level_mode_cost_update = false;
+ rt_sf->prune_h_pred_using_best_mode_so_far = false;
+ rt_sf->check_only_zero_zeromv_on_large_blocks = false;
+ rt_sf->disable_cdf_update_non_reference_frame = false;
+ rt_sf->prune_compoundmode_with_singlemode_var = false;
+ rt_sf->skip_compound_based_on_var = false;
+ rt_sf->set_zeromv_skip_based_on_source_sad = 1;
+ rt_sf->use_adaptive_subpel_search = false;
+ rt_sf->screen_content_cdef_filter_qindex_thresh = 0;
+ rt_sf->enable_ref_short_signaling = false;
+ rt_sf->check_globalmv_on_single_ref = true;
+}
+
+// Populate appropriate sub-pel search method based on speed feature and user
+// specified settings
+static void set_subpel_search_method(
+ MotionVectorSearchParams *mv_search_params,
+ unsigned int motion_vector_unit_test,
+ SUBPEL_SEARCH_METHODS subpel_search_method) {
+ if (subpel_search_method == SUBPEL_TREE) {
+ mv_search_params->find_fractional_mv_step = av1_find_best_sub_pixel_tree;
+ } else if (subpel_search_method == SUBPEL_TREE_PRUNED) {
+ mv_search_params->find_fractional_mv_step =
+ av1_find_best_sub_pixel_tree_pruned;
+ } else if (subpel_search_method == SUBPEL_TREE_PRUNED_MORE) {
+ mv_search_params->find_fractional_mv_step =
+ av1_find_best_sub_pixel_tree_pruned_more;
+ } else {
+ assert(0);
+ }
+
+ // This is only used in motion vector unit test.
+ if (motion_vector_unit_test == 1)
+ mv_search_params->find_fractional_mv_step = av1_return_max_sub_pixel_mv;
+ else if (motion_vector_unit_test == 2)
+ mv_search_params->find_fractional_mv_step = av1_return_min_sub_pixel_mv;
}
void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) {
@@ -1988,11 +2148,9 @@ void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) {
(sf->inter_sf.disable_interintra_wedge_var_thresh != UINT_MAX);
}
- // This is only used in motion vector unit test.
- if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test == 1)
- cpi->mv_search_params.find_fractional_mv_step = av1_return_max_sub_pixel_mv;
- else if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test == 2)
- cpi->mv_search_params.find_fractional_mv_step = av1_return_min_sub_pixel_mv;
+ set_subpel_search_method(&cpi->mv_search_params,
+ cpi->oxcf.unit_test_cfg.motion_vector_unit_test,
+ sf->mv_sf.subpel_search_method);
// For multi-thread use case with row_mt enabled, cost update for a set of
// SB rows is not desirable. Hence, the sf mv_cost_upd_level is set to
@@ -2038,7 +2196,11 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) {
break;
}
- if (!oxcf->txfm_cfg.enable_tx_size_search) {
+ // Note: when use_nonrd_pick_mode is true, the transform size is the
+ // minimum of 16x16 and the largest possible size of the current block,
+ // which conflicts with the speed feature "enable_tx_size_search".
+ if (!oxcf->txfm_cfg.enable_tx_size_search &&
+ sf->rt_sf.use_nonrd_pick_mode == 0) {
sf->winner_mode_sf.tx_size_search_level = 3;
}
@@ -2053,13 +2215,6 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) {
(sf->inter_sf.disable_interintra_wedge_var_thresh != UINT_MAX);
}
- // sf->part_sf.partition_search_breakout_dist_thr is set assuming max 64x64
- // blocks. Normalise this if the blocks are bigger.
- if (MAX_SB_SIZE_LOG2 > 6) {
- sf->part_sf.partition_search_breakout_dist_thr <<=
- 2 * (MAX_SB_SIZE_LOG2 - 6);
- }
-
const int mesh_speed = AOMMIN(speed, MAX_MESH_SPEED);
for (i = 0; i < MAX_MESH_STEP; ++i) {
sf->mv_sf.mesh_patterns[i].range =
@@ -2087,22 +2242,9 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) {
if (oxcf->pass == AOM_RC_ONE_PASS && has_no_stats_stage(cpi))
sf->hl_sf.recode_loop = DISALLOW_RECODE;
- MotionVectorSearchParams *const mv_search_params = &cpi->mv_search_params;
- if (sf->mv_sf.subpel_search_method == SUBPEL_TREE) {
- mv_search_params->find_fractional_mv_step = av1_find_best_sub_pixel_tree;
- } else if (sf->mv_sf.subpel_search_method == SUBPEL_TREE_PRUNED) {
- mv_search_params->find_fractional_mv_step =
- av1_find_best_sub_pixel_tree_pruned;
- } else if (sf->mv_sf.subpel_search_method == SUBPEL_TREE_PRUNED_MORE) {
- mv_search_params->find_fractional_mv_step =
- av1_find_best_sub_pixel_tree_pruned_more;
- }
-
- // This is only used in motion vector unit test.
- if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test == 1)
- mv_search_params->find_fractional_mv_step = av1_return_max_sub_pixel_mv;
- else if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test == 2)
- mv_search_params->find_fractional_mv_step = av1_return_min_sub_pixel_mv;
+ set_subpel_search_method(&cpi->mv_search_params,
+ cpi->oxcf.unit_test_cfg.motion_vector_unit_test,
+ sf->mv_sf.subpel_search_method);
// assert ensures that tx_domain_dist_level is accessed correctly
assert(cpi->sf.rd_sf.tx_domain_dist_thres_level >= 0 &&
@@ -2112,7 +2254,7 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) {
sizeof(winner_mode_params->tx_domain_dist_threshold));
assert(cpi->sf.rd_sf.tx_domain_dist_level >= 0 &&
- cpi->sf.rd_sf.tx_domain_dist_level < 3);
+ cpi->sf.rd_sf.tx_domain_dist_level < TX_DOMAIN_DIST_LEVELS);
memcpy(winner_mode_params->use_transform_domain_distortion,
tx_domain_dist_types[cpi->sf.rd_sf.tx_domain_dist_level],
sizeof(winner_mode_params->use_transform_domain_distortion));
@@ -2134,7 +2276,7 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) {
// assert ensures that tx_size_search_level is accessed correctly
assert(cpi->sf.winner_mode_sf.tx_size_search_level >= 0 &&
- cpi->sf.winner_mode_sf.tx_size_search_level < 3);
+ cpi->sf.winner_mode_sf.tx_size_search_level <= 3);
memcpy(winner_mode_params->tx_size_search_methods,
tx_size_search_methods[cpi->sf.winner_mode_sf.tx_size_search_level],
sizeof(winner_mode_params->tx_size_search_methods));
@@ -2148,8 +2290,7 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) {
sf->inter_sf.inter_mode_rd_model_estimation = 2;
}
-#if !CONFIG_FRAME_PARALLEL_ENCODE || \
- (CONFIG_FRAME_PARALLEL_ENCODE && !CONFIG_FPMT_TEST)
+#if !CONFIG_FPMT_TEST
// Disable the speed feature 'prune_ref_frame_for_gm_search' to achieve
// better parallelism when number of threads available are greater than or
// equal to maximum number of reference frames allowed for global motion.
@@ -2299,4 +2440,8 @@ void av1_set_speed_features_qindex_dependent(AV1_COMP *cpi, int speed) {
sf->inter_sf.reuse_mask_search_results = 1;
}
}
+
+ set_subpel_search_method(&cpi->mv_search_params,
+ cpi->oxcf.unit_test_cfg.motion_vector_unit_test,
+ sf->mv_sf.subpel_search_method);
}
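
Since any of the three configuration stages may overwrite sf->mv_sf.subpel_search_method, the selector is now re-run at the end of each stage; a condensed ordering sketch (the call sites are the ones added by this patch):

/* The last writer of subpel_search_method wins, and the
 * motion_vector_unit_test override, applied inside
 * set_subpel_search_method(), always takes precedence. */
av1_set_speed_features_framesize_independent(cpi, speed);
av1_set_speed_features_framesize_dependent(cpi, speed);
av1_set_speed_features_qindex_dependent(cpi, speed);
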
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/speed_features.h b/chromium/third_party/libaom/source/libaom/av1/encoder/speed_features.h
index 00b46f47938..c1f796cc3ef 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/speed_features.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/speed_features.h
@@ -716,6 +716,9 @@ typedef struct PARTITION_SPEED_FEATURES {
// 2: on top of 1, prune rectangular partitions if NONE is inter, not a newmv
// mode and skippable
int skip_non_sq_part_based_on_none;
+
+ // Disables 8x8 and below partitions for low quantizers.
+ int disable_8x8_part_based_on_qidx;
} PARTITION_SPEED_FEATURES;
typedef struct MV_SPEED_FEATURES {
@@ -833,6 +836,17 @@ typedef struct INTER_MODE_SPEED_FEATURES {
// 2 prune inter modes w.r.t BWDREF, ALTREF2 and ALTREF reference frames
int alt_ref_search_fp;
+ // Prune compound reference frames
+ // 0 no pruning
+ // 1 prune compound references which satisfy neither of the two conditions:
+ // a) The references are the nearest to the current frame in both the past
+ // and future directions.
+ // b) The references have the minimum pred_mv_sad in both the past and
+ // future directions.
+ // 2 prune all compound references except the pair nearest to the current
+ // frame in both the past and future directions.
+ int prune_comp_ref_frames;
+
// Skip the current ref_mv in NEW_MV mode based on mv, rate cost, etc.
// This speed feature equaling 0 means no skipping.
// If the speed feature equals 1 or 2, skip the current ref_mv in NEW_MV mode
@@ -906,6 +920,14 @@ typedef struct INTER_MODE_SPEED_FEATURES {
int prune_comp_using_best_single_mode_ref;
// Skip NEARESTMV and NEARMV using weight computed in ref mv list population
+ // This speed feature sometimes leads to severe visual artifacts for
+ // the overlay frame. It makes the inter RD mode search skip NEARESTMV
+ // and NEARMV, and when the NEWMV mode is also early terminated (it cannot
+ // handle a zero mv difference), no valid inter mode is evaluated at all.
+ // In such cases, intra modes are chosen, leading to bad prediction and
+ // flickering artifacts.
+ // Keep this feature off for now, and check visual quality carefully if
+ // anyone is going to turn it on.
int prune_nearest_near_mv_using_refmv_weight;
// Based on previous ref_mv_idx search result, prune the following search.
@@ -1054,6 +1076,17 @@ typedef struct INTRA_MODE_SPEED_FEATURES {
// Enable/disable smooth intra modes.
int disable_smooth_intra;
+ // Prune UV_SMOOTH_PRED mode for chroma based on chroma source variance.
+ // false : No pruning
+ // true : Prune UV_SMOOTH_PRED mode based on chroma source variance
+ //
+ // For allintra encode, this speed feature reduces instruction count
+ // by 1.90%, 2.21% and 1.97% for speed 6, 7 and 8 with coding performance
+ // change less than 0.04%. For AVIF image encode, this speed feature reduces
+ // encode time by 1.56%, 2.14% and 0.90% for speed 6, 7 and 8 on a typical
+ // image dataset with coding performance change less than 0.05%.
+ bool prune_smooth_intra_mode_for_chroma;
+
// Prune filter intra modes in intra frames.
// 0 : No pruning
// 1 : Evaluate applicable filter intra modes based on best intra mode so far
@@ -1120,6 +1153,16 @@ typedef struct INTRA_MODE_SPEED_FEATURES {
// neighbor block and quantizer information.
int adapt_top_model_rd_count_using_neighbors;
+ // Prune the evaluation of odd delta angles of directional luma intra modes by
+ // using the rdcosts of neighbouring delta angles.
+ // For allintra encode, this speed feature reduces instruction count
+ // by 4.461%, 3.699% and 3.536% for speed 6, 7 and 8 on a typical video
+ // dataset with coding performance change less than 0.26%. For AVIF image
+ // encode, this speed feature reduces encode time by 2.849%, 2.471%,
+ // and 2.051% for speed 6, 7 and 8 on a typical image dataset with coding
+ // performance change less than 0.27%.
+ int prune_luma_odd_delta_angles_in_intra;
+
// Terminate early in chroma palette_size search.
// 0: No early termination
// 1: Terminate early for higher palette_size, if header rd cost of lower
@@ -1174,18 +1217,45 @@ typedef struct TX_SPEED_FEATURES {
// of 0 indicates no pruning, and the aggressiveness of pruning progressively
// increases from levels 1 to 3.
int prune_tx_size_level;
+
+ // Prune the evaluation of transform depths as decided by the NN model.
+ // false: No pruning.
+ // true : Avoid the evaluation of specific transform depths using NN model.
+ //
+ // For allintra encode, this speed feature reduces instruction count
+ // by 4.76%, 8.92% and 11.28% for speed 6, 7 and 8 with coding performance
+ // change less than 0.32%. For AVIF image encode, this speed feature reduces
+ // encode time by 4.65%, 9.16% and 10.45% for speed 6, 7 and 8 on a typical
+ // image dataset with coding performance change less than 0.19%.
+ bool prune_intra_tx_depths_using_nn;
} TX_SPEED_FEATURES;
typedef struct RD_CALC_SPEED_FEATURES {
// Fast approximation of av1_model_rd_from_var_lapndz
int simple_model_rd_from_var;
- // Whether to compute distortion in the image domain (slower but
- // more accurate), or in the transform domain (faster but less acurate).
- // 0: use image domain
- // 1: use transform domain in tx_type search, and use image domain for
- // RD_STATS
- // 2: use transform domain
+ // Perform faster distortion computation during the R-D evaluation by trying
+ // to approximate the prediction error with transform coefficients (faster but
+ // less accurate) rather than computing distortion in the pixel domain (slower
+ // but more accurate). The following methods are used for distortion
+ // computation:
+ // Method 0: Always compute distortion in the pixel domain
+ // Method 1: Based on block error, try using transform domain distortion for
+ // tx_type search and compute distortion in pixel domain for final RD_STATS
+ // Method 2: Based on block error, try to compute distortion in transform
+ // domain
+ // Methods 1 and 2 may fall back to computing distortion in the pixel domain in
+ // case the block error is less than the threshold, which is controlled by the
+ // speed feature tx_domain_dist_thres_level.
+ //
+ // The speed feature tx_domain_dist_level decides which of the above methods
+ // needs to be used across different mode evaluation stages as described
+ // below:
+ // Eval type: Default Mode Winner
+ // Level 0 : Method 0 Method 2 Method 0
+ // Level 1 : Method 1 Method 2 Method 0
+ // Level 2 : Method 2 Method 2 Method 0
+ // Level 3 : Method 2 Method 2 Method 2
int tx_domain_dist_level;
// Transform domain distortion threshold level
@@ -1231,9 +1301,11 @@ typedef struct WINNER_MODE_SPEED_FEATURES {
// 1 / 2 : Use configured number of winner candidates
int motion_mode_for_winner_cand;
- // Early DC only txfm block prediction
- // 0: speed feature OFF
- // 1 / 2 : Use the configured level for different modes
+ // Controls the prediction of transform skip block or DC only block.
+ //
+ // Different speed feature values (0 to 3) decide the aggressiveness of
+ // prediction (refer to predict_dc_levels[][] in speed_features.c) to be used
+ // during different mode evaluation stages.
int dc_blk_pred_level;
// If on, disables interpolation filter search in handle_inter_mode loop, and
@@ -1241,13 +1313,19 @@ typedef struct WINNER_MODE_SPEED_FEATURES {
// tx_search_best_inter_candidates.
int winner_mode_ifs;
- // Flag used to enable the pruning of winner mode processing for blocks with
- // low source variance.
- int prune_winner_mode_processing_using_src_var;
-
- // If on, disables transform refinement for winner mode if the normal mode
- // evaluation resulted in transform skip.
- int disable_winner_mode_eval_for_txskip;
+ // Controls the disabling of winner mode processing. Speed feature levels
+ // are ordered in increasing aggressiveness of pruning. The method considered
+ // for disabling depends on the sf level value, as described below.
+ // 0: Do not disable
+ // 1: Disable for blocks with low source variance.
+ // 2: Disable for blocks which turn out to be transform skip (skipped based on
+ // eob) during MODE_EVAL stage except NEWMV mode.
+ // 3: Disable for blocks which turn out to be transform skip during MODE_EVAL
+ // stage except NEWMV mode. For high quantizers, prune conservatively based on
+ // transform skip (skipped based on eob) except for NEWMV mode.
+ // 4: Disable for blocks which turn out to be transform skip during MODE_EVAL
+ // stage.
+ int prune_winner_mode_eval_level;
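A compact way to read levels 0-4 above; all predicate names here are illustrative assumptions computed during the MODE_EVAL stage, not encoder symbols, and the high-quantizer variant of level 3 is only noted in a comment:

    #include <stdbool.h>

    /* Illustrative gating for prune_winner_mode_eval_level. */
    static bool skip_winner_mode_eval(int level, bool low_src_var,
                                      bool tx_skip, bool is_newmv) {
      switch (level) {
        case 1: return low_src_var;
        case 2: return tx_skip && !is_newmv;
        case 3: return tx_skip && !is_newmv; /* + conservative high-Q prune */
        case 4: return tx_skip;
        default: return false; /* level 0: never disable */
      }
    }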
} WINNER_MODE_SPEED_FEATURES;
typedef struct LOOP_FILTER_SPEED_FEATURES {
@@ -1353,9 +1431,6 @@ typedef struct REAL_TIME_SPEED_FEATURES {
// temporal variance.
int short_circuit_low_temp_var;
- // Use modeled (currently CurvFit model) RDCost for fast non-RD mode
- int use_modeled_non_rd_cost;
-
// Reuse inter prediction in fast non-rd mode.
int reuse_inter_pred_nonrd;
@@ -1388,8 +1463,9 @@ typedef struct REAL_TIME_SPEED_FEATURES {
// Check for scene/content change detection on every frame before encoding.
int check_scene_detection;
- // Forces larger partition blocks in variance based partitioning
- int force_large_partition_blocks;
+ // For nonrd mode: Prefer larger partition blks in variance based partitioning
+ // 0: disabled, 1-3: increasing aggressiveness
+ int prefer_large_partition_blocks;
// uses results of temporal noise estimate
int use_temporal_noise_estimate;
@@ -1404,8 +1480,8 @@ typedef struct REAL_TIME_SPEED_FEATURES {
// separately, for nonrd pickmode.
int intra_y_mode_bsize_mask_nrd[BLOCK_SIZES];
- // Skips mode checks more agressively in nonRD mode
- int nonrd_agressive_skip;
+ // Skips mode checks more aggressively in nonRD mode
+ int nonrd_aggressive_skip;
// Skip cdef on 64x64 blocks when NEWMV or INTRA is not picked or color
// sensitivity is off. When color sensitivity is on for a superblock, all
@@ -1437,8 +1513,19 @@ typedef struct REAL_TIME_SPEED_FEATURES {
// variance wrt LAST reference.
int prune_inter_modes_using_temp_var;
- // Force half_pel at block level.
- int force_half_pel_block;
+ // Reduce MV precision to halfpel for larger integer MVs & frame-level motion
+ // 0: disabled
+ // 1-2: Reduce precision to halfpel, fullpel based on conservative
+ // thresholds, aggressiveness increases with increase in level
+ // 3: Reduce precision to halfpel using more aggressive thresholds
+ int reduce_mv_pel_precision_highmotion;
+
+ // Reduce MV precision for low complexity blocks
+ // 0: disabled
+ // 1: Reduce the mv resolution for zero mv if the variance is low
+ // 2: Switch to halfpel, fullpel based on low block spatial-temporal
+ // complexity.
+ int reduce_mv_pel_precision_lowcomplex;
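A sketch of how the two precision-reduction features above might gate the MV resolution; the predicates and threshold checks are assumptions for illustration (the actual thresholds tighten as the level increases):

    #include <stdbool.h>

    typedef enum { PREC_FULLPEL, PREC_HALFPEL, PREC_SUBPEL } MvPrecSketch;

    /* Illustrative only. */
    static MvPrecSketch pick_mv_precision(int highmotion_level,
                                          int lowcomplex_level,
                                          bool high_motion_by_thresh,
                                          bool zero_mv_low_var,
                                          bool low_spatio_temporal_complexity) {
      if (highmotion_level > 0 && high_motion_by_thresh)
        return PREC_HALFPEL; /* may drop to fullpel per the thresholds */
      if (lowcomplex_level == 1 && zero_mv_low_var) return PREC_HALFPEL;
      if (lowcomplex_level == 2 && low_spatio_temporal_complexity)
        return PREC_HALFPEL; /* or fullpel, per block complexity */
      return PREC_SUBPEL; /* default precision */
    }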
// Prune intra mode evaluation in inter frames based on mv range.
BLOCK_SIZE prune_intra_mode_based_on_mv_range;
@@ -1447,13 +1534,18 @@ typedef struct REAL_TIME_SPEED_FEATURES {
// by a negative number.
int var_part_split_threshold_shift;
- // Qindex based variance partition threshold index.
+ // Qindex based variance partition threshold index, which determines
+ // the aggressiveness of partition pruning
+ // 0: disabled for speeds 9,10
+ // 1,2: (rd-path) lowers qindex thresholds conditionally (for low SAD sb)
+ // 3,4: (non-rd path) uses pre-tuned qindex thresholds
int var_part_based_on_qidx;
// Enable GF refresh based on Q value.
int gf_refresh_based_on_qp;
// Temporal filtering
+ // The value can be 1 or 2, which indicates the threshold to use.
int use_rtc_tf;
// Prune the use of the identity transform in nonrd_pickmode,
@@ -1468,6 +1560,9 @@ typedef struct REAL_TIME_SPEED_FEATURES {
// For nonrd: early exit out of variance partition that sets the
// block size to superblock size, and sets mode to zeromv-last skip.
+ // 0: disabled
+ // 1: zeromv-skip is enabled at SB level only
+ // 2: zeromv-skip is enabled at SB level and coding block level
int part_early_exit_zeromv;
// Early terminate inter mode search based on sse in non-rd path.
@@ -1479,11 +1574,88 @@ typedef struct REAL_TIME_SPEED_FEATURES {
// Enable/disable partition direct merging.
int partition_direct_merging;
- // SAD based compound mode pruning
- int sad_based_comp_prune;
-
// Level of aggressiveness for obtaining tx size based on qstep
int tx_size_level_based_on_qstep;
+
+ // Avoid the partitioning of a 16x16 block in variance based partitioning
+ // (VBP) by making use of minimum and maximum sub-block variances.
+ // For allintra encode, this speed feature reduces instruction count by 5.39%
+ // for speed 9 on a typical video dataset with coding performance gain
+ // of 1.44%.
+ // For AVIF image encode, this speed feature reduces encode time
+ // by 8.44% for speed 9 on a typical image dataset with coding performance
+ // gain of 0.78%.
+ bool vbp_prune_16x16_split_using_min_max_sub_blk_var;
+
+ // A qindex threshold that determines whether to use qindex based CDEF filter
+ // strength estimation for screen content types. The strength estimation model
+ // used for screen contents prefers to allow cdef filtering for more frames.
+ // This sf is used to limit the frames that go through cdef filtering; its
+ // settings are as follows.
+ // MAXQ (255): Disables the usage of this sf. The frame does not use the
+ // screen content model, thus reducing the number of frames that go through
+ // cdef filtering.
+ // MINQ (0): Frames always use the screen content model, thus increasing the
+ // number of frames that go through cdef filtering.
+ // This speed feature gives a substantial gain on coding metrics, with a
+ // moderate increase in encoding time. Select the threshold based on the
+ // speed vs. quality trade-off.
+ int screen_content_cdef_filter_qindex_thresh;
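Given the MINQ/MAXQ endpoints described above, the threshold plausibly acts as a lower bound on the frame qindex for enabling the screen-content strength model; the comparison direction below is an inference from the comment, not code from the patch:

    #include <stdbool.h>

    /* MINQ (0) => always true; MAXQ (255) => effectively never. */
    static bool use_screen_cdef_model(int frame_qindex, int qindex_thresh) {
      return frame_qindex >= qindex_thresh;
    }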
+
+ // Prune compound mode if its variance is higher than the variance of single
+ // modes.
+ bool prune_compoundmode_with_singlecompound_var;
+
+ // Allow mode cost update at frame level every couple of frames. This
+ // overrides the command line setting --mode-cost-upd-freq=3 (never update
+ // except on key frame and first delta).
+ bool frame_level_mode_cost_update;
+
+ // Prune H_PRED during intra mode evaluation in the nonrd path based on best
+ // mode so far.
+ //
+ // For allintra encode, this speed feature reduces instruction count by 1.10%
+ // for speed 9 with coding performance change less than 0.04%.
+ // For AVIF image encode, this speed feature reduces encode time by 1.03% for
+ // speed 9 on a typical image dataset with coding performance change less than
+ // 0.08%.
+ bool prune_h_pred_using_best_mode_so_far;
+
+ // If compound is enabled, and the current block size is \geq BLOCK_16X16,
+ // limit the compound modes to GLOBAL_GLOBALMV. This does not apply to the
+ // base layer of svc.
+ bool check_only_zero_zeromv_on_large_blocks;
+
+ // Allow for disabling cdf update for non-reference frames in svc mode.
+ bool disable_cdf_update_non_reference_frame;
+
+ // Prune compound modes if the single modes' variances do not perform well.
+ bool prune_compoundmode_with_singlemode_var;
+
+ // Skip searching all compound modes if the variance of the single-mode
+ // residue is sufficiently low.
+ bool skip_compound_based_on_var;
+
+ // Sets force_zeromv_skip based on the source sad available. Aggressiveness
+ // increases with the level set for this speed feature.
+ // 0: No setting
+ // 1: If source sad is kZeroSad
+ // 2: If source sad <= kVeryLowSad
+ int set_zeromv_skip_based_on_source_sad;
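A sketch of the gating implied by the levels above; the SOURCE_SAD-style enum below is a local stand-in whose exact values are assumptions, only the ordering kZeroSad < kVeryLowSad is taken from the comment:

    typedef enum { kZeroSad = 0, kVeryLowSad, kLowSad, kHighSad } SadClass;

    /* Illustrative: decide whether to set force_zeromv_skip. */
    static int should_force_zeromv_skip(int sf_level, SadClass source_sad) {
      if (sf_level == 1) return source_sad == kZeroSad;
      if (sf_level == 2) return source_sad <= kVeryLowSad;
      return 0; /* level 0: no setting */
    }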
+
+ // Downgrades the subpel search to av1_find_best_sub_pixel_tree_pruned_more
+ // when either the fullpel search performed well or zeromv has low sad.
+ bool use_adaptive_subpel_search;
+
+ // A flag used in the RTC case to control frame_refs_short_signaling. Note
+ // that the final decision is made in check_frame_refs_short_signaling(). The
+ // flag can only be turned on when res < 360p and speed >= 9, in which case
+ // only the LAST and GOLDEN ref frames are used.
+ bool enable_ref_short_signaling;
+
+ // A flag that controls whether we check or bypass GLOBALMV in the rtc
+ // single ref frame case.
+ bool check_globalmv_on_single_ref;
} REAL_TIME_SPEED_FEATURES;
/*!\endcond */
@@ -1574,7 +1746,7 @@ struct AV1_COMP;
* \param[in] cpi Top - level encoder instance structure
* \param[in] speed Speed setting passed in from the command line
*
- * \return No return value but configures the various speed trade off flags
+ * \remark No return value but configures the various speed trade off flags
* based on the passed in speed setting. (Higher speed gives lower
* quality)
*/
@@ -1588,7 +1760,7 @@ void av1_set_speed_features_framesize_independent(struct AV1_COMP *cpi,
* \param[in] cpi Top - level encoder instance structure
* \param[in] speed Speed setting passed in from the command line
*
- * \return No return value but configures the various speed trade off flags
+ * \remark No return value but configures the various speed trade off flags
* based on the passed in speed setting and frame size. (Higher speed
* corresponds to lower quality)
*/
@@ -1601,7 +1773,7 @@ void av1_set_speed_features_framesize_dependent(struct AV1_COMP *cpi,
* \param[in] cpi Top - level encoder instance structure
* \param[in] speed Speed setting passed in from the command line
*
- * \return No return value but configures the various speed trade off flags
+ * \remark No return value but configures the various speed trade off flags
* based on the passed in speed setting and current frame's Q index.
* (Higher speed corresponds to lower quality)
*/
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/svc_layercontext.c b/chromium/third_party/libaom/source/libaom/av1/encoder/svc_layercontext.c
index c132fd46b5f..2575b1b9579 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/svc_layercontext.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/svc_layercontext.c
@@ -70,8 +70,7 @@ void av1_init_layer_context(AV1_COMP *const cpi) {
lc->counter_encode_maxq_scene_change = 0;
if (lc->map) aom_free(lc->map);
CHECK_MEM_ERROR(cm, lc->map,
- aom_malloc(mi_rows * mi_cols * sizeof(*lc->map)));
- memset(lc->map, 0, mi_rows * mi_cols);
+ aom_calloc(mi_rows * mi_cols, sizeof(*lc->map)));
}
}
svc->downsample_filter_type[sl] = BILINEAR;
@@ -80,9 +79,18 @@ void av1_init_layer_context(AV1_COMP *const cpi) {
if (svc->number_spatial_layers == 3) {
svc->downsample_filter_type[0] = EIGHTTAP_SMOOTH;
}
- svc->ref_frame_comp[0] = 0;
- svc->ref_frame_comp[1] = 0;
- svc->ref_frame_comp[2] = 0;
+}
+
+void av1_alloc_layer_context(AV1_COMP *cpi, int num_layers) {
+ AV1_COMMON *const cm = &cpi->common;
+ SVC *const svc = &cpi->svc;
+ if (svc->layer_context == NULL || svc->num_allocated_layers < num_layers) {
+ aom_free(svc->layer_context);
+ CHECK_MEM_ERROR(
+ cm, svc->layer_context,
+ (LAYER_CONTEXT *)aom_calloc(num_layers, sizeof(*svc->layer_context)));
+ svc->num_allocated_layers = num_layers;
+ }
}
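The new allocator grows the layer-context array on demand and keeps the larger allocation across reconfigurations. A plausible call site (the spatial-times-temporal layer count is an assumption) would be:

    /* Illustrative usage: ensure enough contexts exist before
     * (re)initializing them. */
    av1_alloc_layer_context(cpi, cpi->svc.number_spatial_layers *
                                     cpi->svc.number_temporal_layers);
    av1_init_layer_context(cpi);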
// Update the layer context from a change_config() call.
@@ -166,8 +174,16 @@ void av1_update_temporal_layer_framerate(AV1_COMP *const cpi) {
}
}
+static AOM_INLINE bool check_ref_is_low_spatial_res_super_frame(
+ int ref_frame, const SVC *svc, const RTC_REF *rtc_ref) {
+ int ref_frame_idx = rtc_ref->ref_idx[ref_frame - 1];
+ return svc->buffer_time_index[ref_frame_idx] == svc->current_superframe &&
+ svc->buffer_spatial_layer[ref_frame_idx] <= svc->spatial_layer_id - 1;
+}
+
void av1_restore_layer_context(AV1_COMP *const cpi) {
SVC *const svc = &cpi->svc;
+ RTC_REF *const rtc_ref = &cpi->rtc_ref;
const AV1_COMMON *const cm = &cpi->common;
LAYER_CONTEXT *const lc = get_layer_context(cpi);
const int old_frame_since_key = cpi->rc.frames_since_key;
@@ -196,19 +212,21 @@ void av1_restore_layer_context(AV1_COMP *const cpi) {
}
svc->skip_mvsearch_last = 0;
svc->skip_mvsearch_gf = 0;
+ svc->skip_mvsearch_altref = 0;
// For each reference (LAST/GOLDEN) set the skip_mvsearch_last/gf frame flags.
// This is to skip searching mv for that reference if it was last
// refreshed (i.e., buffer slot holding that reference was refreshed) on the
// previous spatial layer(s) at the same time (current_superframe).
- if (svc->set_ref_frame_config && svc->force_zero_mode_spatial_ref) {
- int ref_frame_idx = svc->ref_idx[LAST_FRAME - 1];
- if (svc->buffer_time_index[ref_frame_idx] == svc->current_superframe &&
- svc->buffer_spatial_layer[ref_frame_idx] <= svc->spatial_layer_id - 1)
+ if (rtc_ref->set_ref_frame_config && svc->force_zero_mode_spatial_ref) {
+ if (check_ref_is_low_spatial_res_super_frame(LAST_FRAME, svc, rtc_ref)) {
svc->skip_mvsearch_last = 1;
- ref_frame_idx = svc->ref_idx[GOLDEN_FRAME - 1];
- if (svc->buffer_time_index[ref_frame_idx] == svc->current_superframe &&
- svc->buffer_spatial_layer[ref_frame_idx] <= svc->spatial_layer_id - 1)
+ }
+ if (check_ref_is_low_spatial_res_super_frame(GOLDEN_FRAME, svc, rtc_ref)) {
svc->skip_mvsearch_gf = 1;
+ }
+ if (check_ref_is_low_spatial_res_super_frame(ALTREF_FRAME, svc, rtc_ref)) {
+ svc->skip_mvsearch_altref = 1;
+ }
}
}
@@ -242,10 +260,10 @@ void av1_save_layer_context(AV1_COMP *const cpi) {
svc->buffer_time_index[i] = svc->current_superframe;
svc->buffer_spatial_layer[i] = svc->spatial_layer_id;
}
- } else if (cpi->svc.set_ref_frame_config) {
+ } else if (cpi->rtc_ref.set_ref_frame_config) {
for (unsigned int i = 0; i < INTER_REFS_PER_FRAME; i++) {
- int ref_frame_map_idx = svc->ref_idx[i];
- if (cpi->svc.refresh[ref_frame_map_idx]) {
+ int ref_frame_map_idx = cpi->rtc_ref.ref_idx[i];
+ if (cpi->rtc_ref.refresh[ref_frame_map_idx]) {
svc->buffer_time_index[ref_frame_map_idx] = svc->current_superframe;
svc->buffer_spatial_layer[ref_frame_map_idx] = svc->spatial_layer_id;
}
@@ -265,22 +283,18 @@ void av1_save_layer_context(AV1_COMP *const cpi) {
int av1_svc_primary_ref_frame(const AV1_COMP *const cpi) {
const SVC *const svc = &cpi->svc;
const AV1_COMMON *const cm = &cpi->common;
- int wanted_fb = -1;
+ int fb_idx = -1;
int primary_ref_frame = PRIMARY_REF_NONE;
- for (unsigned int i = 0; i < REF_FRAMES; i++) {
- if (svc->spatial_layer_fb[i] == svc->spatial_layer_id &&
- svc->temporal_layer_fb[i] == svc->temporal_layer_id) {
- wanted_fb = i;
- break;
- }
- }
- if (wanted_fb != -1) {
- for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
- if (get_ref_frame_map_idx(cm, ref_frame) == wanted_fb) {
- primary_ref_frame = ref_frame - LAST_FRAME;
- break;
- }
- }
+ // Set the primary_ref_frame to LAST_FRAME if the buffer slot for LAST
+ // was last updated on a lower temporal layer (or base TL0) of the same
+ // spatial layer. For RTC patterns this allows continued decoding when a
+ // set of enhancement layers is dropped (decoding resumes at the next
+ // base TL0), so error_resilience can be off/0 for all layers.
+ fb_idx = get_ref_frame_map_idx(cm, LAST_FRAME);
+ if (svc->spatial_layer_fb[fb_idx] == svc->spatial_layer_id &&
+ (svc->temporal_layer_fb[fb_idx] < svc->temporal_layer_id ||
+ svc->temporal_layer_fb[fb_idx] == 0)) {
+ primary_ref_frame = 0; // LAST_FRAME
}
return primary_ref_frame;
}
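The condition above can be isolated as a small predicate; this restates the patch's logic for a single (spatial, temporal) layer pair and is illustrative only:

    #include <stdbool.h>

    /* True when LAST's buffer slot was refreshed on the same spatial layer
     * and on a lower temporal layer (or base TL0). */
    static bool last_is_valid_primary_ref(int fb_sl, int fb_tl, int cur_sl,
                                          int cur_tl) {
      return fb_sl == cur_sl && (fb_tl < cur_tl || fb_tl == 0);
    }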
@@ -357,42 +371,43 @@ enum {
// spatial and temporal layers, and the ksvc_fixed_mode.
void av1_set_svc_fixed_mode(AV1_COMP *const cpi) {
SVC *const svc = &cpi->svc;
+ RTC_REF *const rtc_ref = &cpi->rtc_ref;
int i;
assert(svc->use_flexible_mode == 0);
// Fixed SVC mode only supports at most 3 spatial or temporal layers.
assert(svc->number_spatial_layers >= 1 && svc->number_spatial_layers <= 3 &&
svc->number_temporal_layers >= 1 && svc->number_temporal_layers <= 3);
- svc->set_ref_frame_config = 1;
+ rtc_ref->set_ref_frame_config = 1;
int superframe_cnt = svc->current_superframe;
// Set the reference map buffer idx for the 7 references:
// LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
// BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
- for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = i;
- for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->reference[i] = 0;
- for (i = 0; i < REF_FRAMES; i++) svc->refresh[i] = 0;
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) rtc_ref->ref_idx[i] = i;
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) rtc_ref->reference[i] = 0;
+ for (i = 0; i < REF_FRAMES; i++) rtc_ref->refresh[i] = 0;
// Always reference LAST, and reference GOLDEN on SL > 0.
// For KSVC: GOLDEN reference will be removed on INTER_FRAMES later
// when frame_type is set.
- svc->reference[SVC_LAST_FRAME] = 1;
- if (svc->spatial_layer_id > 0) svc->reference[SVC_GOLDEN_FRAME] = 1;
+ rtc_ref->reference[SVC_LAST_FRAME] = 1;
+ if (svc->spatial_layer_id > 0) rtc_ref->reference[SVC_GOLDEN_FRAME] = 1;
if (svc->temporal_layer_id == 0) {
// Base temporal layer.
if (svc->spatial_layer_id == 0) {
// Set all buffer_idx to 0. Update slot 0 (LAST).
- for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 0;
- svc->refresh[0] = 1;
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) rtc_ref->ref_idx[i] = 0;
+ rtc_ref->refresh[0] = 1;
} else if (svc->spatial_layer_id == 1) {
// Set buffer_idx for LAST to slot 1, GOLDEN (and all other refs) to
// slot 0. Update slot 1 (LAST).
- for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 0;
- svc->ref_idx[SVC_LAST_FRAME] = 1;
- svc->refresh[1] = 1;
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) rtc_ref->ref_idx[i] = 0;
+ rtc_ref->ref_idx[SVC_LAST_FRAME] = 1;
+ rtc_ref->refresh[1] = 1;
} else if (svc->spatial_layer_id == 2) {
// Set buffer_idx for LAST to slot 2, GOLDEN (and all other refs) to
// slot 1. Update slot 2 (LAST).
- for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 1;
- svc->ref_idx[SVC_LAST_FRAME] = 2;
- svc->refresh[2] = 1;
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) rtc_ref->ref_idx[i] = 1;
+ rtc_ref->ref_idx[SVC_LAST_FRAME] = 2;
+ rtc_ref->refresh[2] = 1;
}
} else if (svc->temporal_layer_id == 2 && (superframe_cnt - 1) % 4 == 0) {
// First top temporal enhancement layer.
@@ -400,27 +415,27 @@ void av1_set_svc_fixed_mode(AV1_COMP *const cpi) {
// Reference LAST (slot 0).
// Set GOLDEN to slot 3 and update slot 3.
// Set all other buffer_idx to slot 0.
- for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 0;
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) rtc_ref->ref_idx[i] = 0;
if (svc->spatial_layer_id < svc->number_spatial_layers - 1) {
- svc->ref_idx[SVC_GOLDEN_FRAME] = 3;
- svc->refresh[3] = 1;
+ rtc_ref->ref_idx[SVC_GOLDEN_FRAME] = 3;
+ rtc_ref->refresh[3] = 1;
}
} else if (svc->spatial_layer_id == 1) {
// Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
// GOLDEN (and all other refs) to slot 3.
// Set LAST2 to slot 4 and Update slot 4.
- for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 3;
- svc->ref_idx[SVC_LAST_FRAME] = 1;
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) rtc_ref->ref_idx[i] = 3;
+ rtc_ref->ref_idx[SVC_LAST_FRAME] = 1;
if (svc->spatial_layer_id < svc->number_spatial_layers - 1) {
- svc->ref_idx[SVC_LAST2_FRAME] = 4;
- svc->refresh[4] = 1;
+ rtc_ref->ref_idx[SVC_LAST2_FRAME] = 4;
+ rtc_ref->refresh[4] = 1;
}
} else if (svc->spatial_layer_id == 2) {
// Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
// GOLDEN (and all other refs) to slot 4.
// No update.
- for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 4;
- svc->ref_idx[SVC_LAST_FRAME] = 2;
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) rtc_ref->ref_idx[i] = 4;
+ rtc_ref->ref_idx[SVC_LAST_FRAME] = 2;
}
} else if (svc->temporal_layer_id == 1) {
// Middle temporal enhancement layer.
@@ -428,30 +443,30 @@ void av1_set_svc_fixed_mode(AV1_COMP *const cpi) {
// Reference LAST.
// Set all buffer_idx to 0.
// Set GOLDEN to slot 5 and update slot 5.
- for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 0;
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) rtc_ref->ref_idx[i] = 0;
if (svc->temporal_layer_id < svc->number_temporal_layers - 1) {
- svc->ref_idx[SVC_GOLDEN_FRAME] = 5;
- svc->refresh[5] = 1;
+ rtc_ref->ref_idx[SVC_GOLDEN_FRAME] = 5;
+ rtc_ref->refresh[5] = 1;
}
} else if (svc->spatial_layer_id == 1) {
// Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
// GOLDEN (and all other refs) to slot 5.
// Set LAST3 to slot 6 and update slot 6.
- for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 5;
- svc->ref_idx[SVC_LAST_FRAME] = 1;
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) rtc_ref->ref_idx[i] = 5;
+ rtc_ref->ref_idx[SVC_LAST_FRAME] = 1;
if (svc->temporal_layer_id < svc->number_temporal_layers - 1) {
- svc->ref_idx[SVC_LAST3_FRAME] = 6;
- svc->refresh[6] = 1;
+ rtc_ref->ref_idx[SVC_LAST3_FRAME] = 6;
+ rtc_ref->refresh[6] = 1;
}
} else if (svc->spatial_layer_id == 2) {
// Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
// GOLDEN (and all other refs) to slot 6.
// Set LAST3 to slot 7 and update slot 7.
- for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 6;
- svc->ref_idx[SVC_LAST_FRAME] = 2;
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) rtc_ref->ref_idx[i] = 6;
+ rtc_ref->ref_idx[SVC_LAST_FRAME] = 2;
if (svc->temporal_layer_id < svc->number_temporal_layers - 1) {
- svc->ref_idx[SVC_LAST3_FRAME] = 7;
- svc->refresh[7] = 1;
+ rtc_ref->ref_idx[SVC_LAST3_FRAME] = 7;
+ rtc_ref->refresh[7] = 1;
}
}
} else if (svc->temporal_layer_id == 2 && (superframe_cnt - 3) % 4 == 0) {
@@ -460,28 +475,28 @@ void av1_set_svc_fixed_mode(AV1_COMP *const cpi) {
// Set LAST to slot 5 and reference LAST.
// Set GOLDEN to slot 3 and update slot 3.
// Set all other buffer_idx to 0.
- for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 0;
- svc->ref_idx[SVC_LAST_FRAME] = 5;
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) rtc_ref->ref_idx[i] = 0;
+ rtc_ref->ref_idx[SVC_LAST_FRAME] = 5;
if (svc->spatial_layer_id < svc->number_spatial_layers - 1) {
- svc->ref_idx[SVC_GOLDEN_FRAME] = 3;
- svc->refresh[3] = 1;
+ rtc_ref->ref_idx[SVC_GOLDEN_FRAME] = 3;
+ rtc_ref->refresh[3] = 1;
}
} else if (svc->spatial_layer_id == 1) {
// Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
// GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4.
- for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 0;
- svc->ref_idx[SVC_LAST_FRAME] = 6;
- svc->ref_idx[SVC_GOLDEN_FRAME] = 3;
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) rtc_ref->ref_idx[i] = 0;
+ rtc_ref->ref_idx[SVC_LAST_FRAME] = 6;
+ rtc_ref->ref_idx[SVC_GOLDEN_FRAME] = 3;
if (svc->spatial_layer_id < svc->number_spatial_layers - 1) {
- svc->ref_idx[SVC_LAST2_FRAME] = 4;
- svc->refresh[4] = 1;
+ rtc_ref->ref_idx[SVC_LAST2_FRAME] = 4;
+ rtc_ref->refresh[4] = 1;
}
} else if (svc->spatial_layer_id == 2) {
// Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7,
// GOLDEN to slot 4. No update.
- for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 0;
- svc->ref_idx[SVC_LAST_FRAME] = 7;
- svc->ref_idx[SVC_GOLDEN_FRAME] = 4;
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) rtc_ref->ref_idx[i] = 0;
+ rtc_ref->ref_idx[SVC_LAST_FRAME] = 7;
+ rtc_ref->ref_idx[SVC_GOLDEN_FRAME] = 4;
}
}
}
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/svc_layercontext.h b/chromium/third_party/libaom/source/libaom/av1/encoder/svc_layercontext.h
index dc6906d438e..a27d768c2d7 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/svc_layercontext.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/svc_layercontext.h
@@ -90,28 +90,18 @@ typedef struct SVC {
int temporal_layer_id;
int number_spatial_layers;
int number_temporal_layers;
- int set_ref_frame_config;
- int non_reference_frame;
int use_flexible_mode;
int ksvc_fixed_mode;
- int ref_frame_comp[3];
/*!\endcond */
- /*!
- * LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
- * BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
- */
- int reference[INTER_REFS_PER_FRAME];
/*!\cond */
- int ref_idx[INTER_REFS_PER_FRAME];
- int refresh[REF_FRAMES];
- int gld_idx_1layer;
double base_framerate;
unsigned int current_superframe;
unsigned int buffer_time_index[REF_FRAMES];
unsigned char buffer_spatial_layer[REF_FRAMES];
int skip_mvsearch_last;
int skip_mvsearch_gf;
+ int skip_mvsearch_altref;
int spatial_layer_fb[REF_FRAMES];
int temporal_layer_fb[REF_FRAMES];
int num_encoded_top_layer;
@@ -122,7 +112,12 @@ typedef struct SVC {
/*!
* Layer context used for rate control in CBR mode.
*/
- LAYER_CONTEXT layer_context[AOM_MAX_LAYERS];
+ LAYER_CONTEXT *layer_context;
+
+ /*!
+ * Number of layers allocated for layer_context.
+ */
+ int num_allocated_layers;
/*!
* EIGHTTAP_SMOOTH or BILINEAR
@@ -151,10 +146,23 @@ struct AV1_COMP;
*
* \param[in] cpi Top level encoder structure
*
- * \return Nothing returned. Set cpi->svc.
+ * \remark Nothing returned. Set cpi->svc.
*/
void av1_init_layer_context(struct AV1_COMP *const cpi);
+/*!\brief Allocate layer context data.
+ *
+ * \ingroup SVC
+ * \callgraph
+ * \callergraph
+ *
+ * \param[in] cpi Top level encoder structure
+ * \param[in] num_layers Number of layers to be allocated
+ *
+ * \remark Nothing returned. Allocates memory for cpi->svc.layer_context.
+ */
+void av1_alloc_layer_context(struct AV1_COMP *cpi, int num_layers);
+
/*!\brief Update the layer context from a change_config() call.
*
* \ingroup SVC
@@ -164,7 +172,7 @@ void av1_init_layer_context(struct AV1_COMP *const cpi);
* \param[in] cpi Top level encoder structure
* \param[in] target_bandwidth Total target bandwidth
*
- * \return Nothing returned. Buffer level for each layer is set.
+ * \remark Nothing returned. Buffer level for each layer is set.
*/
void av1_update_layer_context_change_config(struct AV1_COMP *const cpi,
const int64_t target_bandwidth);
@@ -178,7 +186,7 @@ void av1_update_layer_context_change_config(struct AV1_COMP *const cpi,
*
* \param[in] cpi Top level encoder structure
*
- * \return Nothing returned. Frame related quantities for current temporal
+ * \remark Nothing returned. Frame related quantities for current temporal
layer are updated.
*/
void av1_update_temporal_layer_framerate(struct AV1_COMP *const cpi);
@@ -192,7 +200,7 @@ void av1_update_temporal_layer_framerate(struct AV1_COMP *const cpi);
*
* \param[in] cpi Top level encoder structure
*
- * \return Nothing returned. Layer context for current layer is set.
+ * \remark Nothing returned. Layer context for current layer is set.
*/
void av1_restore_layer_context(struct AV1_COMP *const cpi);
@@ -203,8 +211,6 @@ void av1_restore_layer_context(struct AV1_COMP *const cpi);
* \callergraph
*
* \param[in] cpi Top level encoder structure
- *
- * \return Nothing returned.
*/
void av1_save_layer_context(struct AV1_COMP *const cpi);
@@ -215,8 +221,6 @@ void av1_save_layer_context(struct AV1_COMP *const cpi);
* \callergraph
*
* \param[in] cpi Top level encoder structure
- *
- * \return Nothing returned.
*/
void av1_free_svc_cyclic_refresh(struct AV1_COMP *const cpi);
@@ -228,8 +232,6 @@ void av1_free_svc_cyclic_refresh(struct AV1_COMP *const cpi);
*
* \param[in] cpi Top level encoder structure
* \param[in] is_key Whether current layer is key frame
- *
- * \return Nothing returned.
*/
void av1_svc_reset_temporal_layers(struct AV1_COMP *const cpi, int is_key);
@@ -240,8 +242,6 @@ void av1_svc_reset_temporal_layers(struct AV1_COMP *const cpi, int is_key);
* \callergraph
*
* \param[in] cpi Top level encoder structure
- *
- * \return Nothing returned.
*/
void av1_one_pass_cbr_svc_start_layer(struct AV1_COMP *const cpi);
@@ -267,7 +267,7 @@ int av1_svc_primary_ref_frame(const struct AV1_COMP *const cpi);
* \param[in] width_out Output width, scaled for current layer
* \param[in] height_out Output height, scaled for current layer
*
- * \return Nothing is returned. Instead the scaled width and height are set.
+ * \remark Nothing is returned. Instead the scaled width and height are set.
*/
void av1_get_layer_resolution(const int width_org, const int height_org,
const int num, const int den, int *width_out,
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/temporal_filter.c b/chromium/third_party/libaom/source/libaom/av1/encoder/temporal_filter.c
index c876679a921..62a4d3b80f6 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/temporal_filter.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/temporal_filter.c
@@ -33,6 +33,7 @@
#include "av1/encoder/firstpass.h"
#include "av1/encoder/gop_structure.h"
#include "av1/encoder/mcomp.h"
+#include "av1/encoder/motion_search_facade.h"
#include "av1/encoder/pass2_strategy.h"
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/reconinter_enc.h"
@@ -80,7 +81,7 @@ static void tf_determine_block_partition(const MV block_mv, const int block_mse,
* \param[out] subblock_mses Pointer to the search errors (MSE) for 4
* sub-blocks
*
- * \return Nothing will be returned. Results are saved in subblock_mvs and
+ * \remark Nothing will be returned. Results are saved in subblock_mvs and
* subblock_mses
*/
static void tf_motion_search(AV1_COMP *cpi, MACROBLOCK *mb,
@@ -109,8 +110,8 @@ static void tf_motion_search(AV1_COMP *cpi, MACROBLOCK *mb,
FULLPEL_MOTION_SEARCH_PARAMS full_ms_params;
SUBPEL_MOTION_SEARCH_PARAMS ms_params;
const SEARCH_METHODS search_method = NSTEP;
- const search_site_config *search_site_cfg =
- cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
+ const search_site_config *search_site_cfg = av1_get_search_site_config(
+ mb->search_site_cfg_buf, &cpi->mv_search_params, search_method, y_stride);
const int step_param = av1_init_search_range(
AOMMAX(frame_to_filter->y_crop_width, frame_to_filter->y_crop_height));
const SUBPEL_SEARCH_TYPE subpel_search_type = USE_8_TAPS;
@@ -322,7 +323,7 @@ static INLINE int is_frame_high_bitdepth(const YV12_BUFFER_CONFIG *frame) {
* order)
* \param[out] pred Pointer to the predictor to be built
*
- * \return Nothing returned, But the contents of `pred` will be modified
+ * \remark Nothing returned, But the contents of `pred` will be modified
*/
static void tf_build_predictor(const YV12_BUFFER_CONFIG *ref_frame,
const MACROBLOCKD *mbd,
@@ -550,7 +551,7 @@ void compute_luma_sq_error_sum(uint32_t *square_diff, uint32_t *luma_sse_sum,
* \param[out] count Pointer to the pixel-wise counter for
* filtering
*
- * \return Nothing returned, But the contents of `accum`, `pred` and 'count'
+ * \remark Nothing returned, But the contents of `accum`, `pred` and 'count'
* will be modified
*/
void av1_apply_temporal_filter_c(
@@ -608,11 +609,20 @@ void av1_apply_temporal_filter_c(
// Allocate memory for pixel-wise squared differences. They,
// regardless of the subsampling, are assigned with memory of size `mb_pels`.
uint32_t *square_diff = aom_memalign(16, mb_pels * sizeof(uint32_t));
+ if (!square_diff) {
+ aom_internal_error(mbd->error_info, AOM_CODEC_MEM_ERROR,
+ "Error allocating temporal filter data");
+ }
memset(square_diff, 0, mb_pels * sizeof(square_diff[0]));
// Allocate memory for accumulated luma squared error. This value will be
// consumed while filtering the chroma planes.
uint32_t *luma_sse_sum = aom_memalign(32, mb_pels * sizeof(uint32_t));
+ if (!luma_sse_sum) {
+ aom_free(square_diff);
+ aom_internal_error(mbd->error_info, AOM_CODEC_MEM_ERROR,
+ "Error allocating temporal filter data");
+ }
memset(luma_sse_sum, 0, mb_pels * sizeof(luma_sse_sum[0]));
// Get window size for pixel-wise filtering.
@@ -724,7 +734,7 @@ void av1_highbd_apply_temporal_filter_c(
* \param[in] count Pointer to the pre-computed count
* \param[out] result_buffer Pointer to result buffer
*
- * \return Nothing returned, but the content to which `result_buffer` pointer
+ * \remark Nothing returned, but the content to which `result_buffer` points
* will be modified
*/
static void tf_normalize_filtered_frame(
@@ -904,7 +914,7 @@ void av1_tf_do_filtering_row(AV1_COMP *cpi, ThreadData *td, int mb_row) {
* \ingroup src_frame_proc
* \param[in] cpi Top level encoder instance structure
*
- * \return Nothing will be returned, but the contents of td->diff will be
+ * \remark Nothing will be returned, but the contents of td->diff will be
modified.
*/
static void tf_do_filtering(AV1_COMP *cpi) {
@@ -939,7 +949,7 @@ static void tf_do_filtering(AV1_COMP *cpi) {
* in the lookahead buffer cpi->lookahead
* \param[in] gf_frame_index GOP index
*
- * \return Nothing will be returned. But the fields `frames`, `num_frames`,
+ * \remark Nothing will be returned, but the fields `frames`, `num_frames`,
* `filter_frame_idx` and `noise_levels` will be updated in cpi->tf_ctx.
*/
static void tf_setup_filtering_buffer(AV1_COMP *cpi,
@@ -1234,10 +1244,8 @@ void av1_temporal_filter(AV1_COMP *cpi, const int filter_frame_lookahead_idx,
// TODO(anyone): Currently, we enforce the filtering strength on internal
// ARFs except the second ARF to be zero. We should investigate in which case
// it is more beneficial to use non-zero strength filtering.
-#if CONFIG_FRAME_PARALLEL_ENCODE
// Only parallel level 0 frames go through temporal filtering.
assert(cpi->ppi->gf_group.frame_parallel_level[gf_frame_index] == 0);
-#endif // CONFIG_FRAME_PARALLEL_ENCODE
// Initialize temporal filter context structure.
init_tf_ctx(cpi, filter_frame_lookahead_idx, gf_frame_index,
@@ -1245,7 +1253,10 @@ void av1_temporal_filter(AV1_COMP *cpi, const int filter_frame_lookahead_idx,
// Allocate and reset temporal filter buffers.
const int is_highbitdepth = tf_ctx->is_highbitdepth;
- tf_alloc_and_reset_data(tf_data, tf_ctx->num_pels, is_highbitdepth);
+ if (!tf_alloc_and_reset_data(tf_data, tf_ctx->num_pels, is_highbitdepth)) {
+ aom_internal_error(cpi->common.error, AOM_CODEC_MEM_ERROR,
+ "Error allocating temporal filter data");
+ }
// Perform temporal filtering process.
if (mt_info->num_workers > 1)
@@ -1264,12 +1275,12 @@ int av1_is_temporal_filter_on(const AV1EncoderConfig *oxcf) {
return oxcf->algo_cfg.arnr_max_frames > 0 && oxcf->gf_cfg.lag_in_frames > 1;
}
-void av1_tf_info_alloc(TEMPORAL_FILTER_INFO *tf_info, AV1_COMP *cpi) {
+void av1_tf_info_alloc(TEMPORAL_FILTER_INFO *tf_info, const AV1_COMP *cpi) {
const AV1EncoderConfig *oxcf = &cpi->oxcf;
tf_info->is_temporal_filter_on = av1_is_temporal_filter_on(oxcf);
if (tf_info->is_temporal_filter_on == 0) return;
- AV1_COMMON *cm = &cpi->common;
+ const AV1_COMMON *cm = &cpi->common;
const SequenceHeader *const seq_params = cm->seq_params;
int ret;
for (int i = 0; i < TF_INFO_BUF_COUNT; ++i) {
@@ -1278,23 +1289,12 @@ void av1_tf_info_alloc(TEMPORAL_FILTER_INFO *tf_info, AV1_COMP *cpi) {
seq_params->subsampling_x, seq_params->subsampling_y,
seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
cm->features.byte_alignment, NULL, NULL, NULL,
- cpi->oxcf.tool_cfg.enable_global_motion);
+ cpi->oxcf.tool_cfg.enable_global_motion, 0);
if (ret) {
aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate tf_info");
}
}
-
- ret = aom_realloc_frame_buffer(
- &tf_info->tf_buf_second_arf, oxcf->frm_dim_cfg.width,
- oxcf->frm_dim_cfg.height, seq_params->subsampling_x,
- seq_params->subsampling_y, seq_params->use_highbitdepth,
- cpi->oxcf.border_in_pixels, cm->features.byte_alignment, NULL, NULL, NULL,
- cpi->oxcf.tool_cfg.enable_global_motion);
- if (ret) {
- aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
- "Failed to allocate tf_info");
- }
}
void av1_tf_info_free(TEMPORAL_FILTER_INFO *tf_info) {
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/temporal_filter.h b/chromium/third_party/libaom/source/libaom/av1/encoder/temporal_filter.h
index f6ae27fd5d1..725bd869d0a 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/temporal_filter.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/temporal_filter.h
@@ -12,6 +12,8 @@
#ifndef AOM_AV1_ENCODER_TEMPORAL_FILTER_H_
#define AOM_AV1_ENCODER_TEMPORAL_FILTER_H_
+#include <stdbool.h>
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -200,7 +202,8 @@ int av1_is_temporal_filter_on(const struct AV1EncoderConfig *oxcf);
* \param[in,out] tf_info Temporal filter info for a gop
* \param[in,out] cpi Top level encoder instance structure
*/
-void av1_tf_info_alloc(TEMPORAL_FILTER_INFO *tf_info, struct AV1_COMP *cpi);
+void av1_tf_info_alloc(TEMPORAL_FILTER_INFO *tf_info,
+ const struct AV1_COMP *cpi);
/*!\brief Free buffers for TEMPORAL_FILTER_INFO
* \param[in,out] tf_info Temporal filter info for a gop
@@ -284,7 +287,7 @@ double av1_estimate_noise_from_single_plane(const YV12_BUFFER_CONFIG *frame,
* \param[in] mb_row Macroblock row to be filtered
*
-* \return Nothing will be returned, but the contents of td->diff will be
+* \remark Nothing will be returned, but the contents of td->diff will be
modified.
*/
void av1_tf_do_filtering_row(struct AV1_COMP *cpi, struct ThreadData *td,
@@ -348,7 +351,7 @@ int av1_get_q(const struct AV1_COMP *cpi);
// is_high_bitdepth: Whether the frame is high-bitdepth or not.
// Returns:
// Nothing will be returned. But the contents of tf_data will be modified.
-static AOM_INLINE void tf_alloc_and_reset_data(TemporalFilterData *tf_data,
+static AOM_INLINE bool tf_alloc_and_reset_data(TemporalFilterData *tf_data,
int num_pels,
int is_high_bitdepth) {
tf_data->tmp_mbmi = (MB_MODE_INFO *)malloc(sizeof(*tf_data->tmp_mbmi));
@@ -364,6 +367,13 @@ static AOM_INLINE void tf_alloc_and_reset_data(TemporalFilterData *tf_data,
else
tf_data->pred =
(uint8_t *)aom_memalign(32, num_pels * sizeof(*tf_data->pred));
+ if (!(tf_data->accum && tf_data->count && tf_data->pred)) {
+ aom_free(tf_data->accum);
+ aom_free(tf_data->count);
+ aom_free(tf_data->pred);
+ return false;
+ }
+ return true;
}
// Setup macroblockd params for temporal filtering process.
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/thirdpass.c b/chromium/third_party/libaom/source/libaom/av1/encoder/thirdpass.c
index d5265540d1d..cbd9a690584 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/thirdpass.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/thirdpass.c
@@ -8,7 +8,9 @@
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
+#include "av1/encoder/thirdpass.h"
+#if CONFIG_THREE_PASS && CONFIG_AV1_DECODER
#include "aom/aom_codec.h"
#include "aom/aomdx.h"
#include "aom_dsp/psnr.h"
@@ -16,14 +18,9 @@
#include "av1/av1_iface_common.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/firstpass.h"
-#include "av1/encoder/thirdpass.h"
#include "av1/common/blockd.h"
-
-#if CONFIG_THREE_PASS
#include "common/ivfdec.h"
-#endif
-#if CONFIG_THREE_PASS
static void setup_two_pass_stream_input(
struct AvxInputContext **input_ctx_ptr, const char *input_file_name,
struct aom_internal_error_info *err_info) {
@@ -64,7 +61,6 @@ static void init_third_pass(THIRD_PASS_DEC_CTX *ctx) {
ctx->err_info);
}
-#if CONFIG_AV1_DECODER
if (!ctx->decoder.iface) {
aom_codec_iface_t *decoder_iface = &aom_codec_av1_inspect_algo;
if (aom_codec_dec_init(&ctx->decoder, decoder_iface, NULL, 0)) {
@@ -72,19 +68,12 @@ static void init_third_pass(THIRD_PASS_DEC_CTX *ctx) {
"Failed to initialize decoder.");
}
}
-#else
- aom_internal_error(ctx->err_info, AOM_CODEC_ERROR,
- "To utilize three-pass encoding, libaom must be built "
- "with CONFIG_AV1_DECODER=1.");
-#endif
}
-#endif // CONFIG_THREE_PASS
// Return 0: success
// 1: cannot read because this is end of file
// -1: failure to read the frame
static int read_frame(THIRD_PASS_DEC_CTX *ctx) {
-#if CONFIG_THREE_PASS
if (!ctx->input_ctx || !ctx->decoder.iface) {
init_third_pass(ctx);
}
@@ -101,10 +90,7 @@ static int read_frame(THIRD_PASS_DEC_CTX *ctx) {
ctx->end_frame = ctx->frame + ctx->bytes_in_buffer;
ctx->have_frame = 1;
}
-#else
- aom_internal_error(ctx->err_info, AOM_CODEC_ERROR,
- "Cannot parse bitstream without CONFIG_THREE_PASS.");
-#endif
+
Av1DecodeReturn adr;
if (aom_codec_decode(&ctx->decoder, ctx->frame,
(unsigned int)ctx->bytes_in_buffer,
@@ -404,10 +390,8 @@ void av1_free_thirdpass_ctx(THIRD_PASS_DEC_CTX *ctx) {
if (ctx->decoder.iface) {
aom_codec_destroy(&ctx->decoder);
}
-#if CONFIG_THREE_PASS
if (ctx->input_ctx && ctx->input_ctx->file) fclose(ctx->input_ctx->file);
aom_free(ctx->input_ctx);
-#endif
if (ctx->buf) free(ctx->buf);
for (int i = 0; i < MAX_THIRD_PASS_BUF; i++) {
free_frame_info(&ctx->frame_info[i]);
@@ -706,6 +690,119 @@ PARTITION_TYPE av1_third_pass_get_sb_part_type(THIRD_PASS_DEC_CTX *ctx,
return corner_mi->partition;
}
+#else // !(CONFIG_THREE_PASS && CONFIG_AV1_DECODER)
+void av1_init_thirdpass_ctx(AV1_COMMON *cm, THIRD_PASS_DEC_CTX **ctx,
+ const char *file) {
+ (void)ctx;
+ (void)file;
+ aom_internal_error(cm->error, AOM_CODEC_ERROR,
+ "To utilize three-pass encoding, libaom must be built "
+ "with CONFIG_THREE_PASS=1 & CONFIG_AV1_DECODER=1.");
+}
+
+void av1_free_thirdpass_ctx(THIRD_PASS_DEC_CTX *ctx) { (void)ctx; }
+
+void av1_set_gop_third_pass(THIRD_PASS_DEC_CTX *ctx) { (void)ctx; }
+
+void av1_pop_third_pass_info(THIRD_PASS_DEC_CTX *ctx) { (void)ctx; }
+
+void av1_open_second_pass_log(struct AV1_COMP *cpi, int is_read) {
+ (void)cpi;
+ (void)is_read;
+}
+
+void av1_close_second_pass_log(struct AV1_COMP *cpi) { (void)cpi; }
+
+void av1_write_second_pass_gop_info(struct AV1_COMP *cpi) { (void)cpi; }
+
+void av1_write_second_pass_per_frame_info(struct AV1_COMP *cpi, int gf_index) {
+ (void)cpi;
+ (void)gf_index;
+}
+
+void av1_read_second_pass_gop_info(FILE *second_pass_log_stream,
+ THIRD_PASS_GOP_INFO *gop_info,
+ struct aom_internal_error_info *error) {
+ (void)second_pass_log_stream;
+ (void)gop_info;
+ (void)error;
+}
+
+void av1_read_second_pass_per_frame_info(
+ FILE *second_pass_log_stream, THIRD_PASS_FRAME_INFO *frame_info_arr,
+ int frame_info_count, struct aom_internal_error_info *error) {
+ (void)second_pass_log_stream;
+ (void)frame_info_arr;
+ (void)frame_info_count;
+ (void)error;
+}
+
+int av1_check_use_arf(THIRD_PASS_DEC_CTX *ctx) {
+ (void)ctx;
+ return 1;
+}
+
+void av1_get_third_pass_ratio(THIRD_PASS_DEC_CTX *ctx, int fidx, int fheight,
+ int fwidth, double *ratio_h, double *ratio_w) {
+ (void)ctx;
+ (void)fidx;
+ (void)fheight;
+ (void)fwidth;
+ (void)ratio_h;
+ (void)ratio_w;
+}
+
+THIRD_PASS_MI_INFO *av1_get_third_pass_mi(THIRD_PASS_DEC_CTX *ctx, int fidx,
+ int mi_row, int mi_col,
+ double ratio_h, double ratio_w) {
+ (void)ctx;
+ (void)fidx;
+ (void)mi_row;
+ (void)mi_col;
+ (void)ratio_h;
+ (void)ratio_w;
+ return NULL;
+}
+
+int_mv av1_get_third_pass_adjusted_mv(THIRD_PASS_MI_INFO *this_mi,
+ double ratio_h, double ratio_w,
+ MV_REFERENCE_FRAME frame) {
+ (void)this_mi;
+ (void)ratio_h;
+ (void)ratio_w;
+ (void)frame;
+ int_mv mv;
+ mv.as_int = INVALID_MV;
+ return mv;
+}
+
+BLOCK_SIZE av1_get_third_pass_adjusted_blk_size(THIRD_PASS_MI_INFO *this_mi,
+ double ratio_h,
+ double ratio_w) {
+ (void)this_mi;
+ (void)ratio_h;
+ (void)ratio_w;
+ return BLOCK_INVALID;
+}
+
+void av1_third_pass_get_adjusted_mi(THIRD_PASS_MI_INFO *third_pass_mi,
+ double ratio_h, double ratio_w, int *mi_row,
+ int *mi_col) {
+ (void)third_pass_mi;
+ (void)ratio_h;
+ (void)ratio_w;
+ (void)mi_row;
+ (void)mi_col;
+}
+
+PARTITION_TYPE av1_third_pass_get_sb_part_type(THIRD_PASS_DEC_CTX *ctx,
+ THIRD_PASS_MI_INFO *this_mi) {
+ (void)ctx;
+ (void)this_mi;
+ return PARTITION_INVALID;
+}
+#endif // CONFIG_THREE_PASS && CONFIG_AV1_DECODER
+
#if CONFIG_BITRATE_ACCURACY
static void fwrite_and_check(const void *ptr, size_t size, size_t nmemb,
FILE *stream,
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/tokenize.h b/chromium/third_party/libaom/source/libaom/av1/encoder/tokenize.h
index 6451b9d1b60..f675c489aea 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/tokenize.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/tokenize.h
@@ -109,8 +109,8 @@ static INLINE unsigned int get_token_alloc(int mb_rows, int mb_cols,
const int shift = sb_size_log2 - 4;
const int sb_size = 1 << sb_size_log2;
const int sb_size_square = sb_size * sb_size;
- const int sb_rows = ALIGN_POWER_OF_TWO(mb_rows, shift) >> shift;
- const int sb_cols = ALIGN_POWER_OF_TWO(mb_cols, shift) >> shift;
+ const int sb_rows = CEIL_POWER_OF_TWO(mb_rows, shift);
+ const int sb_cols = CEIL_POWER_OF_TWO(mb_cols, shift);
// One palette token for each pixel. There can be palettes on two planes.
const int sb_palette_toks = AOMMIN(2, num_planes) * sb_size_square;
@@ -121,9 +121,8 @@ static INLINE unsigned int get_token_alloc(int mb_rows, int mb_cols,
// Allocate memory for token related info.
static AOM_INLINE void alloc_token_info(AV1_COMMON *cm, TokenInfo *token_info,
unsigned int tokens_required) {
- int mi_rows_aligned_to_sb =
- ALIGN_POWER_OF_TWO(cm->mi_params.mi_rows, cm->seq_params->mib_size_log2);
- int sb_rows = mi_rows_aligned_to_sb >> cm->seq_params->mib_size_log2;
+ int sb_rows =
+ CEIL_POWER_OF_TWO(cm->mi_params.mi_rows, cm->seq_params->mib_size_log2);
token_info->tokens_allocated = tokens_required;
CHECK_MEM_ERROR(cm, token_info->tile_tok[0][0],
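The tokenize.h change relies on the identity that aligning up to a power of two and then shifting equals a ceiling division by that power of two. Assuming the usual macro definitions (reproduced here for illustration), the two forms agree:

    #include <assert.h>

    #define ALIGN_POWER_OF_TWO(v, n) (((v) + (1 << (n)) - 1) & ~((1 << (n)) - 1))
    #define CEIL_POWER_OF_TWO(v, n) (((v) + (1 << (n)) - 1) >> (n))

    int main(void) {
      for (int v = 0; v < 4096; ++v)
        assert((ALIGN_POWER_OF_TWO(v, 4) >> 4) == CEIL_POWER_OF_TWO(v, 4));
      return 0;
    }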
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/tpl_model.c b/chromium/third_party/libaom/source/libaom/av1/encoder/tpl_model.c
index 79d313b151d..b8366f02864 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/tpl_model.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/tpl_model.c
@@ -154,6 +154,8 @@ void av1_setup_tpl_buffers(AV1_PRIMARY *const ppi,
tpl_data->border_in_pixels =
ALIGN_POWER_OF_TWO(tpl_data->tpl_bsize_1d + 2 * AOM_INTERP_EXTEND, 5);
+ const int alloc_y_plane_only =
+ ppi->cpi->sf.tpl_sf.use_y_only_rate_distortion ? 1 : 0;
for (int frame = 0; frame < MAX_LENGTH_TPL_FRAME_STATS; ++frame) {
const int mi_cols =
ALIGN_POWER_OF_TWO(mi_params->mi_cols, MAX_MIB_SIZE_LOG2);
@@ -184,17 +186,17 @@ void av1_setup_tpl_buffers(AV1_PRIMARY *const ppi,
tpl_data->tpl_stats_buffer[frame].height,
sizeof(*tpl_data->tpl_stats_buffer[frame].tpl_stats_ptr)));
- if (aom_alloc_frame_buffer(&tpl_data->tpl_rec_pool[frame], width, height,
- seq_params->subsampling_x,
- seq_params->subsampling_y,
- seq_params->use_highbitdepth,
- tpl_data->border_in_pixels, byte_alignment))
+ if (aom_alloc_frame_buffer(
+ &tpl_data->tpl_rec_pool[frame], width, height,
+ seq_params->subsampling_x, seq_params->subsampling_y,
+ seq_params->use_highbitdepth, tpl_data->border_in_pixels,
+ byte_alignment, alloc_y_plane_only))
aom_internal_error(&ppi->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate frame buffer");
}
}
-static AOM_INLINE int64_t tpl_get_satd_cost(BitDepthInfo bd_info,
+static AOM_INLINE int32_t tpl_get_satd_cost(BitDepthInfo bd_info,
int16_t *src_diff, int diff_stride,
const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
@@ -456,8 +458,8 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi,
int frame_offset = tpl_data->frame_idx - cpi->gf_frame_index;
- int64_t best_intra_cost = INT64_MAX;
- int64_t intra_cost;
+ int32_t best_intra_cost = INT32_MAX;
+ int32_t intra_cost;
PREDICTION_MODE best_mode = DC_PRED;
int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
@@ -501,6 +503,16 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi,
int64_t recon_error = 1;
int64_t pred_error = 1;
+ if (!(predictor8 && src_diff && coeff && qcoeff && dqcoeff)) {
+ aom_free(predictor8);
+ aom_free(src_diff);
+ aom_free(coeff);
+ aom_free(qcoeff);
+ aom_free(dqcoeff);
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
+ "Error allocating tpl data");
+ }
+
memset(tpl_stats, 0, sizeof(*tpl_stats));
tpl_stats->ref_frame_index[0] = -1;
tpl_stats->ref_frame_index[1] = -1;
@@ -594,8 +606,8 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi,
int best_rf_idx = -1;
int_mv best_mv[2];
- int64_t inter_cost;
- int64_t best_inter_cost = INT64_MAX;
+ int32_t inter_cost;
+ int32_t best_inter_cost = INT32_MAX;
int rf_idx;
int_mv single_mv[INTER_REFS_PER_FRAME];
@@ -738,14 +750,16 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi,
best_inter_cost = inter_cost;
best_mv[0].as_int = best_rfidx_mv.as_int;
- if (best_inter_cost < best_intra_cost) {
- best_mode = NEWMV;
- xd->mi[0]->ref_frame[0] = best_rf_idx + LAST_FRAME;
- xd->mi[0]->mv[0].as_int = best_mv[0].as_int;
- }
}
}
+ if (best_rf_idx != -1 && best_inter_cost < best_intra_cost) {
+ best_mode = NEWMV;
+ xd->mi[0]->ref_frame[0] = best_rf_idx + LAST_FRAME;
+ xd->mi[0]->mv[0].as_int = best_mv[0].as_int;
+ }
+
+ // Start compound prediction search.
int comp_ref_frames[3][2] = {
{ 0, 4 },
{ 0, 6 },
@@ -804,8 +818,14 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi,
tpl_data->src_ref_frame[rf_idx1],
};
- xd->mi[0]->ref_frame[0] = LAST_FRAME;
- xd->mi[0]->ref_frame[1] = ALTREF_FRAME;
+ xd->mi[0]->ref_frame[0] = rf_idx0 + LAST_FRAME;
+ xd->mi[0]->ref_frame[1] = rf_idx1 + LAST_FRAME;
+ xd->mi[0]->mode = NEW_NEWMV;
+ const int8_t ref_frame_type = av1_ref_frame_type(xd->mi[0]->ref_frame);
+ // Set up ref_mv for av1_joint_motion_search().
+ CANDIDATE_MV *this_ref_mv_stack = x->mbmi_ext.ref_mv_stack[ref_frame_type];
+ this_ref_mv_stack[xd->mi[0]->ref_mv_idx].this_mv = single_mv[rf_idx0];
+ this_ref_mv_stack[xd->mi[0]->ref_mv_idx].comp_mv = single_mv[rf_idx1];
struct buf_2d yv12_mb[2][MAX_MB_PLANE];
for (int i = 0; i < 2; ++i) {
@@ -847,16 +867,18 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi,
best_inter_cost = inter_cost;
best_mv[0] = tmp_mv[0];
best_mv[1] = tmp_mv[1];
-
- if (best_inter_cost < best_intra_cost) {
- best_mode = NEW_NEWMV;
- xd->mi[0]->ref_frame[0] = rf_idx0 + LAST_FRAME;
- xd->mi[0]->ref_frame[1] = rf_idx1 + LAST_FRAME;
- }
}
}
- if (best_inter_cost < INT64_MAX) {
+ if (best_cmp_rf_idx != -1 && best_inter_cost < best_intra_cost) {
+ best_mode = NEW_NEWMV;
+ const int best_rf_idx0 = comp_ref_frames[best_cmp_rf_idx][0];
+ const int best_rf_idx1 = comp_ref_frames[best_cmp_rf_idx][1];
+ xd->mi[0]->ref_frame[0] = best_rf_idx0 + LAST_FRAME;
+ xd->mi[0]->ref_frame[1] = best_rf_idx1 + LAST_FRAME;
+ }
+
+ if (best_inter_cost < INT32_MAX) {
xd->mi[0]->mv[0].as_int = best_mv[0].as_int;
xd->mi[0]->mv[1].as_int = best_mv[1].as_int;
const YV12_BUFFER_CONFIG *ref_frame_ptr[2] = {
@@ -872,13 +894,13 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi,
qcoeff, dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool,
rec_stride_pool, tx_size, best_mode, mi_row, mi_col,
use_y_only_rate_distortion, NULL);
- tpl_stats->srcrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
+ tpl_stats->srcrf_rate = rate_cost;
}
best_intra_cost = AOMMAX(best_intra_cost, 1);
best_inter_cost = AOMMIN(best_intra_cost, best_inter_cost);
- tpl_stats->inter_cost = best_inter_cost << TPL_DEP_COST_SCALE_LOG2;
- tpl_stats->intra_cost = best_intra_cost << TPL_DEP_COST_SCALE_LOG2;
+ tpl_stats->inter_cost = best_inter_cost;
+ tpl_stats->intra_cost = best_intra_cost;
tpl_stats->srcrf_dist = recon_error << TPL_DEP_COST_SCALE_LOG2;
tpl_stats->srcrf_sse = pred_error << TPL_DEP_COST_SCALE_LOG2;
@@ -890,7 +912,8 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi,
ref_frame_ptr[0] =
best_mode == NEW_NEWMV
? tpl_data->ref_frame[comp_ref_frames[best_cmp_rf_idx][0]]
- : best_rf_idx >= 0 ? tpl_data->ref_frame[best_rf_idx] : NULL;
+ : best_rf_idx >= 0 ? tpl_data->ref_frame[best_rf_idx]
+ : NULL;
ref_frame_ptr[1] =
best_mode == NEW_NEWMV
? tpl_data->ref_frame[comp_ref_frames[best_cmp_rf_idx][1]]
@@ -901,11 +924,11 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi,
use_y_only_rate_distortion, tpl_txfm_stats);
tpl_stats->recrf_dist = recon_error << (TPL_DEP_COST_SCALE_LOG2);
- tpl_stats->recrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
+ tpl_stats->recrf_rate = rate_cost;
if (!is_inter_mode(best_mode)) {
tpl_stats->srcrf_dist = recon_error << (TPL_DEP_COST_SCALE_LOG2);
- tpl_stats->srcrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
+ tpl_stats->srcrf_rate = rate_cost;
tpl_stats->srcrf_sse = pred_error << TPL_DEP_COST_SCALE_LOG2;
}
@@ -921,7 +944,7 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi,
rec_stride_pool, tx_size, best_mode, mi_row, mi_col,
use_y_only_rate_distortion, NULL);
tpl_stats->cmp_recrf_dist[0] = recon_error << TPL_DEP_COST_SCALE_LOG2;
- tpl_stats->cmp_recrf_rate[0] = rate_cost << TPL_DEP_COST_SCALE_LOG2;
+ tpl_stats->cmp_recrf_rate[0] = rate_cost;
tpl_stats->cmp_recrf_dist[0] =
AOMMAX(tpl_stats->srcrf_dist, tpl_stats->cmp_recrf_dist[0]);
@@ -942,7 +965,7 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi,
rec_stride_pool, tx_size, best_mode, mi_row, mi_col,
use_y_only_rate_distortion, NULL);
tpl_stats->cmp_recrf_dist[1] = recon_error << TPL_DEP_COST_SCALE_LOG2;
- tpl_stats->cmp_recrf_rate[1] = rate_cost << TPL_DEP_COST_SCALE_LOG2;
+ tpl_stats->cmp_recrf_rate[1] = rate_cost;
tpl_stats->cmp_recrf_dist[1] =
AOMMAX(tpl_stats->srcrf_dist, tpl_stats->cmp_recrf_dist[1]);
@@ -1076,15 +1099,18 @@ static AOM_INLINE void tpl_model_update_b(TplParams *const tpl_data, int mi_row,
int64_t srcrf_dist = is_compound ? tpl_stats_ptr->cmp_recrf_dist[!ref]
: tpl_stats_ptr->srcrf_dist;
- int64_t srcrf_rate = is_compound ? tpl_stats_ptr->cmp_recrf_rate[!ref]
- : tpl_stats_ptr->srcrf_rate;
+ int64_t srcrf_rate =
+ is_compound
+ ? (tpl_stats_ptr->cmp_recrf_rate[!ref] << TPL_DEP_COST_SCALE_LOG2)
+ : (tpl_stats_ptr->srcrf_rate << TPL_DEP_COST_SCALE_LOG2);
int64_t cur_dep_dist = tpl_stats_ptr->recrf_dist - srcrf_dist;
int64_t mc_dep_dist =
(int64_t)(tpl_stats_ptr->mc_dep_dist *
((double)(tpl_stats_ptr->recrf_dist - srcrf_dist) /
tpl_stats_ptr->recrf_dist));
- int64_t delta_rate = tpl_stats_ptr->recrf_rate - srcrf_rate;
+ int64_t delta_rate =
+ (tpl_stats_ptr->recrf_rate << TPL_DEP_COST_SCALE_LOG2) - srcrf_rate;
int64_t mc_dep_rate =
av1_delta_rate_cost(tpl_stats_ptr->mc_dep_rate, tpl_stats_ptr->recrf_dist,
srcrf_dist, pix_num);
@@ -1258,6 +1284,9 @@ static AOM_INLINE void init_mc_flow_dispenser(AV1_COMP *cpi, int frame_idx,
6;
av1_init_tpl_txfm_stats(tpl_txfm_stats);
+
+ // Initialize x->mbmi_ext when compound predictions are enabled.
+ if (cpi->sf.tpl_sf.allow_compound_pred) av1_zero(x->mbmi_ext);
}
// This function stores the motion estimation dependencies of all the blocks in
@@ -1428,10 +1457,7 @@ static AOM_INLINE void init_gop_frames_for_tpl(
}
const int true_disp = (int)(tpl_frame->frame_display_index);
- av1_get_ref_frames(ref_frame_map_pairs, true_disp,
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
- cpi, gf_index, 0,
-#endif // CONFIG_FRAME_PARALLEL_ENCODE_2
+ av1_get_ref_frames(ref_frame_map_pairs, true_disp, cpi, gf_index, 0,
remapped_ref_idx);
int refresh_mask =
@@ -1516,10 +1542,7 @@ static AOM_INLINE void init_gop_frames_for_tpl(
#endif // CONFIG_BITRATE_ACCURACY && CONFIG_THREE_PASS
gf_group->q_val[gf_index] = *pframe_qindex;
const int true_disp = (int)(tpl_frame->frame_display_index);
- av1_get_ref_frames(ref_frame_map_pairs, true_disp,
-#if CONFIG_FRAME_PARALLEL_ENCODE_2
- cpi, gf_index, 0,
-#endif
+ av1_get_ref_frames(ref_frame_map_pairs, true_disp, cpi, gf_index, 0,
remapped_ref_idx);
int refresh_mask =
av1_get_refresh_frame_flags(cpi, &frame_params, frame_update_type,
@@ -1679,6 +1702,8 @@ int av1_tpl_setup_stats(AV1_COMP *cpi, int gop_eval,
cm->features.allow_high_precision_mv, cpi->td.mb.mv_costs);
const int gop_length = get_gop_length(gf_group);
+ const int num_planes =
+ cpi->sf.tpl_sf.use_y_only_rate_distortion ? 1 : av1_num_planes(cm);
// Backward propagation from tpl_group_frames to 1.
for (int frame_idx = cpi->gf_frame_index; frame_idx < tpl_gf_group_frames;
++frame_idx) {
@@ -1712,7 +1737,7 @@ int av1_tpl_setup_stats(AV1_COMP *cpi, int gop_eval,
#endif // CONFIG_RATECTRL_LOG
aom_extend_frame_borders(tpl_data->tpl_frame[frame_idx].rec_picture,
- av1_num_planes(cm));
+ num_planes);
}
for (int frame_idx = tpl_gf_group_frames - 1;
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/tpl_model.h b/chromium/third_party/libaom/source/libaom/av1/encoder/tpl_model.h
index b77a19ff717..ec49ea5793d 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/tpl_model.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/tpl_model.h
@@ -104,20 +104,20 @@ typedef struct TplTxfmStats {
} TplTxfmStats;
typedef struct TplDepStats {
- int64_t intra_cost;
- int64_t inter_cost;
+ int64_t srcrf_sse;
int64_t srcrf_dist;
int64_t recrf_dist;
int64_t cmp_recrf_dist[2];
- int64_t srcrf_rate;
- int64_t recrf_rate;
- int64_t srcrf_sse;
- int64_t cmp_recrf_rate[2];
int64_t mc_dep_rate;
int64_t mc_dep_dist;
- int_mv mv[INTER_REFS_PER_FRAME];
- int ref_frame_index[2];
int64_t pred_error[INTER_REFS_PER_FRAME];
+ int32_t intra_cost;
+ int32_t inter_cost;
+ int32_t srcrf_rate;
+ int32_t recrf_rate;
+ int32_t cmp_recrf_rate[2];
+ int_mv mv[INTER_REFS_PER_FRAME];
+ int8_t ref_frame_index[2];
} TplDepStats;
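Besides narrowing the cost/rate fields to 32 bits (their scaling by TPL_DEP_COST_SCALE_LOG2 is now applied at the use site in tpl_model_update_b), the reorder sorts members by decreasing width, which avoids interior padding. A standalone sketch of the padding effect, not code from the patch:

    #include <stdint.h>
    #include <stdio.h>

    struct Mixed  { int32_t r; int64_t d; int32_t s; int64_t e; }; /* padded */
    struct Sorted { int64_t d, e; int32_t r, s; };                 /* packed */

    int main(void) {
      /* Typically prints mixed=32 sorted=24 on LP64 targets. */
      printf("mixed=%zu sorted=%zu\n", sizeof(struct Mixed),
             sizeof(struct Sorted));
      return 0;
    }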
typedef struct TplDepFrame {
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/tune_butteraugli.c b/chromium/third_party/libaom/source/libaom/av1/encoder/tune_butteraugli.c
index 70fa23922b7..2f057e1fc86 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/tune_butteraugli.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/tune_butteraugli.c
@@ -209,7 +209,7 @@ void av1_setup_butteraugli_source(AV1_COMP *cpi) {
if (dst->buffer_alloc_sz == 0) {
aom_alloc_frame_buffer(
dst, width, height, ss_x, ss_y, cm->seq_params->use_highbitdepth,
- cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
+ cpi->oxcf.border_in_pixels, cm->features.byte_alignment, 0);
}
av1_copy_and_extend_frame(cpi->source, dst);
@@ -218,7 +218,7 @@ void av1_setup_butteraugli_source(AV1_COMP *cpi) {
aom_alloc_frame_buffer(
resized_dst, width / resize_factor, height / resize_factor, ss_x, ss_y,
cm->seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
- cm->features.byte_alignment);
+ cm->features.byte_alignment, 0);
}
av1_resize_and_extend_frame_nonnormative(cpi->source, resized_dst, bit_depth,
av1_num_planes(cm));
@@ -241,7 +241,7 @@ void av1_setup_butteraugli_rdmult_and_restore_source(AV1_COMP *cpi, double K) {
aom_alloc_frame_buffer(
&resized_recon, width / resize_factor, height / resize_factor, ss_x, ss_y,
cm->seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
- cm->features.byte_alignment);
+ cm->features.byte_alignment, 0);
copy_img(&cpi->common.cur_frame->buf, &resized_recon, width / resize_factor,
height / resize_factor);
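Every aom_alloc_frame_buffer call in this patch gains a trailing 0; the hunks do not show the declaration of the new final parameter, so its name is not visible here. A hedged sketch of the updated call shape, as a hypothetical helper:

    /* Hypothetical wrapper mirroring the new nine-argument call shape; the
     * final 0 opts out of whatever optional extra allocation the updated
     * API supports (parameter name not shown in this diff). */
    static void alloc_enc_buffer(AV1_COMP *cpi, YV12_BUFFER_CONFIG *buf,
                                 int width, int height, int ss_x, int ss_y) {
      AV1_COMMON *const cm = &cpi->common;
      aom_alloc_frame_buffer(buf, width, height, ss_x, ss_y,
                             cm->seq_params->use_highbitdepth,
                             cpi->oxcf.border_in_pixels,
                             cm->features.byte_alignment, 0);
    }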
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/tune_vmaf.c b/chromium/third_party/libaom/source/libaom/av1/encoder/tune_vmaf.c
index 405dd6cfa5f..46260a6bda8 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/tune_vmaf.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/tune_vmaf.c
@@ -94,6 +94,7 @@ static unsigned int residual_variance(const AV1_COMP *cpi,
static double frame_average_variance(const AV1_COMP *const cpi,
const YV12_BUFFER_CONFIG *const frame) {
+ const MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
const uint8_t *const y_buffer = frame->y_buffer;
const int y_stride = frame->y_stride;
const BLOCK_SIZE block_size = BLOCK_64X64;
@@ -101,8 +102,8 @@ static double frame_average_variance(const AV1_COMP *const cpi,
const int block_w = mi_size_wide[block_size] * 4;
const int block_h = mi_size_high[block_size] * 4;
int row, col;
- const int bit_depth = cpi->td.mb.e_mbd.bd;
double var = 0.0, var_count = 0.0;
+ const int use_hbd = frame->flags & YV12_FLAG_HIGHBITDEPTH;
// Loop through each block.
for (row = 0; row < frame->y_height / block_h; ++row) {
@@ -114,13 +115,8 @@ static double frame_average_variance(const AV1_COMP *const cpi,
buf.buf = (uint8_t *)y_buffer + row_offset_y * y_stride + col_offset_y;
buf.stride = y_stride;
- if (cpi->common.seq_params->use_highbitdepth) {
- assert(frame->flags & YV12_FLAG_HIGHBITDEPTH);
- var += av1_high_get_sby_perpixel_variance(cpi, &buf, block_size,
- bit_depth);
- } else {
- var += av1_get_sby_perpixel_variance(cpi, &buf, block_size);
- }
+ var += av1_get_perpixel_variance(cpi, xd, &buf, block_size, AOM_PLANE_Y,
+ use_hbd);
var_count += 1.0;
}
}
@@ -157,8 +153,8 @@ static double residual_frame_average_variance(AV1_COMP *cpi,
bool do_motion_search = false;
if (mvs == NULL) {
do_motion_search = true;
- mvs = (FULLPEL_MV *)aom_malloc(sizeof(*mvs) * mb_rows * mb_cols);
- memset(mvs, 0, sizeof(*mvs) * mb_rows * mb_cols);
+ CHECK_MEM_ERROR(&cpi->common, mvs,
+ (FULLPEL_MV *)aom_calloc(mb_rows * mb_cols, sizeof(*mvs)));
}
unsigned int variance = 0;
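The hunk above replaces an unchecked aom_malloc + memset pair; a sketch of why the new pattern is safer:

    /* Old: aom_malloc can return NULL, and the following memset would then
     * dereference it.  New: aom_calloc zero-fills in one step, and
     * CHECK_MEM_ERROR aborts the encode cleanly on allocation failure. */
    FULLPEL_MV *mvs = NULL;
    CHECK_MEM_ERROR(&cpi->common, mvs,
                    (FULLPEL_MV *)aom_calloc(mb_rows * mb_cols, sizeof(*mvs)));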
@@ -345,7 +341,7 @@ static double find_best_frame_unsharp_amount(const AV1_COMP *const cpi,
aom_alloc_frame_buffer(
&sharpened, width, height, source->subsampling_x, source->subsampling_y,
cm->seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
- cm->features.byte_alignment);
+ cm->features.byte_alignment, 0);
const double baseline_variance = frame_average_variance(cpi, source);
double unsharp_amount;
@@ -397,7 +393,7 @@ void av1_vmaf_neg_preprocessing(AV1_COMP *const cpi,
aom_alloc_frame_buffer(
&blurred, width, height, source->subsampling_x, source->subsampling_y,
cm->seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
- cm->features.byte_alignment);
+ cm->features.byte_alignment, 0);
gaussian_blur(bit_depth, source, &blurred);
unsharp(cpi, source, &blurred, source, best_frame_unsharp_amount);
@@ -417,11 +413,11 @@ void av1_vmaf_frame_preprocessing(AV1_COMP *const cpi,
aom_alloc_frame_buffer(
&source_extended, width, height, source->subsampling_x,
source->subsampling_y, cm->seq_params->use_highbitdepth,
- cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
+ cpi->oxcf.border_in_pixels, cm->features.byte_alignment, 0);
aom_alloc_frame_buffer(
&blurred, width, height, source->subsampling_x, source->subsampling_y,
cm->seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
- cm->features.byte_alignment);
+ cm->features.byte_alignment, 0);
av1_copy_and_extend_frame(source, &source_extended);
gaussian_blur(bit_depth, &source_extended, &blurred);
@@ -457,11 +453,11 @@ void av1_vmaf_blk_preprocessing(AV1_COMP *const cpi,
memset(&source_extended, 0, sizeof(source_extended));
aom_alloc_frame_buffer(
&blurred, width, height, ss_x, ss_y, cm->seq_params->use_highbitdepth,
- cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
+ cpi->oxcf.border_in_pixels, cm->features.byte_alignment, 0);
aom_alloc_frame_buffer(&source_extended, width, height, ss_x, ss_y,
cm->seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels,
- cm->features.byte_alignment);
+ cm->features.byte_alignment, 0);
av1_copy_and_extend_frame(source, &source_extended);
gaussian_blur(bit_depth, &source_extended, &blurred);
@@ -485,9 +481,11 @@ void av1_vmaf_blk_preprocessing(AV1_COMP *const cpi,
const int num_cols = (source->y_width + block_w - 1) / block_w;
const int num_rows = (source->y_height + block_h - 1) / block_h;
double *best_unsharp_amounts =
- aom_malloc(sizeof(*best_unsharp_amounts) * num_cols * num_rows);
- memset(best_unsharp_amounts, 0,
- sizeof(*best_unsharp_amounts) * num_cols * num_rows);
+ aom_calloc(num_cols * num_rows, sizeof(*best_unsharp_amounts));
+ if (!best_unsharp_amounts) {
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
+ "Error allocating vmaf data");
+ }
YV12_BUFFER_CONFIG source_block, blurred_block;
memset(&source_block, 0, sizeof(source_block));
@@ -495,11 +493,11 @@ void av1_vmaf_blk_preprocessing(AV1_COMP *const cpi,
aom_alloc_frame_buffer(&source_block, block_w, block_h, ss_x, ss_y,
cm->seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels,
- cm->features.byte_alignment);
+ cm->features.byte_alignment, 0);
aom_alloc_frame_buffer(&blurred_block, block_w, block_h, ss_x, ss_y,
cm->seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels,
- cm->features.byte_alignment);
+ cm->features.byte_alignment, 0);
for (int row = 0; row < num_rows; ++row) {
for (int col = 0; col < num_cols; ++col) {
@@ -622,7 +620,7 @@ void av1_set_mb_vmaf_rdmult_scaling(AV1_COMP *cpi) {
aom_alloc_frame_buffer(
&resized_source, y_width / resize_factor, y_height / resize_factor, ss_x,
ss_y, cm->seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
- cm->features.byte_alignment);
+ cm->features.byte_alignment, 0);
av1_resize_and_extend_frame_nonnormative(cpi->source, &resized_source,
bit_depth, av1_num_planes(cm));
@@ -640,7 +638,7 @@ void av1_set_mb_vmaf_rdmult_scaling(AV1_COMP *cpi) {
aom_alloc_frame_buffer(&blurred, resized_y_width, resized_y_height, ss_x,
ss_y, cm->seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels,
- cm->features.byte_alignment);
+ cm->features.byte_alignment, 0);
gaussian_blur(bit_depth, &resized_source, &blurred);
YV12_BUFFER_CONFIG recon;
@@ -648,15 +646,18 @@ void av1_set_mb_vmaf_rdmult_scaling(AV1_COMP *cpi) {
aom_alloc_frame_buffer(&recon, resized_y_width, resized_y_height, ss_x, ss_y,
cm->seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels,
- cm->features.byte_alignment);
+ cm->features.byte_alignment, 0);
aom_yv12_copy_frame(&resized_source, &recon, 1);
VmafContext *vmaf_context;
const bool cal_vmaf_neg =
cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN;
aom_init_vmaf_context(&vmaf_context, cpi->vmaf_info.vmaf_model, cal_vmaf_neg);
- unsigned int *sses = aom_malloc(sizeof(*sses) * (num_rows * num_cols));
- memset(sses, 0, sizeof(*sses) * (num_rows * num_cols));
+ unsigned int *sses = aom_calloc(num_rows * num_cols, sizeof(*sses));
+ if (!sses) {
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
+ "Error allocating vmaf data");
+ }
// Loop through each 'block_size' block.
for (int row = 0; row < num_rows; ++row) {
@@ -824,15 +825,15 @@ static double calc_vmaf_motion_score(const AV1_COMP *const cpi,
aom_alloc_frame_buffer(&blurred_cur, y_width, y_height, ss_x, ss_y,
cm->seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels,
- cm->features.byte_alignment);
+ cm->features.byte_alignment, 0);
aom_alloc_frame_buffer(&blurred_last, y_width, y_height, ss_x, ss_y,
cm->seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels,
- cm->features.byte_alignment);
+ cm->features.byte_alignment, 0);
aom_alloc_frame_buffer(&blurred_next, y_width, y_height, ss_x, ss_y,
cm->seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels,
- cm->features.byte_alignment);
+ cm->features.byte_alignment, 0);
gaussian_blur(bit_depth, cur, &blurred_cur);
gaussian_blur(bit_depth, last, &blurred_last);
@@ -1016,18 +1017,18 @@ static double find_best_frame_unsharp_amount_neg(
aom_alloc_frame_buffer(&recon_sharpened, width, height, ss_x, ss_y,
cm->seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels,
- cm->features.byte_alignment);
+ cm->features.byte_alignment, 0);
aom_alloc_frame_buffer(&src_sharpened, width, height, ss_x, ss_y,
cm->seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels,
- cm->features.byte_alignment);
+ cm->features.byte_alignment, 0);
aom_alloc_frame_buffer(&recon_blurred, width, height, ss_x, ss_y,
cm->seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels,
- cm->features.byte_alignment);
+ cm->features.byte_alignment, 0);
aom_alloc_frame_buffer(
&src_blurred, width, height, ss_x, ss_y, cm->seq_params->use_highbitdepth,
- cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
+ cpi->oxcf.border_in_pixels, cm->features.byte_alignment, 0);
gaussian_blur(bit_depth, recon, &recon_blurred);
gaussian_blur(bit_depth, src, &src_blurred);
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/tx_prune_model_weights.h b/chromium/third_party/libaom/source/libaom/av1/encoder/tx_prune_model_weights.h
index 76efe938215..aab5e1398db 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/tx_prune_model_weights.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/tx_prune_model_weights.h
@@ -9,6 +9,10 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
+/*! \file
+ * Contains the details of the ML models used for pruning transform size. This
+ * file is only included by av1/encoder/tx_search.c.
+ */
#ifndef AOM_AV1_ENCODER_TX_PRUNE_MODEL_WEIGHTS_H_
#define AOM_AV1_ENCODER_TX_PRUNE_MODEL_WEIGHTS_H_
@@ -2317,7 +2321,7 @@ static const NN_CONFIG av1_tx_type_nnconfig_16x4_ver = {
/******************************************************************************/
// Map tx_size to its corresponding neural net model for tx type prediction.
-static const NN_CONFIG *av1_tx_type_nnconfig_map_hor[] = {
+static const NN_CONFIG *const av1_tx_type_nnconfig_map_hor[] = {
&av1_tx_type_nnconfig_4x4_hor, // 4x4 transform
&av1_tx_type_nnconfig_8x8_hor, // 8x8 transform
&av1_tx_type_nnconfig_16x16, // 16x16 transform
@@ -2339,7 +2343,7 @@ static const NN_CONFIG *av1_tx_type_nnconfig_map_hor[] = {
NULL, // 64x16 transform
};
-static const NN_CONFIG *av1_tx_type_nnconfig_map_ver[] = {
+static const NN_CONFIG *const av1_tx_type_nnconfig_map_ver[] = {
&av1_tx_type_nnconfig_4x4_ver, // 4x4 transform
&av1_tx_type_nnconfig_8x8_ver, // 8x8 transform
&av1_tx_type_nnconfig_16x16, // 16x16 transform
@@ -3291,7 +3295,7 @@ static const NN_CONFIG av1_tx_split_nnconfig_16x64 = {
/******************************************************************************/
// Map block size to its corresponding neural net model for tx split prediction.
-static const NN_CONFIG *av1_tx_split_nnconfig_map[TX_SIZES_ALL] = {
+static const NN_CONFIG *const av1_tx_split_nnconfig_map[TX_SIZES_ALL] = {
NULL, // TX_4X4,
&av1_tx_split_nnconfig_8x8, // TX_8X8,
&av1_tx_split_nnconfig_16x16, // TX_16X16,
@@ -3313,6 +3317,104 @@ static const NN_CONFIG *av1_tx_split_nnconfig_map[TX_SIZES_ALL] = {
&av1_tx_split_nnconfig_16x64, // TX_64X16,
};
+#if !CONFIG_REALTIME_ONLY
+#define NUM_INTRA_TX_SPLIT_FEATURES 14
+#define NUM_INTRA_TX_SPLIT_HIDDEN_LAYERS 1
+#define NUM_INTRA_TX_SPLIT_HIDDEN_NODES 16
+// Model to prune the intra transform depth for intra 8x8 blocks.
+static const float av1_intra_tx_split_8x8_mean[NUM_INTRA_TX_SPLIT_FEATURES] = {
+ 0.110706f, 18.901518f, 0.250436f, 13.483487f, 0.118141f,
+ 14.318728f, 0.028409f, 14.257664f, 0.045839f, 15.143358f,
+ 9.702971f, 14.300809f, 6.018646f, 3.682534f,
+};
+
+static const float av1_intra_tx_split_8x8_std[NUM_INTRA_TX_SPLIT_FEATURES] = {
+ 13.750575f, 13.440116f, 14.334330f, 12.236641f, 18.415247f,
+ 12.733355f, 18.309339f, 12.858130f, 23.465142f, 13.447014f,
+ 8.625048f, 10.456774f, 1.185447f, 1.810423f,
+};
+
+static const float av1_intra_tx_split_nn_weights_8x8_layer0
+ [NUM_INTRA_TX_SPLIT_FEATURES * NUM_INTRA_TX_SPLIT_HIDDEN_NODES] = {
+ -0.156142f, -0.753623f, 0.026883f, 0.039188f, -0.035310f, 0.106140f,
+ 0.051622f, 0.077838f, 0.101632f, 0.107278f, 0.232200f, 0.269083f,
+ 0.048966f, -1.553293f, -0.113983f, -0.151248f, -0.067369f, 0.787292f,
+ 0.076651f, -0.802634f, 0.266414f, 1.107563f, -0.068848f, -0.956468f,
+ -0.074920f, -0.192258f, 0.006207f, 0.176196f, -0.493442f, 0.152290f,
+ -0.208874f, -0.014658f, 0.297385f, -0.351695f, 0.246295f, -0.178519f,
+ -0.204191f, 0.049663f, -0.330343f, -0.299754f, 0.246215f, -0.014558f,
+ -0.117611f, 0.206445f, 0.045840f, -0.047563f, -0.049679f, 0.406892f,
+ -0.052307f, -1.513404f, 0.166166f, 0.520760f, -0.143320f, -0.593928f,
+ -0.010533f, 0.250752f, 0.076738f, 0.537512f, -0.082619f, -1.534031f,
+ 0.047109f, 0.634247f, -0.089730f, 0.545534f, -0.022742f, -0.779047f,
+ -0.606358f, -0.199145f, -0.051269f, 0.248784f, 0.327545f, -0.851751f,
+ 0.071739f, 0.035975f, 0.387781f, -0.136427f, -0.284436f, 0.578449f,
+ -0.198276f, 0.579950f, 0.600111f, -0.370164f, -0.215297f, 0.517342f,
+ 0.200061f, -2.507660f, -0.030851f, 0.227315f, -0.078289f, 0.276052f,
+ -0.050281f, 0.251481f, -0.139318f, 0.281175f, 0.226524f, 0.058968f,
+ 0.197436f, 0.517294f, -0.105914f, -1.599567f, 0.064985f, 0.043209f,
+ -0.280038f, 0.126874f, 0.330387f, -0.014407f, 0.031241f, 0.237801f,
+ 0.948959f, -0.253791f, -0.022622f, -0.061430f, 0.265852f, 0.750823f,
+ 0.086606f, 0.853527f, -0.180971f, -1.255744f, -0.152979f, -1.022198f,
+ -0.044708f, 0.506424f, -0.501968f, -0.416863f, -0.012688f, 0.193523f,
+ -0.093698f, 0.430875f, 0.007379f, 0.019278f, 0.080890f, 0.462755f,
+ -0.054326f, -0.157611f, -0.004851f, -1.275676f, -0.060528f, -0.508170f,
+ 0.195429f, -0.023534f, 0.355211f, 0.983561f, -0.122036f, -0.911948f,
+ -0.172280f, -1.135245f, -0.043211f, 0.576456f, -0.075247f, 0.429734f,
+ -0.246309f, -0.355575f, -0.048809f, 0.217113f, 0.078385f, 0.720341f,
+ 0.007070f, 0.144617f, -0.167642f, 0.303056f, -0.031425f, 0.123448f,
+ -0.320530f, 0.164070f, -0.497849f, -0.233918f, -0.032123f, 0.084983f,
+ 0.312216f, 0.062609f, -0.389815f, 0.237593f, 0.000157f, -0.642068f,
+ 0.167898f, 0.495234f, -0.083493f, -0.555971f, 0.124437f, 0.381125f,
+ -0.459219f, 0.047924f, -0.138222f, -2.232816f, 0.127585f, -0.102420f,
+ 0.131598f, 0.036837f, -0.163055f, -0.067429f, -0.078521f, -0.055666f,
+ 1.387057f, 0.400154f, -0.003355f, -0.073627f, -0.305098f, -0.413383f,
+ -0.008266f, -0.038329f, 0.209808f, 0.375777f, 0.037274f, -0.050226f,
+ -0.100576f, 0.237441f, 0.237854f, 0.828296f, 0.001149f, -0.093964f,
+ 0.214051f, -0.031486f, -0.561307f, 0.014540f, 0.169357f, 0.323202f,
+ -0.395334f, -0.038941f, 0.476800f, -0.213122f, -0.287521f, -0.420717f,
+ -0.054142f, -0.102266f,
+ };
+
+static const float
+ av1_intra_tx_split_nn_bias_8x8_layer0[NUM_INTRA_TX_SPLIT_HIDDEN_NODES] = {
+ -1.150850f, -0.236404f, 0.184554f, -0.904162f, -0.949979f, 0.427016f,
+ -0.546867f, -0.611094f, -0.676570f, -0.208959f, -0.286384f, 0.562238f,
+ 0.434197f, -0.746518f, 0.123085f, -0.549836f,
+ };
+
+static const float av1_intra_tx_split_nn_weights_8x8_layer1
+ [NUM_INTRA_TX_SPLIT_HIDDEN_NODES] = {
+ 0.749814f, 0.598172f, 0.375611f, 0.751612f, 0.947538f, -0.282228f,
+ -1.457522f, -1.092290f, 0.738657f, 0.575779f, 0.514823f, -0.560616f,
+ -0.491619f, -1.482014f, 0.524625f, -0.533590f,
+ };
+
+static const float av1_intra_tx_split_nn_bias_8x8_layer1[1] = {
+ -0.488888f,
+};
+
+static const NN_CONFIG av1_intra_tx_split_nnconfig_8x8 = {
+ NUM_INTRA_TX_SPLIT_FEATURES, // num_inputs
+ 1, // num_outputs
+ NUM_INTRA_TX_SPLIT_HIDDEN_LAYERS, // num_hidden_layers
+ {
+ NUM_INTRA_TX_SPLIT_HIDDEN_NODES,
+ }, // num_hidden_nodes
+ {
+ av1_intra_tx_split_nn_weights_8x8_layer0,
+ av1_intra_tx_split_nn_weights_8x8_layer1,
+ },
+ {
+ av1_intra_tx_split_nn_bias_8x8_layer0,
+ av1_intra_tx_split_nn_bias_8x8_layer1,
+ },
+};
+
+static const float av1_intra_tx_prune_nn_thresh_8x8[2] = { -0.405465f,
+ 0.405465f };
+#endif // !CONFIG_REALTIME_ONLY
+
#ifdef __cplusplus
} // extern "C"
#endif
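A minimal sketch of how these new tables are consumed, mirroring the tx_search.c hunks later in this patch:

    /* The 14 normalized features pass through one 16-node hidden layer;
     * the scalar score is then compared against two symmetric thresholds
     * (0.405465 is approximately ln(1.5)). */
    float score;
    av1_nn_predict(features, &av1_intra_tx_split_nnconfig_8x8, 1, &score);
    if (score <= av1_intra_tx_prune_nn_thresh_8x8[0]) {
      /* TX_PRUNE_SPLIT: skip the smaller (split) transform depths and
       * keep the largest transform. */
    } else if (score > av1_intra_tx_prune_nn_thresh_8x8[1]) {
      /* TX_PRUNE_LARGEST: abandon the largest transform and evaluate
       * only the split depths. */
    }
    /* Scores between the two thresholds prune nothing. */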
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/tx_search.c b/chromium/third_party/libaom/source/libaom/av1/encoder/tx_search.c
index e24800b8ce5..74c9de2ae96 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/tx_search.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/tx_search.c
@@ -1644,10 +1644,13 @@ static float get_dev(float mean, double x2_sum, int num) {
return dev;
}
-// Feature used by the model to predict tx split: the mean and standard
-// deviation values of the block and sub-blocks.
-static AOM_INLINE void get_mean_dev_features(const int16_t *data, int stride,
- int bw, int bh, float *feature) {
+// Writes the features required by the ML model to predict tx split, based on
+// the mean and standard deviation values of the block and its sub-blocks.
+// Returns the number of elements written to the output array, which is at
+// most 12 currently. Hence, the 'features' buffer should be able to
+// accommodate at least 12 elements.
+static AOM_INLINE int get_mean_dev_features(const int16_t *data, int stride,
+ int bw, int bh, float *features) {
const int16_t *const data_ptr = &data[0];
const int subh = (bh >= bw) ? (bh >> 1) : bh;
const int subw = (bw >= bh) ? (bw >> 1) : bw;
@@ -1656,7 +1659,7 @@ static AOM_INLINE void get_mean_dev_features(const int16_t *data, int stride,
int feature_idx = 2;
int total_x_sum = 0;
int64_t total_x2_sum = 0;
- int blk_idx = 0;
+ int num_sub_blks = 0;
double mean2_sum = 0.0f;
float dev_sum = 0.0f;
@@ -1672,24 +1675,24 @@ static AOM_INLINE void get_mean_dev_features(const int16_t *data, int stride,
const float mean = (float)x_sum / sub_num;
const float dev = get_dev(mean, (double)x2_sum, sub_num);
- feature[feature_idx++] = mean;
- feature[feature_idx++] = dev;
+ features[feature_idx++] = mean;
+ features[feature_idx++] = dev;
mean2_sum += (double)(mean * mean);
dev_sum += dev;
- blk_idx++;
+ num_sub_blks++;
}
}
const float lvl0_mean = (float)total_x_sum / num;
- feature[0] = lvl0_mean;
- feature[1] = get_dev(lvl0_mean, (double)total_x2_sum, num);
+ features[0] = lvl0_mean;
+ features[1] = get_dev(lvl0_mean, (double)total_x2_sum, num);
- if (blk_idx > 1) {
- // Deviation of means.
- feature[feature_idx++] = get_dev(lvl0_mean, mean2_sum, blk_idx);
- // Mean of deviations.
- feature[feature_idx++] = dev_sum / blk_idx;
- }
+ // Deviation of means.
+ features[feature_idx++] = get_dev(lvl0_mean, mean2_sum, num_sub_blks);
+ // Mean of deviations.
+ features[feature_idx++] = dev_sum / num_sub_blks;
+
+ return feature_idx;
}
static int ml_predict_tx_split(MACROBLOCK *x, BLOCK_SIZE bsize, int blk_row,
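For reference, the feature vector assembled from get_mean_dev_features() by the intra path (see ml_predict_intra_tx_depth_prune below) lays out as follows for the 8x8 case:

    /* 14 = NUM_INTRA_TX_SPLIT_FEATURES:
     *   [0]     mean of the whole residual block
     *   [1]     deviation of the whole block
     *   [2..9]  mean/dev pairs for the four 4x4 sub-blocks
     *   [10]    deviation of the sub-block means
     *   [11]    mean of the sub-block deviations
     * get_mean_dev_features() returns 12, and the caller appends:
     *   [12]    logf(1 + source_variance)
     *   [13]    logf(1 + dc_q * dc_q / 256)                          */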
@@ -1732,7 +1735,7 @@ get_tx_mask(const AV1_COMP *cpi, MACROBLOCK *x, int plane, int block,
get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
int use_actual_frame_probs = 1;
const int *tx_type_probs;
-#if CONFIG_FRAME_PARALLEL_ENCODE && CONFIG_FPMT_TEST
+#if CONFIG_FPMT_TEST
use_actual_frame_probs =
(cpi->ppi->fpmt_unit_test_cfg == PARALLEL_SIMULATION_ENCODE) ? 0 : 1;
if (!use_actual_frame_probs) {
@@ -1966,10 +1969,14 @@ static INLINE void predict_dc_only_block(
uint64_t var_threshold = (uint64_t)(1.8 * qstep * qstep);
if (is_cur_buf_hbd(xd))
block_var = ROUND_POWER_OF_TWO(block_var, (xd->bd - 8) * 2);
- // Early prediction of skip block if residual mean and variance are less
+
+ if (block_var >= var_threshold) return;
+ const unsigned int predict_dc_level = x->txfm_search_params.predict_dc_level;
+ assert(predict_dc_level != 0);
+
+ // Prediction of skip block if residual mean and variance are less
// than qstep based threshold
- if (((llabs(*per_px_mean) * dc_coeff_scale[tx_size]) < (dc_qstep << 12)) &&
- (block_var < var_threshold)) {
+ if ((llabs(*per_px_mean) * dc_coeff_scale[tx_size]) < (dc_qstep << 12)) {
// If the normalized mean of residual block is less than the dc qstep and
// the normalized block variance is less than ac qstep, then the block is
// assumed to be a skip block and its rdcost is updated accordingly.
@@ -2000,9 +2007,9 @@ static INLINE void predict_dc_only_block(
RDCOST(x->rdmult, best_rd_stats->rate, best_rd_stats->sse);
x->plane[plane].txb_entropy_ctx[block] = 0;
- } else if (block_var < var_threshold) {
+ } else if (predict_dc_level > 1) {
// Predict DC only blocks based on residual variance.
- // For chroma plane, this early prediction is disabled for intra blocks.
+ // For chroma plane, this prediction is disabled for intra blocks.
if ((plane == 0) || (plane > 0 && is_inter_block(mbmi))) *dc_only_blk = 1;
}
}
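The reworked predict_dc_only_block flow can be summarized by the level of the predict_dc_level speed feature; a sketch:

    /* level 0:  predict_dc_block is false, no prediction is attempted.
     * level 1+: return early when block_var >= ~1.8 * qstep^2; otherwise,
     *           if the per-pixel |mean| is under the dc-qstep bound, treat
     *           the block as a skip block and update its rdcost.
     * level 2+: failing the mean test, still flag a DC-only block based on
     *           the variance (disabled for intra chroma blocks). */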
@@ -2052,7 +2059,7 @@ static void search_tx_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
unsigned int block_mse_q8;
int dc_only_blk = 0;
const bool predict_dc_block =
- txfm_params->predict_dc_level && txw != 64 && txh != 64;
+ txfm_params->predict_dc_level >= 1 && txw != 64 && txh != 64;
int64_t per_px_mean = INT64_MAX;
if (predict_dc_block) {
predict_dc_only_block(x, plane, plane_bsize, tx_size, block, blk_row,
@@ -2758,6 +2765,72 @@ static AOM_INLINE void choose_smallest_tx_size(const AV1_COMP *const cpi,
FTXS_NONE, skip_trellis);
}
+#if !CONFIG_REALTIME_ONLY
+static void ml_predict_intra_tx_depth_prune(MACROBLOCK *x, int blk_row,
+ int blk_col, BLOCK_SIZE bsize,
+ TX_SIZE tx_size) {
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const MB_MODE_INFO *const mbmi = xd->mi[0];
+
+  // Disable the pruning logic based on the NN model in the following cases:
+  // 1) Lossless coding, as only the 4x4 transform is evaluated there.
+  // 2) When the transform and current block sizes do not match, as the
+  // features are computed over the current block.
+  // 3) When the operating bit-depth is not 8-bit, as the input features are
+  // not scaled according to bit-depth.
+ if (xd->lossless[mbmi->segment_id] || txsize_to_bsize[tx_size] != bsize ||
+ xd->bd != 8)
+ return;
+
+  // Currently, NN-model-based pruning is supported only when the largest
+  // transform size is 8x8.
+ if (tx_size != TX_8X8) return;
+
+  // The neural network model is a sequential net trained with the SGD
+  // optimizer. It could be further improved in terms of speed/quality by
+  // considering the following experiments:
+  // 1) Generate the ML model by training with balanced data for different
+  // learning rates and optimizers.
+  // 2) Experiment with the ML model by adding features related to the
+  // statistics of top and left pixels (to capture the accuracy of
+  // reconstructed neighbouring pixels for the 4x4 blocks numbered 1, 2, 3
+  // in the 8x8 block), the source variance of 4x4 sub-blocks, etc.
+  // 3) Generate ML models for transform blocks other than 8x8.
+ const NN_CONFIG *const nn_config = &av1_intra_tx_split_nnconfig_8x8;
+ const float *const intra_tx_prune_thresh = av1_intra_tx_prune_nn_thresh_8x8;
+
+ float features[NUM_INTRA_TX_SPLIT_FEATURES] = { 0.0f };
+ const int diff_stride = block_size_wide[bsize];
+
+ const int16_t *diff = x->plane[0].src_diff + MI_SIZE * blk_row * diff_stride +
+ MI_SIZE * blk_col;
+ const int bw = tx_size_wide[tx_size];
+ const int bh = tx_size_high[tx_size];
+
+ int feature_idx = get_mean_dev_features(diff, diff_stride, bw, bh, features);
+
+ features[feature_idx++] = logf(1.0f + (float)x->source_variance);
+
+ const int dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd) >> (xd->bd - 8);
+ const float log_dc_q_square = logf(1.0f + (float)(dc_q * dc_q) / 256.0f);
+ features[feature_idx++] = log_dc_q_square;
+ assert(feature_idx == NUM_INTRA_TX_SPLIT_FEATURES);
+ for (int i = 0; i < NUM_INTRA_TX_SPLIT_FEATURES; i++) {
+ features[i] = (features[i] - av1_intra_tx_split_8x8_mean[i]) /
+ av1_intra_tx_split_8x8_std[i];
+ }
+
+ float score;
+ av1_nn_predict(features, nn_config, 1, &score);
+
+ TxfmSearchParams *const txfm_params = &x->txfm_search_params;
+ if (score <= intra_tx_prune_thresh[0])
+ txfm_params->nn_prune_depths_for_intra_tx = TX_PRUNE_SPLIT;
+ else if (score > intra_tx_prune_thresh[1])
+ txfm_params->nn_prune_depths_for_intra_tx = TX_PRUNE_LARGEST;
+}
+#endif // !CONFIG_REALTIME_ONLY
+
// Search for the best uniform transform size and type for current coding block.
static AOM_INLINE void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
MACROBLOCK *x,
@@ -2768,7 +2841,7 @@ static AOM_INLINE void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = xd->mi[0];
- const TxfmSearchParams *txfm_params = &x->txfm_search_params;
+ TxfmSearchParams *const txfm_params = &x->txfm_search_params;
const TX_SIZE max_rect_tx_size = max_txsize_rect_lookup[bs];
const int tx_select = txfm_params->tx_mode_search_type == TX_MODE_SELECT;
int start_tx;
@@ -2810,6 +2883,17 @@ static AOM_INLINE void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
continue;
}
+#if !CONFIG_REALTIME_ONLY
+ if (txfm_params->nn_prune_depths_for_intra_tx == TX_PRUNE_SPLIT) break;
+
+    // Set the flag that enables evaluation of the NN classifier for pruning
+    // transform depths. As the features are based on the intra residual of
+    // the largest transform, the NN model is evaluated only for that case.
+ txfm_params->enable_nn_prune_intra_tx_depths =
+ (cpi->sf.tx_sf.prune_intra_tx_depths_using_nn && tx_size == start_tx);
+#endif
+
RD_STATS this_rd_stats;
rd[depth] = av1_uniform_txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs,
tx_size, FTXS_NONE, skip_trellis);
@@ -2834,6 +2918,13 @@ static AOM_INLINE void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
av1_copy_array(xd->tx_type_map, best_txk_type_map, num_blks);
av1_copy_array(txfm_info->blk_skip, best_blk_skip, num_blks);
}
+
+#if !CONFIG_REALTIME_ONLY
+  // Reset the flags to avoid any unintentional evaluation of the NN model
+  // and consumption of the pruned depths.
+ txfm_params->enable_nn_prune_intra_tx_depths = false;
+ txfm_params->nn_prune_depths_for_intra_tx = TX_PRUNE_NONE;
+#endif
}
// Search for the best transform type for the given transform block in the
@@ -2860,6 +2951,18 @@ static AOM_INLINE void block_rd_txfm(int plane, int block, int blk_row,
if (!is_inter) {
av1_predict_intra_block_facade(cm, xd, plane, blk_col, blk_row, tx_size);
av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
+#if !CONFIG_REALTIME_ONLY
+ const TxfmSearchParams *const txfm_params = &x->txfm_search_params;
+ if (txfm_params->enable_nn_prune_intra_tx_depths) {
+ ml_predict_intra_tx_depth_prune(x, blk_row, blk_col, plane_bsize,
+ tx_size);
+ if (txfm_params->nn_prune_depths_for_intra_tx == TX_PRUNE_LARGEST) {
+ av1_invalid_rd_stats(&args->rd_stats);
+ args->exit_early = 1;
+ return;
+ }
+ }
+#endif
}
TXB_CTX txb_ctx;
@@ -3370,7 +3473,7 @@ static AOM_INLINE int model_based_tx_search_prune(const AV1_COMP *cpi,
assert(level >= 0 && level <= 2);
int model_rate;
int64_t model_dist;
- int model_skip;
+ uint8_t model_skip;
MACROBLOCKD *const xd = &x->e_mbd;
model_rd_sb_fn[MODELRD_TYPE_TX_SEARCH_PRUNE](
cpi, bsize, x, xd, 0, 0, &model_rate, &model_dist, &model_skip, NULL,
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/tx_search.h b/chromium/third_party/libaom/source/libaom/av1/encoder/tx_search.h
index e3caf5bf4c5..b3689cf7db2 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/tx_search.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/tx_search.h
@@ -89,7 +89,7 @@ int64_t av1_uniform_txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
* \param[in] rd_stats Pointer to struct to keep track of the RD stats
* \param[in] bsize Current macroblock size
* \param[in] ref_best_rd Best RD cost seen for this block so far
- * \return Nothing is returned. The selected transform size and type will
+ * \remark Nothing is returned. The selected transform size and type will
be saved in the MB_MODE_INFO structure
*/
void av1_pick_recursive_tx_size_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
@@ -111,7 +111,7 @@ void av1_pick_recursive_tx_size_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
* \param[in] rd_stats Pointer to struct to keep track of the RD stats
* \param[in] bs Current macroblock size
* \param[in] ref_best_rd Best RD cost seen for this block so far
- * \return Nothing is returned. The selected transform size and type will
+ * \remark Nothing is returned. The selected transform size and type will
be saved in the MB_MODE_INFO structure
*/
void av1_pick_uniform_tx_size_type_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
@@ -158,7 +158,7 @@ int av1_txfm_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x, RD_STATS *rd_stats,
* \param[in] skip_trellis Binary flag indicating if trellis optimization
should be skipped
*
- * \return Nothing is returned. The RD results will be saved in rd_stats.
+ * \remark Nothing is returned. The RD results will be saved in rd_stats.
*/
void av1_txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
RD_STATS *rd_stats, int64_t ref_best_rd,
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/var_based_part.c b/chromium/third_party/libaom/source/libaom/av1/encoder/var_based_part.c
index 0511cf7be77..28588a5e03c 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/var_based_part.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/var_based_part.c
@@ -32,6 +32,17 @@
extern const uint8_t AV1_VAR_OFFS[];
+// Possible values for the force_split variable while evaluating variance-based
+// partitioning.
+enum {
+ // Evaluate all partition types
+ PART_EVAL_ALL = 0,
+ // Force PARTITION_SPLIT
+ PART_EVAL_ONLY_SPLIT = 1,
+ // Force PARTITION_NONE
+ PART_EVAL_ONLY_NONE = 2
+} UENUM1BYTE(PART_EVAL_STATUS);
+
typedef struct {
VPVariance *part_variances;
VPartVar *split[4];
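The force_split array that now carries these states is a flattened quad-tree over the superblock; for a 128x128 superblock its 1 + 4 + 16 + 64 = 85 entries are indexed as follows (derived from the loops later in this file):

    /* force_split[0]                    : the 128x128 root
     * force_split[1 + m]                : 64x64 block m      (m in 0..3)
     * force_split[5 + 4*m + i]          : 32x32 block i of m (i in 0..3)
     * force_split[21 + 16*m + 4*i + j]  : 16x16 block j      (j in 0..3)
     * Each entry now holds a PART_EVAL_STATUS rather than a 0/1 flag, so a
     * node can force PARTITION_NONE as well as PARTITION_SPLIT. */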
@@ -122,23 +133,24 @@ static AOM_INLINE void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
&node.part_variances->none);
}
-static AOM_INLINE void set_block_size(AV1_COMP *const cpi, MACROBLOCK *const x,
- MACROBLOCKD *const xd, int mi_row,
+static AOM_INLINE void set_block_size(AV1_COMP *const cpi, int mi_row,
int mi_col, BLOCK_SIZE bsize) {
if (cpi->common.mi_params.mi_cols > mi_col &&
cpi->common.mi_params.mi_rows > mi_row) {
- set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd,
- mi_row, mi_col);
- xd->mi[0]->bsize = bsize;
+ CommonModeInfoParams *mi_params = &cpi->common.mi_params;
+ const int mi_grid_idx = get_mi_grid_idx(mi_params, mi_row, mi_col);
+ const int mi_alloc_idx = get_alloc_mi_idx(mi_params, mi_row, mi_col);
+ MB_MODE_INFO *mi = mi_params->mi_grid_base[mi_grid_idx] =
+ &mi_params->mi_alloc[mi_alloc_idx];
+ mi->bsize = bsize;
}
}
-static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCK *const x,
- MACROBLOCKD *const xd,
+static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCKD *const xd,
const TileInfo *const tile, void *data,
BLOCK_SIZE bsize, int mi_row, int mi_col,
int64_t threshold, BLOCK_SIZE bsize_min,
- int force_split) {
+ PART_EVAL_STATUS force_split) {
AV1_COMMON *const cm = &cpi->common;
variance_node vt;
const int block_width = mi_size_wide[bsize];
@@ -164,7 +176,13 @@ static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCK *const x,
assert(block_height == block_width);
tree_to_node(data, bsize, &vt);
- if (force_split == 1) return 0;
+ if (mi_col + bs_width_check <= tile->mi_col_end &&
+ mi_row + bs_height_check <= tile->mi_row_end &&
+ force_split == PART_EVAL_ONLY_NONE) {
+ set_block_size(cpi, mi_row, mi_col, bsize);
+ return 1;
+ }
+ if (force_split == PART_EVAL_ONLY_SPLIT) return 0;
// For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
// variance is below threshold, otherwise split will be selected.
@@ -175,7 +193,7 @@ static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCK *const x,
if (mi_col + bs_width_check <= tile->mi_col_end &&
mi_row + bs_height_check <= tile->mi_row_end &&
vt.part_variances->none.variance < threshold) {
- set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
+ set_block_size(cpi, mi_row, mi_col, bsize);
return 1;
}
return 0;
@@ -192,7 +210,7 @@ static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCK *const x,
if (mi_col + bs_width_check <= tile->mi_col_end &&
mi_row + bs_height_check <= tile->mi_row_end &&
vt.part_variances->none.variance < threshold) {
- set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
+ set_block_size(cpi, mi_row, mi_col, bsize);
return 1;
}
// Check vertical split.
@@ -205,8 +223,8 @@ static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCK *const x,
vt.part_variances->vert[1].variance < threshold &&
get_plane_block_size(subsize, xd->plane[1].subsampling_x,
xd->plane[1].subsampling_y) < BLOCK_INVALID) {
- set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
- set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize);
+ set_block_size(cpi, mi_row, mi_col, subsize);
+ set_block_size(cpi, mi_row, mi_col + block_width / 2, subsize);
return 1;
}
}
@@ -220,8 +238,8 @@ static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCK *const x,
vt.part_variances->horz[1].variance < threshold &&
get_plane_block_size(subsize, xd->plane[1].subsampling_x,
xd->plane[1].subsampling_y) < BLOCK_INVALID) {
- set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
- set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize);
+ set_block_size(cpi, mi_row, mi_col, subsize);
+ set_block_size(cpi, mi_row + block_height / 2, mi_col, subsize);
return 1;
}
}
@@ -365,14 +383,16 @@ static AOM_INLINE void fill_variance_4x4avg(const uint8_t *s, int sp,
int highbd_flag,
#endif
int pixels_wide, int pixels_high,
- int is_key_frame) {
+ int is_key_frame,
+ int border_offset_4x4) {
int k;
for (k = 0; k < 4; k++) {
int x4_idx = x8_idx + ((k & 1) << 2);
int y4_idx = y8_idx + ((k >> 1) << 2);
unsigned int sse = 0;
int sum = 0;
- if (x4_idx < pixels_wide && y4_idx < pixels_high) {
+ if (x4_idx < pixels_wide - border_offset_4x4 &&
+ y4_idx < pixels_high - border_offset_4x4) {
int s_avg;
int d_avg = 128;
#if CONFIG_AV1_HIGHBITDEPTH
@@ -410,9 +430,30 @@ static int64_t scale_part_thresh_content(int64_t threshold_base, int speed,
return threshold;
}
+static AOM_INLINE void tune_thresh_based_on_qindex_window(
+ int qindex, int th, int win, int fac, int64_t thresholds[]) {
+ double weight;
+
+ if (qindex < th - win)
+ weight = 1.0;
+ else if (qindex > th + win)
+ weight = 0.0;
+ else
+    weight = 1.0 - (double)(qindex - th + win) / (2 * win);
+ thresholds[1] =
+ (int)((1 - weight) * (thresholds[1] << 1) + weight * thresholds[1]);
+ thresholds[2] =
+ (int)((1 - weight) * (thresholds[2] << 1) + weight * thresholds[2]);
+ thresholds[3] =
+ (int)((1 - weight) * (thresholds[3] << fac) + weight * thresholds[3]);
+}
+
static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
int q, int content_lowsumdiff,
- int source_sad, int segment_id) {
+ int source_sad_nonrd,
+ int source_sad_rd, int segment_id,
+ uint64_t blk_sad,
+ int lighting_change) {
AV1_COMMON *const cm = &cpi->common;
const int is_key_frame = frame_is_intra_only(cm);
const int threshold_multiplier = is_key_frame ? 120 : 1;
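A worked example of the new qindex window ramp (th, win and fac below are illustrative; win = 45 and fac = 2 appear at one call site later in this patch):

    /* weight falls linearly from 1 to 0 across [th - win, th + win]; note
     * the (double) cast above: with plain integer division the quotient
     * truncates and the ramp collapses to a step. */
    double weight = 1.0 - (double)(qindex - th + win) / (2 * win);
    /* th = 200, win = 45, fac = 2:
     *   qindex = 155 -> weight = 1.0 -> thresholds unchanged
     *   qindex = 200 -> weight = 0.5 -> thresholds[1..2] scaled by 1.5x
     *   qindex = 245 -> weight = 0.0 -> thresholds[1..2] doubled and
     *                                   thresholds[3] scaled by 2^fac = 4x */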
@@ -458,7 +499,7 @@ static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
if (noise_level == kHigh)
threshold_base = (5 * threshold_base) >> 1;
else if (noise_level == kMedium &&
- !cpi->sf.rt_sf.force_large_partition_blocks)
+ !cpi->sf.rt_sf.prefer_large_partition_blocks)
threshold_base = (5 * threshold_base) >> 2;
}
  // TODO(kyslov) Enable var based partition adjustment on temporal denoising
@@ -471,12 +512,12 @@ static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
else
threshold_base =
scale_part_thresh_content(threshold_base, cpi->oxcf.speed, cm->width,
- cm->height, cpi->svc.non_reference_frame);
+ cm->height, cpi->rtc_ref.non_reference_frame);
#else
// Increase base variance threshold based on content_state/sum_diff level.
threshold_base =
scale_part_thresh_content(threshold_base, cpi->oxcf.speed, cm->width,
- cm->height, cpi->svc.non_reference_frame);
+ cm->height, cpi->rtc_ref.non_reference_frame);
#endif
thresholds[0] = threshold_base >> 1;
thresholds[1] = threshold_base;
@@ -484,10 +525,17 @@ static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
if (cm->width >= 1280 && cm->height >= 720)
thresholds[3] = thresholds[3] << 1;
if (cm->width * cm->height <= 352 * 288) {
- const int qindex_thr[3][2] = { { 200, 220 }, { 200, 210 }, { 170, 220 } };
- assert(cpi->sf.rt_sf.var_part_based_on_qidx < 3);
- int qindex_low_thr = qindex_thr[cpi->sf.rt_sf.var_part_based_on_qidx][0];
- int qindex_high_thr = qindex_thr[cpi->sf.rt_sf.var_part_based_on_qidx][1];
+ const int qindex_thr[5][2] = {
+ { 200, 220 }, { 140, 170 }, { 120, 150 }, { 200, 210 }, { 170, 220 },
+ };
+ int th_idx = 0;
+ if (cpi->sf.rt_sf.var_part_based_on_qidx >= 1)
+ th_idx =
+ (source_sad_rd <= kLowSad) ? cpi->sf.rt_sf.var_part_based_on_qidx : 0;
+ if (cpi->sf.rt_sf.var_part_based_on_qidx >= 3)
+ th_idx = cpi->sf.rt_sf.var_part_based_on_qidx;
+ const int qindex_low_thr = qindex_thr[th_idx][0];
+ const int qindex_high_thr = qindex_thr[th_idx][1];
if (current_qindex >= qindex_high_thr) {
threshold_base = (5 * threshold_base) >> 1;
thresholds[1] = threshold_base >> 3;
@@ -519,10 +567,13 @@ static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
thresholds[2] = (5 * threshold_base) >> 2;
} else if (cm->width < 1920 && cm->height < 1080) {
thresholds[2] = threshold_base << 1;
- } else {
+ } else if (cm->width < 2560 && cm->height < 1440) {
thresholds[2] = (5 * threshold_base) >> 1;
+ } else {
+ thresholds[2] = (7 * threshold_base) >> 1;
}
- if (cpi->sf.rt_sf.force_large_partition_blocks) {
+ // Tune thresholds less or more aggressively to prefer larger partitions
+ if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 3) {
double weight;
const int win = 20;
if (current_qindex < QINDEX_LARGE_BLOCK_THR - win)
@@ -538,23 +589,33 @@ static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
}
}
if (cm->width * cm->height <= 352 * 288) {
- thresholds[3] = INT32_MAX;
+ thresholds[3] = INT64_MAX;
if (segment_id == 0) {
thresholds[1] <<= 2;
- thresholds[2] <<= (source_sad == kLowSad) ? 5 : 4;
+ thresholds[2] <<= (source_sad_nonrd <= kLowSad) ? 5 : 4;
} else {
thresholds[1] <<= 1;
thresholds[2] <<= 3;
}
+    // Allow split to 8x8 for superblocks where part of the block has a
+    // moving boundary: admit superblocks whose source_sad is above the
+    // threshold, but avoid very large source_sad or busy source content,
+    // so the superblock does not end up with too many 8x8 blocks.
+ if (segment_id == 0 && cpi->rc.avg_source_sad < 25000 &&
+ blk_sad > 25000 && blk_sad < 50000 && !lighting_change) {
+ thresholds[2] = (3 * thresholds[2]) >> 2;
+ thresholds[3] = thresholds[2] << 3;
+ }
// Condition the increase of partition thresholds on the segment
// and the content. Avoid the increase for superblocks which have
// high source sad, unless the whole frame has very high motion
// (i.e, cpi->rc.avg_source_sad is very large, in which case all blocks
// have high source sad).
} else if (cm->width * cm->height > 640 * 480 && segment_id == 0 &&
- (source_sad != kHighSad || cpi->rc.avg_source_sad > 50000)) {
+ (source_sad_nonrd != kHighSad ||
+ cpi->rc.avg_source_sad > 50000)) {
thresholds[0] = (3 * thresholds[0]) >> 1;
- thresholds[3] = INT32_MAX;
+ thresholds[3] = INT64_MAX;
if (current_qindex > QINDEX_LARGE_BLOCK_THR) {
thresholds[1] =
(int)((1 - weight) * (thresholds[1] << 1) + weight * thresholds[1]);
@@ -562,14 +623,25 @@ static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
(int)((1 - weight) * (thresholds[2] << 1) + weight * thresholds[2]);
}
} else if (current_qindex > QINDEX_LARGE_BLOCK_THR && segment_id == 0 &&
- (source_sad != kHighSad || cpi->rc.avg_source_sad > 50000)) {
+ (source_sad_nonrd != kHighSad ||
+ cpi->rc.avg_source_sad > 50000)) {
thresholds[1] =
(int)((1 - weight) * (thresholds[1] << 2) + weight * thresholds[1]);
thresholds[2] =
(int)((1 - weight) * (thresholds[2] << 4) + weight * thresholds[2]);
- thresholds[3] = INT32_MAX;
+ thresholds[3] = INT64_MAX;
}
+ } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 2) {
+ thresholds[1] <<= (source_sad_nonrd <= kLowSad) ? 2 : 0;
+ thresholds[2] =
+ (source_sad_nonrd <= kLowSad) ? (3 * thresholds[2]) : thresholds[2];
+ } else if (cpi->sf.rt_sf.prefer_large_partition_blocks >= 1) {
+ const int fac = (source_sad_nonrd <= kLowSad) ? 2 : 1;
+ tune_thresh_based_on_qindex_window(current_qindex, QINDEX_LARGE_BLOCK_THR,
+ 45, fac, thresholds);
}
+ if (cpi->sf.part_sf.disable_8x8_part_based_on_qidx && (current_qindex < 128))
+ thresholds[3] = INT64_MAX;
}
// Set temporal variance low flag for superblock 64x64.
@@ -857,7 +929,7 @@ void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int q,
return;
} else {
set_vbp_thresholds(cpi, cpi->vbp_info.thresholds, q, content_lowsumdiff, 0,
- 0);
+ 0, 0, 0, 0);
// The threshold below is not changed locally.
cpi->vbp_info.threshold_minmax = 15 + (q >> 3);
}
@@ -865,37 +937,81 @@ void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int q,
static AOM_INLINE void chroma_check(AV1_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, unsigned int y_sad,
- int is_key_frame) {
+ unsigned int y_sad_g, int is_key_frame,
+ int zero_motion, unsigned int *uv_sad) {
int i;
MACROBLOCKD *xd = &x->e_mbd;
-
+ int shift = 3;
if (is_key_frame || cpi->oxcf.tool_cfg.enable_monochrome) return;
+ if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN &&
+ cpi->rc.high_source_sad)
+ shift = 5;
+
+ MB_MODE_INFO *mi = xd->mi[0];
+ const AV1_COMMON *const cm = &cpi->common;
+ const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
+ const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
+ const struct scale_factors *const sf =
+ get_ref_scale_factors_const(cm, LAST_FRAME);
+ struct buf_2d dst;
+ unsigned int uv_sad_g = 0;
+
for (i = 1; i <= 2; ++i) {
- unsigned int uv_sad = UINT_MAX;
struct macroblock_plane *p = &x->plane[i];
struct macroblockd_plane *pd = &xd->plane[i];
const BLOCK_SIZE bs =
get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
- if (bs != BLOCK_INVALID)
- uv_sad = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, pd->dst.buf,
- pd->dst.stride);
+ if (bs != BLOCK_INVALID) {
+ // For last:
+ if (zero_motion) {
+ if (mi->ref_frame[0] == LAST_FRAME) {
+ uv_sad[i - 1] = cpi->ppi->fn_ptr[bs].sdf(
+ p->src.buf, p->src.stride, pd->pre[0].buf, pd->pre[0].stride);
+ } else {
+ uint8_t *src = (i == 1) ? yv12->u_buffer : yv12->v_buffer;
+ setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12->uv_crop_width,
+ yv12->uv_crop_height, yv12->uv_stride, xd->mi_row,
+ xd->mi_col, sf, xd->plane[i].subsampling_x,
+ xd->plane[i].subsampling_y);
+
+ uv_sad[i - 1] = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
+ dst.buf, dst.stride);
+ }
+ } else {
+ uv_sad[i - 1] = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
+ pd->dst.buf, pd->dst.stride);
+ }
+
+ // For golden:
+ if (y_sad_g != UINT_MAX) {
+ uint8_t *src = (i == 1) ? yv12_g->u_buffer : yv12_g->v_buffer;
+ setup_pred_plane(&dst, xd->mi[0]->bsize, src, yv12_g->uv_crop_width,
+ yv12_g->uv_crop_height, yv12_g->uv_stride, xd->mi_row,
+ xd->mi_col, sf, xd->plane[i].subsampling_x,
+ xd->plane[i].subsampling_y);
+ uv_sad_g = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, dst.buf,
+ dst.stride);
+ }
+ }
- if (uv_sad > (y_sad >> 1))
+ if (uv_sad[i - 1] > (y_sad >> 1))
x->color_sensitivity_sb[i - 1] = 1;
- else if (uv_sad < (y_sad >> 3))
+ else if (uv_sad[i - 1] < (y_sad >> shift))
x->color_sensitivity_sb[i - 1] = 0;
// Borderline case: to be refined at coding block level in nonrd_pickmode,
// for coding block size < sb_size.
else
x->color_sensitivity_sb[i - 1] = 2;
+
+ x->color_sensitivity_sb_g[i - 1] = uv_sad_g > y_sad_g / 6;
}
}
static void fill_variance_tree_leaves(
AV1_COMP *cpi, MACROBLOCK *x, VP128x128 *vt, VP16x16 *vt2,
- unsigned char *force_split, int avg_16x16[][4], int maxvar_16x16[][4],
+ PART_EVAL_STATUS *force_split, int avg_16x16[][4], int maxvar_16x16[][4],
int minvar_16x16[][4], int *variance4x4downsample, int64_t *thresholds,
uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride) {
AV1_COMMON *cm = &cpi->common;
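The per-plane color-sensitivity decision after the chroma_check rework reduces to the sketch below:

    /* uv_sad > y_sad / 2       -> color_sensitivity_sb[p] = 1 (chroma
     *                             matters for this superblock)
     * uv_sad < y_sad >> shift  -> color_sensitivity_sb[p] = 0
     * otherwise                -> 2 (borderline; refined per coding block
     *                             in nonrd_pickmode)
     * shift is 3 by default and 5 for screen content with high source sad,
     * which makes the "insensitive" bucket harder to enter on big changes.
     * Against GOLDEN the test is simply uv_sad_g > y_sad_g / 6. */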
@@ -907,24 +1023,34 @@ static void fill_variance_tree_leaves(
const int compute_minmax_variance = 0;
const int segment_id = xd->mi[0]->segment_id;
int pixels_wide = 128, pixels_high = 128;
-
+ int border_offset_4x4 = 0;
+ int temporal_denoising = cpi->sf.rt_sf.use_rtc_tf;
if (is_small_sb) {
pixels_wide = 64;
pixels_high = 64;
}
if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);
+#if CONFIG_AV1_TEMPORAL_DENOISING
+ temporal_denoising |= cpi->oxcf.noise_sensitivity;
+#endif
+  // When temporal filtering or the temporal denoiser is enabled: since the
+  // source is modified, we must avoid the 4x4 avg along the superblock
+  // boundary, because the SIMD code loads 8 pixels for the 4x4 avg and can
+  // therefore access source data outside the superblock (while it is being
+  // modified by the temporal filter). Temporal filtering is never done on
+  // key frames.
+ if (!is_key_frame && temporal_denoising) border_offset_4x4 = 4;
for (int m = 0; m < num_64x64_blocks; m++) {
const int x64_idx = ((m & 1) << 6);
const int y64_idx = ((m >> 1) << 6);
const int m2 = m << 2;
- force_split[m + 1] = 0;
+ force_split[m + 1] = PART_EVAL_ALL;
for (int i = 0; i < 4; i++) {
const int x32_idx = x64_idx + ((i & 1) << 5);
const int y32_idx = y64_idx + ((i >> 1) << 5);
const int i2 = (m2 + i) << 2;
- force_split[5 + m2 + i] = 0;
+ force_split[5 + m2 + i] = PART_EVAL_ALL;
avg_16x16[m][i] = 0;
maxvar_16x16[m][i] = 0;
minvar_16x16[m][i] = INT_MAX;
@@ -933,7 +1059,7 @@ static void fill_variance_tree_leaves(
const int y16_idx = y32_idx + ((j >> 1) << 4);
const int split_index = 21 + i2 + j;
VP16x16 *vst = &vt->split[m].split[i].split[j];
- force_split[split_index] = 0;
+ force_split[split_index] = PART_EVAL_ALL;
variance4x4downsample[i2 + j] = 0;
if (!is_key_frame) {
fill_variance_8x8avg(src, src_stride, dst, dst_stride, x16_idx,
@@ -957,10 +1083,10 @@ static void fill_variance_tree_leaves(
// 16X16 variance is above threshold for split, so force split to
// 8x8 for this 16x16 block (this also forces splits for upper
// levels).
- force_split[split_index] = 1;
- force_split[5 + m2 + i] = 1;
- force_split[m + 1] = 1;
- force_split[0] = 1;
+ force_split[split_index] = PART_EVAL_ONLY_SPLIT;
+ force_split[5 + m2 + i] = PART_EVAL_ONLY_SPLIT;
+ force_split[m + 1] = PART_EVAL_ONLY_SPLIT;
+ force_split[0] = PART_EVAL_ONLY_SPLIT;
} else if (!cyclic_refresh_segment_id_boosted(segment_id) &&
compute_minmax_variance &&
vt->split[m]
@@ -978,27 +1104,27 @@ static void fill_variance_tree_leaves(
pixels_wide, pixels_high);
int thresh_minmax = (int)cpi->vbp_info.threshold_minmax;
if (minmax > thresh_minmax) {
- force_split[split_index] = 1;
- force_split[5 + m2 + i] = 1;
- force_split[m + 1] = 1;
- force_split[0] = 1;
+ force_split[split_index] = PART_EVAL_ONLY_SPLIT;
+ force_split[5 + m2 + i] = PART_EVAL_ONLY_SPLIT;
+ force_split[m + 1] = PART_EVAL_ONLY_SPLIT;
+ force_split[0] = PART_EVAL_ONLY_SPLIT;
}
}
}
if (is_key_frame) {
- force_split[split_index] = 0;
+ force_split[split_index] = PART_EVAL_ALL;
// Go down to 4x4 down-sampling for variance.
variance4x4downsample[i2 + j] = 1;
for (int k = 0; k < 4; k++) {
int x8_idx = x16_idx + ((k & 1) << 3);
int y8_idx = y16_idx + ((k >> 1) << 3);
VP8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k];
- fill_variance_4x4avg(src, src_stride, dst, dst_stride, x8_idx,
- y8_idx, vst2,
+ fill_variance_4x4avg(
+ src, src_stride, dst, dst_stride, x8_idx, y8_idx, vst2,
#if CONFIG_AV1_HIGHBITDEPTH
- xd->cur_buf->flags,
+ xd->cur_buf->flags,
#endif
- pixels_wide, pixels_high, is_key_frame);
+ pixels_wide, pixels_high, is_key_frame, border_offset_4x4);
}
}
}
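The new border_offset_4x4 guard is easiest to see as the adjusted bound inside fill_variance_4x4avg:

    /* With border_offset_4x4 = 4, the loop skips 4x4 averages that start
     * within the last 4 samples of the superblock:
     *   if (x4_idx < pixels_wide - 4 && y4_idx < pixels_high - 4) { ... }
     * The SIMD 4x4-avg kernels load 8 pixels per row, so without the offset
     * they could read source samples beyond the superblock while a
     * temporal-filter thread is still rewriting them. */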
@@ -1007,7 +1133,8 @@ static void fill_variance_tree_leaves(
}
static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
- unsigned int *y_sad_g,
+ unsigned int *y_sad_g, unsigned int *y_sad_alt,
+ unsigned int *y_sad_last,
MV_REFERENCE_FRAME *ref_frame_partition, int mi_row,
int mi_col) {
AV1_COMMON *const cm = &cpi->common;
@@ -1015,16 +1142,24 @@ static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
const int num_planes = av1_num_planes(cm);
const int is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;
- // TODO(kyslov): we are assuming that the ref is LAST_FRAME! Check if it
- // is!!
MB_MODE_INFO *mi = xd->mi[0];
const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);
assert(yv12 != NULL);
const YV12_BUFFER_CONFIG *yv12_g = NULL;
-
- // For non-SVC GOLDEN is another temporal reference. Check if it should be
- // used as reference for partitioning.
- if (!cpi->ppi->use_svc && (cpi->ref_frame_flags & AOM_GOLD_FLAG)) {
+ const YV12_BUFFER_CONFIG *yv12_alt = NULL;
+  // Check if LAST is a reference. For spatial layers, always use it as the
+  // reference: scaling (golden or altref being lower resolution) is not
+  // handled/checked here.
+ int use_last_ref = (cpi->ref_frame_flags & AOM_LAST_FLAG) ||
+ cpi->svc.number_spatial_layers > 1;
+ int use_golden_ref = cpi->ref_frame_flags & AOM_GOLD_FLAG;
+ int use_alt_ref =
+ cpi->rtc_ref.set_ref_frame_config || cpi->sf.rt_sf.use_nonrd_altref_frame;
+
+ // For 1 spatial layer: GOLDEN is another temporal reference.
+ // Check if it should be used as reference for partitioning.
+ if (cpi->svc.number_spatial_layers == 1 && use_golden_ref &&
+ (x->content_state_sb.source_sad_nonrd != kZeroSad || !use_last_ref)) {
yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
if (yv12_g && yv12_g != yv12) {
av1_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
@@ -1035,29 +1170,47 @@ static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
}
}
- av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
- get_ref_scale_factors(cm, LAST_FRAME), num_planes);
- mi->ref_frame[0] = LAST_FRAME;
- mi->ref_frame[1] = NONE_FRAME;
- mi->bsize = cm->seq_params->sb_size;
- mi->mv[0].as_int = 0;
- mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
- if (cpi->sf.rt_sf.estimate_motion_for_var_based_partition) {
- if (xd->mb_to_right_edge >= 0 && xd->mb_to_bottom_edge >= 0) {
- const MV dummy_mv = { 0, 0 };
- *y_sad = av1_int_pro_motion_estimation(cpi, x, cm->seq_params->sb_size,
- mi_row, mi_col, &dummy_mv);
+ // For 1 spatial layer: ALTREF is another temporal reference.
+ // Check if it should be used as reference for partitioning.
+ if (cpi->svc.number_spatial_layers == 1 && use_alt_ref &&
+ (cpi->ref_frame_flags & AOM_ALT_FLAG) &&
+ (x->content_state_sb.source_sad_nonrd != kZeroSad || !use_last_ref)) {
+ yv12_alt = get_ref_frame_yv12_buf(cm, ALTREF_FRAME);
+ if (yv12_alt && yv12_alt != yv12) {
+ av1_setup_pre_planes(xd, 0, yv12_alt, mi_row, mi_col,
+ get_ref_scale_factors(cm, ALTREF_FRAME), num_planes);
+ *y_sad_alt = cpi->ppi->fn_ptr[bsize].sdf(
+ x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
+ xd->plane[0].pre[0].stride);
}
}
- if (*y_sad == UINT_MAX) {
- *y_sad = cpi->ppi->fn_ptr[bsize].sdf(
- x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
- xd->plane[0].pre[0].stride);
+
+ if (use_last_ref) {
+ av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
+ get_ref_scale_factors(cm, LAST_FRAME), num_planes);
+ mi->ref_frame[0] = LAST_FRAME;
+ mi->ref_frame[1] = NONE_FRAME;
+ mi->bsize = cm->seq_params->sb_size;
+ mi->mv[0].as_int = 0;
+ mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
+ if (cpi->sf.rt_sf.estimate_motion_for_var_based_partition) {
+ if (xd->mb_to_right_edge >= 0 && xd->mb_to_bottom_edge >= 0) {
+ const MV dummy_mv = { 0, 0 };
+ *y_sad = av1_int_pro_motion_estimation(cpi, x, cm->seq_params->sb_size,
+ mi_row, mi_col, &dummy_mv);
+ }
+ }
+ if (*y_sad == UINT_MAX) {
+ *y_sad = cpi->ppi->fn_ptr[bsize].sdf(
+ x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
+ xd->plane[0].pre[0].stride);
+ }
+ *y_sad_last = *y_sad;
}
- // Pick the ref frame for partitioning, use golden frame only if its
- // lower sad.
- if (*y_sad_g < 0.9 * *y_sad) {
+  // Pick the ref frame for partitioning; use the golden or altref frame only
+  // if it has lower SAD, biased toward LAST by a factor of 0.9.
+ if (*y_sad_g < 0.9 * *y_sad && *y_sad_g < *y_sad_alt) {
av1_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
get_ref_scale_factors(cm, GOLDEN_FRAME), num_planes);
mi->ref_frame[0] = GOLDEN_FRAME;
@@ -1065,6 +1218,14 @@ static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
*y_sad = *y_sad_g;
*ref_frame_partition = GOLDEN_FRAME;
x->nonrd_prune_ref_frame_search = 0;
+ } else if (*y_sad_alt < 0.9 * *y_sad && *y_sad_alt < *y_sad_g) {
+ av1_setup_pre_planes(xd, 0, yv12_alt, mi_row, mi_col,
+ get_ref_scale_factors(cm, ALTREF_FRAME), num_planes);
+ mi->ref_frame[0] = ALTREF_FRAME;
+ mi->mv[0].as_int = 0;
+ *y_sad = *y_sad_alt;
+ *ref_frame_partition = ALTREF_FRAME;
+ x->nonrd_prune_ref_frame_search = 0;
} else {
*ref_frame_partition = LAST_FRAME;
x->nonrd_prune_ref_frame_search =
@@ -1076,8 +1237,40 @@ static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL,
cm->seq_params->sb_size, AOM_PLANE_Y,
- AOM_PLANE_Y);
+ AOM_PLANE_V);
+ }
+}
+
+// Decides whether to split or merge a 16x16 partition block in variance-based
+// partitioning, based on the 8x8 sub-block variances.
+static AOM_INLINE PART_EVAL_STATUS get_part_eval_based_on_sub_blk_var(
+ VP16x16 *var_16x16_info, int64_t threshold16) {
+ int max_8x8_var = 0, min_8x8_var = INT_MAX;
+ for (int k = 0; k < 4; k++) {
+ get_variance(&var_16x16_info->split[k].part_variances.none);
+ int this_8x8_var = var_16x16_info->split[k].part_variances.none.variance;
+ max_8x8_var = AOMMAX(this_8x8_var, max_8x8_var);
+ min_8x8_var = AOMMIN(this_8x8_var, min_8x8_var);
}
+ // If the difference between maximum and minimum sub-block variances is high,
+ // then only evaluate PARTITION_SPLIT for the 16x16 block. Otherwise, evaluate
+ // only PARTITION_NONE. The shift factor for threshold16 has been derived
+ // empirically.
+ return ((max_8x8_var - min_8x8_var) > (threshold16 << 2))
+ ? PART_EVAL_ONLY_SPLIT
+ : PART_EVAL_ONLY_NONE;
+}
+
+static AOM_INLINE bool is_set_force_zeromv_skip_based_on_src_sad(
+ int set_zeromv_skip_based_on_source_sad, SOURCE_SAD source_sad_nonrd) {
+ if (set_zeromv_skip_based_on_source_sad == 0) return false;
+
+ if (set_zeromv_skip_based_on_source_sad >= 2)
+ return source_sad_nonrd <= kVeryLowSad;
+ else if (set_zeromv_skip_based_on_source_sad >= 1)
+ return source_sad_nonrd == kZeroSad;
+
+ return false;
}
int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
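A usage sketch of the new helper; the ordering kZeroSad < kVeryLowSad < kLowSad is assumed from the comparisons above:

    /* level 0 -> never force the zero-mv skip
     * level 1 -> force it only when source_sad_nonrd == kZeroSad
     * level 2+-> also for kVeryLowSad superblocks */
    const bool force_skip = is_set_force_zeromv_skip_based_on_src_sad(
        cpi->sf.rt_sf.set_zeromv_skip_based_on_source_sad,
        x->content_state_sb.source_sad_nonrd);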
@@ -1093,7 +1286,7 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
int i, j, k, m;
VP128x128 *vt;
VP16x16 *vt2 = NULL;
- unsigned char force_split[85];
+ PART_EVAL_STATUS force_split[85];
int avg_64x64;
int max_var_32x32[4];
int min_var_32x32[4];
@@ -1109,7 +1302,9 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
const uint8_t *d;
int sp;
int dp;
+ unsigned int uv_sad[2];
NOISE_LEVEL noise_level = kLow;
+ int zero_motion = 1;
int is_key_frame =
(frame_is_intra_only(cm) ||
@@ -1123,6 +1318,8 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
unsigned int y_sad = UINT_MAX;
unsigned int y_sad_g = UINT_MAX;
+ unsigned int y_sad_alt = UINT_MAX;
+ unsigned int y_sad_last = UINT_MAX;
BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;
// Ref frame used in partitioning.
@@ -1139,17 +1336,32 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
const int low_res = (cm->width <= 352 && cm->height <= 288);
int variance4x4downsample[64];
const int segment_id = xd->mi[0]->segment_id;
+ uint64_t blk_sad = 0;
+ if (cpi->src_sad_blk_64x64 != NULL) {
+ const int sb_size_by_mb = (cm->seq_params->sb_size == BLOCK_128X128)
+ ? (cm->seq_params->mib_size >> 1)
+ : cm->seq_params->mib_size;
+ const int sb_cols =
+ (cm->mi_params.mi_cols + sb_size_by_mb - 1) / sb_size_by_mb;
+ const int sbi_col = mi_col / sb_size_by_mb;
+ const int sbi_row = mi_row / sb_size_by_mb;
+ blk_sad = cpi->src_sad_blk_64x64[sbi_col + sbi_row * sb_cols];
+ }
if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
cyclic_refresh_segment_id_boosted(segment_id)) {
const int q =
av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex);
set_vbp_thresholds(cpi, thresholds, q, x->content_state_sb.low_sumdiff,
- x->content_state_sb.source_sad, 1);
+ x->content_state_sb.source_sad_nonrd,
+ x->content_state_sb.source_sad_rd, 1, blk_sad,
+ x->content_state_sb.lighting_change);
} else {
set_vbp_thresholds(cpi, thresholds, cm->quant_params.base_qindex,
x->content_state_sb.low_sumdiff,
- x->content_state_sb.source_sad, 0);
+ x->content_state_sb.source_sad_nonrd,
+ x->content_state_sb.source_sad_rd, 0, blk_sad,
+ x->content_state_sb.lighting_change);
}
// For non keyframes, disable 4x4 average for low resolution when speed = 8
@@ -1160,7 +1372,7 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
// Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
// 5-20 for the 16x16 blocks.
- force_split[0] = 0;
+ force_split[0] = PART_EVAL_ALL;
memset(x->part_search_info.variance_low, 0,
sizeof(x->part_search_info.variance_low));
@@ -1180,14 +1392,15 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
}
if (!is_key_frame) {
- setup_planes(cpi, x, &y_sad, &y_sad_g, &ref_frame_partition, mi_row,
- mi_col);
+ setup_planes(cpi, x, &y_sad, &y_sad_g, &y_sad_alt, &y_sad_last,
+ &ref_frame_partition, mi_row, mi_col);
MB_MODE_INFO *mi = xd->mi[0];
// Use reference SB directly for zero mv.
if (mi->mv[0].as_int != 0) {
d = xd->plane[0].dst.buf;
dp = xd->plane[0].dst.stride;
+ zero_motion = 0;
} else {
d = xd->plane[0].pre[0].buf;
dp = xd->plane[0].pre[0].stride;
@@ -1197,32 +1410,45 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
dp = 0;
}
- x->force_zeromv_skip = 0;
- const unsigned int thresh_exit_part =
- (cm->seq_params->sb_size == BLOCK_64X64) ? 5000 : 10000;
+ uv_sad[0] = 0;
+ uv_sad[1] = 0;
+ chroma_check(cpi, x, bsize, y_sad_last, y_sad_g, is_key_frame, zero_motion,
+ uv_sad);
+
+ x->force_zeromv_skip_for_sb = 0;
+ const bool is_set_force_zeromv_skip =
+ is_set_force_zeromv_skip_based_on_src_sad(
+ cpi->sf.rt_sf.set_zeromv_skip_based_on_source_sad,
+ x->content_state_sb.source_sad_nonrd);
+
// If the superblock is completely static (zero source sad) and
// the y_sad (relative to LAST ref) is very small, take the sb_size partition
// and exit, and force zeromv_last skip mode for nonrd_pickmode.
- // Only do this when the cyclic refresh is applied, and only on the base
- // segment (so the QP-boosted segment can still contnue cleaning/ramping
- // up the quality).
- // TODO(marpan): Check color component for setting this skip.
+ // Only do this on the base segment (so the QP-boosted segment, if applied,
+ // can still continue cleaning/ramping up the quality).
+ // A condition on the chroma (uv) SAD is also applied.
if (!is_key_frame && cpi->sf.rt_sf.part_early_exit_zeromv &&
- cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ &&
- cpi->cyclic_refresh->apply_cyclic_refresh &&
- segment_id == CR_SEGMENT_ID_BASE &&
- x->content_state_sb.source_sad == kZeroSad &&
- ref_frame_partition == LAST_FRAME && xd->mi[0]->mv[0].as_int == 0 &&
- y_sad < thresh_exit_part) {
+ cpi->rc.frames_since_key > 30 && segment_id == CR_SEGMENT_ID_BASE &&
+ is_set_force_zeromv_skip && ref_frame_partition == LAST_FRAME &&
+ xd->mi[0]->mv[0].as_int == 0) {
const int block_width = mi_size_wide[cm->seq_params->sb_size];
const int block_height = mi_size_high[cm->seq_params->sb_size];
+ const unsigned int thresh_exit_part_y =
+ cpi->zeromv_skip_thresh_exit_part[bsize];
+ const unsigned int thresh_exit_part_uv =
+ CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP(thresh_exit_part_y);
if (mi_col + block_width <= tile->mi_col_end &&
- mi_row + block_height <= tile->mi_row_end) {
- set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
- x->force_zeromv_skip = 1;
+ mi_row + block_height <= tile->mi_row_end &&
+ y_sad < thresh_exit_part_y && uv_sad[0] < thresh_exit_part_uv &&
+ uv_sad[1] < thresh_exit_part_uv) {
+ set_block_size(cpi, mi_row, mi_col, bsize);
+ x->force_zeromv_skip_for_sb = 1;
if (vt2) aom_free(vt2);
if (vt) aom_free(vt);
return 0;
+ } else if (x->content_state_sb.source_sad_nonrd == kZeroSad &&
+ cpi->sf.rt_sf.part_early_exit_zeromv >= 2) {
+ x->force_zeromv_skip_for_sb = 2;
}
}
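
Condensed, the early exit above takes the superblock-sized partition only when the luma SAD and both chroma SADs clear their thresholds. A minimal scalar model (CHROMA_THRESH mirrors CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP; the helper itself is illustrative):

    #define CHROMA_THRESH(t) ((3 * (t)) >> 2)

    static int can_take_sb_partition(unsigned int y_sad,
                                     const unsigned int uv_sad[2],
                                     unsigned int thresh_exit_part_y) {
      const unsigned int thresh_exit_part_uv = CHROMA_THRESH(thresh_exit_part_y);
      return y_sad < thresh_exit_part_y && uv_sad[0] < thresh_exit_part_uv &&
             uv_sad[1] < thresh_exit_part_uv;
    }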
@@ -1256,10 +1482,13 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
// to split. This also forces a split on the upper levels.
get_variance(&vtemp->part_variances.none);
if (vtemp->part_variances.none.variance > thresholds[3]) {
- force_split[split_index] = 1;
- force_split[5 + m2 + i] = 1;
- force_split[m + 1] = 1;
- force_split[0] = 1;
+ force_split[split_index] =
+ cpi->sf.rt_sf.vbp_prune_16x16_split_using_min_max_sub_blk_var
+ ? get_part_eval_based_on_sub_blk_var(vtemp, thresholds[3])
+ : PART_EVAL_ONLY_SPLIT;
+ force_split[5 + m2 + i] = PART_EVAL_ONLY_SPLIT;
+ force_split[m + 1] = PART_EVAL_ONLY_SPLIT;
+ force_split[0] = PART_EVAL_ONLY_SPLIT;
}
}
}
@@ -1268,7 +1497,7 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
// (some threshold of) the average variance over the sub-16x16 blocks,
// then force this block to split. This also forces a split on the upper
// (64x64) level.
- if (!force_split[5 + m2 + i]) {
+ if (force_split[5 + m2 + i] == PART_EVAL_ALL) {
get_variance(&vt->split[m].split[i].part_variances.none);
var_32x32 = vt->split[m].split[i].part_variances.none.variance;
max_var_32x32[m] = AOMMAX(var_32x32, max_var_32x32[m]);
@@ -1280,25 +1509,25 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
(thresholds[2] >> 1) &&
vt->split[m].split[i].part_variances.none.variance >
(avg_16x16[m][i] >> 1))) {
- force_split[5 + m2 + i] = 1;
- force_split[m + 1] = 1;
- force_split[0] = 1;
+ force_split[5 + m2 + i] = PART_EVAL_ONLY_SPLIT;
+ force_split[m + 1] = PART_EVAL_ONLY_SPLIT;
+ force_split[0] = PART_EVAL_ONLY_SPLIT;
} else if (!is_key_frame && (cm->width * cm->height <= 640 * 360) &&
(((maxvar_16x16[m][i] - minvar_16x16[m][i]) >
(thresholds[2] >> 1) &&
maxvar_16x16[m][i] > thresholds[2]) ||
- (cpi->sf.rt_sf.force_large_partition_blocks &&
- x->content_state_sb.source_sad > kLowSad &&
+ (cpi->sf.rt_sf.prefer_large_partition_blocks &&
+ x->content_state_sb.source_sad_nonrd > kLowSad &&
cpi->rc.frame_source_sad < 20000 &&
maxvar_16x16[m][i] > (thresholds[2] >> 4) &&
maxvar_16x16[m][i] > (minvar_16x16[m][i] << 2)))) {
- force_split[5 + m2 + i] = 1;
- force_split[m + 1] = 1;
- force_split[0] = 1;
+ force_split[5 + m2 + i] = PART_EVAL_ONLY_SPLIT;
+ force_split[m + 1] = PART_EVAL_ONLY_SPLIT;
+ force_split[0] = PART_EVAL_ONLY_SPLIT;
}
}
}
- if (!force_split[1 + m]) {
+ if (force_split[1 + m] == PART_EVAL_ALL) {
fill_variance_tree(&vt->split[m], BLOCK_64X64);
get_variance(&vt->split[m].part_variances.none);
var_64x64 = vt->split[m].part_variances.none.variance;
@@ -1313,30 +1542,30 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
(max_var_32x32[m] - min_var_32x32[m]) > 3 * (thresholds[1] >> 3) &&
max_var_32x32[m] > thresholds[1] >> 1 &&
(noise_level >= kMedium || cpi->ppi->use_svc ||
- cpi->sf.rt_sf.force_large_partition_blocks)) {
- force_split[1 + m] = 1;
- force_split[0] = 1;
+ cpi->sf.rt_sf.prefer_large_partition_blocks)) {
+ force_split[1 + m] = PART_EVAL_ONLY_SPLIT;
+ force_split[0] = PART_EVAL_ONLY_SPLIT;
}
avg_64x64 += var_64x64;
}
- if (is_small_sb) force_split[0] = 1;
+ if (is_small_sb) force_split[0] = PART_EVAL_ONLY_SPLIT;
}
- if (!force_split[0]) {
+ if (force_split[0] == PART_EVAL_ALL) {
fill_variance_tree(vt, BLOCK_128X128);
get_variance(&vt->part_variances.none);
if (!is_key_frame &&
vt->part_variances.none.variance > (9 * avg_64x64) >> 5)
- force_split[0] = 1;
+ force_split[0] = PART_EVAL_ONLY_SPLIT;
if (!is_key_frame &&
(max_var_64x64 - min_var_64x64) > 3 * (thresholds[0] >> 3) &&
max_var_64x64 > thresholds[0] >> 1)
- force_split[0] = 1;
+ force_split[0] = PART_EVAL_ONLY_SPLIT;
}
if (mi_col + 32 > tile->mi_col_end || mi_row + 32 > tile->mi_row_end ||
- !set_vt_partitioning(cpi, x, xd, tile, vt, BLOCK_128X128, mi_row, mi_col,
+ !set_vt_partitioning(cpi, xd, tile, vt, BLOCK_128X128, mi_row, mi_col,
thresholds[0], BLOCK_16X16, force_split[0])) {
for (m = 0; m < num_64x64_blocks; ++m) {
const int x64_idx = ((m & 1) << 4);
@@ -1345,7 +1574,7 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
// Now go through the entire structure, splitting every block size until
// we get to one that's got a variance lower than our threshold.
- if (!set_vt_partitioning(cpi, x, xd, tile, &vt->split[m], BLOCK_64X64,
+ if (!set_vt_partitioning(cpi, xd, tile, &vt->split[m], BLOCK_64X64,
mi_row + y64_idx, mi_col + x64_idx,
thresholds[1], BLOCK_16X16,
force_split[1 + m])) {
@@ -1353,7 +1582,7 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
const int x32_idx = ((i & 1) << 3);
const int y32_idx = ((i >> 1) << 3);
const int i2 = (m2 + i) << 2;
- if (!set_vt_partitioning(cpi, x, xd, tile, &vt->split[m].split[i],
+ if (!set_vt_partitioning(cpi, xd, tile, &vt->split[m].split[i],
BLOCK_32X32, (mi_row + y64_idx + y32_idx),
(mi_col + x64_idx + x32_idx), thresholds[2],
BLOCK_16X16, force_split[5 + m2 + i])) {
@@ -1368,7 +1597,7 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
(!is_key_frame && variance4x4downsample[i2 + j] == 1)
? &vt2[i2 + j]
: &vt->split[m].split[i].split[j];
- if (!set_vt_partitioning(cpi, x, xd, tile, vtemp, BLOCK_16X16,
+ if (!set_vt_partitioning(cpi, xd, tile, vtemp, BLOCK_16X16,
mi_row + y64_idx + y32_idx + y16_idx,
mi_col + x64_idx + x32_idx + x16_idx,
thresholds[3], BLOCK_8X8,
@@ -1377,8 +1606,7 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
const int x8_idx = (k & 1) << 1;
const int y8_idx = (k >> 1) << 1;
set_block_size(
- cpi, x, xd,
- (mi_row + y64_idx + y32_idx + y16_idx + y8_idx),
+ cpi, (mi_row + y64_idx + y32_idx + y16_idx + y8_idx),
(mi_col + x64_idx + x32_idx + x16_idx + x8_idx),
BLOCK_8X8);
}
@@ -1394,7 +1622,6 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
set_low_temp_var_flag(cpi, &x->part_search_info, xd, vt, thresholds,
ref_frame_partition, mi_col, mi_row);
}
- chroma_check(cpi, x, bsize, y_sad, is_key_frame);
if (vt2) aom_free(vt2);
if (vt) aom_free(vt);
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/var_based_part.h b/chromium/third_party/libaom/source/libaom/av1/encoder/var_based_part.h
index 5176751342a..7febc0eb7af 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/var_based_part.h
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/var_based_part.h
@@ -28,6 +28,8 @@ extern "C" {
100 // Use increased thresholds for midres for speed 9 when qindex is above
// this threshold
+#define CALC_CHROMA_THRESH_FOR_ZEROMV_SKIP(thresh_exit_part) \
+ ((3 * (thresh_exit_part)) >> 2)
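
Worked through with a hypothetical luma exit threshold of 5000, the macro gives (3 * 5000) >> 2 = 15000 >> 2 = 3750, i.e. the chroma threshold is three quarters of the luma threshold, rounded down.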
/*!\brief Set the thresholds for variance based partition.
*
* Set the variance split thresholds for following the block sizes:
@@ -44,7 +46,7 @@ extern "C" {
* \param[in] q q index
* \param[in] content_lowsumdiff Low sumdiff flag for superblock
*
- * \return Returns the set of thresholds in \c cpi->vbp_info.thresholds.
+ * \remark Returns the set of thresholds in \c cpi->vbp_info.thresholds.
*/
void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int q,
int content_lowsumdiff);
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/av1_fwd_txfm_sse2.c b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/av1_fwd_txfm_sse2.c
index 694e6131c29..a5cbe16df39 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/av1_fwd_txfm_sse2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/av1_fwd_txfm_sse2.c
@@ -1425,7 +1425,7 @@ static void fadst4x4_new_sse2(const __m128i *input, __m128i *output,
const __m128i sinpi_p03_p04 = pair_set_epi16(sinpi[3], sinpi[4]);
const __m128i sinpi_m03_p02 = pair_set_epi16(-sinpi[3], sinpi[2]);
const __m128i sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi[3]);
- const __m128i __zero = _mm_set1_epi16(0);
+ const __m128i __zero = _mm_setzero_si128();
const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
const __m128i in7 = _mm_add_epi16(input[0], input[1]);
__m128i u[8], v[8];
@@ -1573,7 +1573,7 @@ static void fadst8x4_new_sse2(const __m128i *input, __m128i *output,
const __m128i sinpi_p03_p04 = pair_set_epi16(sinpi[3], sinpi[4]);
const __m128i sinpi_m03_p02 = pair_set_epi16(-sinpi[3], sinpi[2]);
const __m128i sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi[3]);
- const __m128i __zero = _mm_set1_epi16(0);
+ const __m128i __zero = _mm_setzero_si128();
const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));
const __m128i in7 = _mm_add_epi16(input[0], input[1]);
__m128i u_lo[8], u_hi[8], v_lo[8], v_hi[8];
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/av1_k_means_avx2.c b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/av1_k_means_avx2.c
index 23a7369e99c..759f515be90 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/av1_k_means_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/av1_k_means_avx2.c
@@ -21,7 +21,7 @@ void av1_calc_indices_dim1_avx2(const int *data, const int *centroids,
for (int i = 0; i < n; i += 8) {
__m256i ind = _mm256_loadu_si256((__m256i *)data);
for (int j = 0; j < k; j++) {
- __m256i cent = _mm256_set1_epi32((uint32_t)centroids[j]);
+ __m256i cent = _mm256_set1_epi32(centroids[j]);
__m256i d1 = _mm256_sub_epi32(ind, cent);
dist[j] = _mm256_mullo_epi32(d1, d1);
}
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/av1_k_means_sse2.c b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/av1_k_means_sse2.c
index 43f661fdaf2..f03c4591c8d 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/av1_k_means_sse2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/av1_k_means_sse2.c
@@ -25,7 +25,7 @@ void av1_calc_indices_dim1_sse2(const int *data, const int *centroids,
l = (l == 0) ? 1 : 0;
ind[l] = _mm_loadu_si128((__m128i *)data);
for (int j = 0; j < k; j++) {
- __m128i cent = _mm_set1_epi32((uint32_t)centroids[j]);
+ __m128i cent = _mm_set1_epi32(centroids[j]);
__m128i d1 = _mm_sub_epi32(ind[l], cent);
__m128i d2 = _mm_packs_epi32(d1, d1);
__m128i d3 = _mm_mullo_epi16(d2, d2);
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/av1_quantize_avx2.c b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/av1_quantize_avx2.c
index 591edd70613..8c4e3958326 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/av1_quantize_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/av1_quantize_avx2.c
@@ -16,25 +16,10 @@
#include "aom/aom_integer.h"
#include "aom_dsp/aom_dsp_common.h"
-static INLINE void read_coeff(const tran_low_t *coeff, __m256i *c) {
- if (sizeof(tran_low_t) == 4) {
- const __m256i x0 = _mm256_loadu_si256((const __m256i *)coeff);
- const __m256i x1 = _mm256_loadu_si256((const __m256i *)coeff + 1);
- *c = _mm256_packs_epi32(x0, x1);
- *c = _mm256_permute4x64_epi64(*c, 0xD8);
- } else {
- *c = _mm256_loadu_si256((const __m256i *)coeff);
- }
-}
-
static INLINE void write_zero(tran_low_t *qcoeff) {
const __m256i zero = _mm256_setzero_si256();
- if (sizeof(tran_low_t) == 4) {
- _mm256_storeu_si256((__m256i *)qcoeff, zero);
- _mm256_storeu_si256((__m256i *)qcoeff + 1, zero);
- } else {
- _mm256_storeu_si256((__m256i *)qcoeff, zero);
- }
+ _mm256_storeu_si256((__m256i *)qcoeff, zero);
+ _mm256_storeu_si256((__m256i *)qcoeff + 1, zero);
}
static INLINE void init_one_qp(const __m128i *p, __m256i *qp) {
@@ -64,36 +49,32 @@ static INLINE void init_qp(const int16_t *round_ptr, const int16_t *quant_ptr,
init_one_qp(&dequant, &qp[2]);
*thr = _mm256_srai_epi16(qp[2], 1 + log_scale);
+ // Subtracting 1 here eliminates a _mm256_cmpeq_epi16() instruction when
+ // calculating the zbin mask.
+ *thr = _mm256_sub_epi16(*thr, _mm256_set1_epi16(1));
}
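
The bias works because, for integers, x >= t is equivalent to x > t - 1, so a cmpgt/cmpeq/or sequence collapses to a single cmpgt. A scalar sanity check (a sketch; it ignores the INT16_MIN wrap-around, which the saturating SIMD path does not hit):

    #include <assert.h>

    static void check_threshold_bias(void) {
      const int t = 8;
      for (int x = 0; x < 16; ++x) assert((x >= t) == (x > t - 1));
    }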
-static INLINE void update_qp(int log_scale, __m256i *thr, __m256i *qp) {
+static INLINE void update_qp(__m256i *thr, __m256i *qp) {
qp[0] = _mm256_permute2x128_si256(qp[0], qp[0], 0x11);
qp[1] = _mm256_permute2x128_si256(qp[1], qp[1], 0x11);
qp[2] = _mm256_permute2x128_si256(qp[2], qp[2], 0x11);
- *thr = _mm256_srai_epi16(qp[2], 1 + log_scale);
+ *thr = _mm256_permute2x128_si256(*thr, *thr, 0x11);
}
-#define store_quan(q, addr) \
- do { \
- __m256i sign_bits = _mm256_srai_epi16(q, 15); \
- __m256i y0 = _mm256_unpacklo_epi16(q, sign_bits); \
- __m256i y1 = _mm256_unpackhi_epi16(q, sign_bits); \
- __m256i x0 = _mm256_permute2x128_si256(y0, y1, 0x20); \
- __m256i x1 = _mm256_permute2x128_si256(y0, y1, 0x31); \
- _mm256_storeu_si256((__m256i *)addr, x0); \
- _mm256_storeu_si256((__m256i *)addr + 1, x1); \
- } while (0)
-
-#define store_two_quan(q, addr1, dq, addr2) \
- do { \
- if (sizeof(tran_low_t) == 4) { \
- store_quan(q, addr1); \
- store_quan(dq, addr2); \
- } else { \
- _mm256_storeu_si256((__m256i *)addr1, q); \
- _mm256_storeu_si256((__m256i *)addr2, dq); \
- } \
- } while (0)
+static INLINE __m256i load_coefficients_avx2(const tran_low_t *coeff_ptr) {
+ const __m256i coeff1 = _mm256_load_si256((__m256i *)coeff_ptr);
+ const __m256i coeff2 = _mm256_load_si256((__m256i *)(coeff_ptr + 8));
+ return _mm256_packs_epi32(coeff1, coeff2);
+}
+
+static INLINE void store_coefficients_avx2(__m256i coeff_vals,
+ tran_low_t *coeff_ptr) {
+ __m256i coeff_sign = _mm256_srai_epi16(coeff_vals, 15);
+ __m256i coeff_vals_lo = _mm256_unpacklo_epi16(coeff_vals, coeff_sign);
+ __m256i coeff_vals_hi = _mm256_unpackhi_epi16(coeff_vals, coeff_sign);
+ _mm256_store_si256((__m256i *)coeff_ptr, coeff_vals_lo);
+ _mm256_store_si256((__m256i *)(coeff_ptr + 8), coeff_vals_hi);
+}
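
This pair is lossless because AV1 transform coefficients fit in 16 bits: _mm256_packs_epi32 interleaves the two 128-bit lanes, and the unpacklo/unpackhi pair in the store undoes exactly that interleave, so memory order is preserved. A scalar model of one coefficient's round trip (a sketch, not libaom API):

    #include <stdint.h>

    static int16_t pack_coeff(int32_t c) { /* models _mm256_packs_epi32 */
      if (c > INT16_MAX) return INT16_MAX; /* signed saturation */
      if (c < INT16_MIN) return INT16_MIN;
      return (int16_t)c;
    }

    static int32_t unpack_coeff(int16_t c) { /* models unpack with sign word */
      return c; /* implicit sign extension */
    }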
static INLINE uint16_t quant_gather_eob(__m256i eob) {
const __m128i eob_lo = _mm256_castsi256_si128(eob);
@@ -104,48 +85,11 @@ static INLINE uint16_t quant_gather_eob(__m256i eob) {
return INT16_MAX - _mm_extract_epi16(eob_s, 0);
}
-static INLINE void quantize(const __m256i *thr, const __m256i *qp, __m256i *c,
- const int16_t *iscan_ptr, tran_low_t *qcoeff,
- tran_low_t *dqcoeff, __m256i *eob) {
- const __m256i abs_coeff = _mm256_abs_epi16(*c);
- __m256i mask = _mm256_cmpgt_epi16(abs_coeff, *thr);
- mask = _mm256_or_si256(mask, _mm256_cmpeq_epi16(abs_coeff, *thr));
- const int nzflag = _mm256_movemask_epi8(mask);
-
- if (nzflag) {
- __m256i q = _mm256_adds_epi16(abs_coeff, qp[0]);
- q = _mm256_mulhi_epi16(q, qp[1]);
- q = _mm256_sign_epi16(q, *c);
- const __m256i dq = _mm256_mullo_epi16(q, qp[2]);
-
- store_two_quan(q, qcoeff, dq, dqcoeff);
- const __m256i zero = _mm256_setzero_si256();
- const __m256i iscan = _mm256_loadu_si256((const __m256i *)iscan_ptr);
- const __m256i zero_coeff = _mm256_cmpeq_epi16(dq, zero);
- const __m256i nzero_coeff = _mm256_cmpeq_epi16(zero_coeff, zero);
- __m256i cur_eob = _mm256_sub_epi16(iscan, nzero_coeff);
- cur_eob = _mm256_and_si256(cur_eob, nzero_coeff);
- *eob = _mm256_max_epi16(*eob, cur_eob);
- } else {
- write_zero(qcoeff);
- write_zero(dqcoeff);
- }
-}
-
-static INLINE __m256i scan_eob_256(const __m256i *iscan_ptr,
- __m256i *coeff256) {
- const __m256i iscan = _mm256_loadu_si256(iscan_ptr);
- const __m256i zero256 = _mm256_setzero_si256();
- const __m256i zero_coeff0 = _mm256_cmpeq_epi16(*coeff256, zero256);
- const __m256i nzero_coeff0 = _mm256_cmpeq_epi16(zero_coeff0, zero256);
- // Add one to convert from indices to counts
- const __m256i iscan_plus_one = _mm256_sub_epi16(iscan, nzero_coeff0);
- return _mm256_and_si256(iscan_plus_one, nzero_coeff0);
-}
-
-static INLINE int16_t accumulate_eob(__m128i eob) {
- __m128i eob_shuffled;
- eob_shuffled = _mm_shuffle_epi32(eob, 0xe);
+static INLINE int16_t accumulate_eob256(__m256i eob256) {
+ const __m128i eob_lo = _mm256_castsi256_si128(eob256);
+ const __m128i eob_hi = _mm256_extractf128_si256(eob256, 1);
+ __m128i eob = _mm_max_epi16(eob_lo, eob_hi);
+ __m128i eob_shuffled = _mm_shuffle_epi32(eob, 0xe);
eob = _mm_max_epi16(eob, eob_shuffled);
eob_shuffled = _mm_shufflelo_epi16(eob, 0xe);
eob = _mm_max_epi16(eob, eob_shuffled);
@@ -154,15 +98,50 @@ static INLINE int16_t accumulate_eob(__m128i eob) {
return _mm_extract_epi16(eob, 1);
}
+static AOM_FORCE_INLINE void quantize_lp_16(
+ const int16_t *coeff_ptr, intptr_t n_coeffs, const int16_t *iscan_ptr,
+ int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, __m256i *round256,
+ __m256i *quant256, __m256i *dequant256, __m256i *eob) {
+ const __m256i coeff =
+ _mm256_loadu_si256((const __m256i *)(coeff_ptr + n_coeffs));
+ const __m256i abs_coeff = _mm256_abs_epi16(coeff);
+ const __m256i tmp_rnd = _mm256_adds_epi16(abs_coeff, *round256);
+ const __m256i abs_qcoeff = _mm256_mulhi_epi16(tmp_rnd, *quant256);
+ const __m256i qcoeff = _mm256_sign_epi16(abs_qcoeff, coeff);
+ const __m256i dqcoeff = _mm256_mullo_epi16(qcoeff, *dequant256);
+ const __m256i nz_mask =
+ _mm256_cmpgt_epi16(abs_qcoeff, _mm256_setzero_si256());
+
+ _mm256_storeu_si256((__m256i *)(qcoeff_ptr + n_coeffs), qcoeff);
+ _mm256_storeu_si256((__m256i *)(dqcoeff_ptr + n_coeffs), dqcoeff);
+
+ const __m256i iscan =
+ _mm256_loadu_si256((const __m256i *)(iscan_ptr + n_coeffs));
+ const __m256i iscan_plus1 = _mm256_sub_epi16(iscan, nz_mask);
+ const __m256i nz_iscan = _mm256_and_si256(iscan_plus1, nz_mask);
+ *eob = _mm256_max_epi16(*eob, nz_iscan);
+}
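
Per lane, the arithmetic above is q = ((|c| + round) * quant) >> 16 with the sign of c restored, and dq = q * dequant. A scalar model of one coefficient (a sketch; it ignores the 16-bit saturation that _mm256_adds_epi16 provides):

    #include <stdint.h>
    #include <stdlib.h>

    static void quantize_lp_one(int16_t c, int16_t round, int16_t quant,
                                int16_t dequant, int16_t *q, int16_t *dq) {
      const int abs_c = abs(c);
      const int abs_q = ((abs_c + round) * quant) >> 16; /* mulhi_epi16 */
      /* sign_epi16 also zeroes the result when c == 0. */
      *q = (int16_t)(c == 0 ? 0 : (c < 0 ? -abs_q : abs_q));
      *dq = (int16_t)(*q * dequant);                     /* mullo_epi16 */
    }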
+
void av1_quantize_lp_avx2(const int16_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *round_ptr, const int16_t *quant_ptr,
int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
(void)scan;
- __m128i eob;
- __m256i round256, quant256, dequant256;
- __m256i eob256;
+ __m256i eob256 = _mm256_setzero_si256();
+
+ // Setup global values.
+ __m256i round256 =
+ _mm256_castsi128_si256(_mm_load_si128((const __m128i *)round_ptr));
+ __m256i quant256 =
+ _mm256_castsi128_si256(_mm_load_si128((const __m128i *)quant_ptr));
+ __m256i dequant256 =
+ _mm256_castsi128_si256(_mm_load_si128((const __m128i *)dequant_ptr));
+
+ // Populate upper AC values.
+ round256 = _mm256_permute4x64_epi64(round256, 0x54);
+ quant256 = _mm256_permute4x64_epi64(quant256, 0x54);
+ dequant256 = _mm256_permute4x64_epi64(dequant256, 0x54);
coeff_ptr += n_coeffs;
iscan += n_coeffs;
@@ -170,69 +149,62 @@ void av1_quantize_lp_avx2(const int16_t *coeff_ptr, intptr_t n_coeffs,
dqcoeff_ptr += n_coeffs;
n_coeffs = -n_coeffs;
- {
- __m256i coeff256;
-
- // Setup global values
- {
- const __m128i round = _mm_load_si128((const __m128i *)round_ptr);
- const __m128i quant = _mm_load_si128((const __m128i *)quant_ptr);
- const __m128i dequant = _mm_load_si128((const __m128i *)dequant_ptr);
- round256 = _mm256_castsi128_si256(round);
- round256 = _mm256_permute4x64_epi64(round256, 0x54);
-
- quant256 = _mm256_castsi128_si256(quant);
- quant256 = _mm256_permute4x64_epi64(quant256, 0x54);
-
- dequant256 = _mm256_castsi128_si256(dequant);
- dequant256 = _mm256_permute4x64_epi64(dequant256, 0x54);
- }
-
- {
- __m256i qcoeff256;
- __m256i qtmp256;
- coeff256 = _mm256_loadu_si256((const __m256i *)(coeff_ptr + n_coeffs));
- qcoeff256 = _mm256_abs_epi16(coeff256);
- qcoeff256 = _mm256_adds_epi16(qcoeff256, round256);
- qtmp256 = _mm256_mulhi_epi16(qcoeff256, quant256);
- qcoeff256 = _mm256_sign_epi16(qtmp256, coeff256);
- _mm256_storeu_si256((__m256i *)(qcoeff_ptr + n_coeffs), qcoeff256);
- coeff256 = _mm256_mullo_epi16(qcoeff256, dequant256);
- _mm256_storeu_si256((__m256i *)(dqcoeff_ptr + n_coeffs), coeff256);
- }
-
- eob256 = scan_eob_256((const __m256i *)(iscan + n_coeffs), &coeff256);
- n_coeffs += 8 * 2;
- }
+ // Process DC and the first 15 AC coeffs.
+ quantize_lp_16(coeff_ptr, n_coeffs, iscan, qcoeff_ptr, dqcoeff_ptr, &round256,
+ &quant256, &dequant256, &eob256);
- // remove dc constants
+ // Overwrite the DC constants with AC constants.
dequant256 = _mm256_permute2x128_si256(dequant256, dequant256, 0x31);
quant256 = _mm256_permute2x128_si256(quant256, quant256, 0x31);
round256 = _mm256_permute2x128_si256(round256, round256, 0x31);
- // AC only loop
+ n_coeffs += 8 * 2;
+
+ // AC only loop.
while (n_coeffs < 0) {
- __m256i coeff256 =
- _mm256_loadu_si256((const __m256i *)(coeff_ptr + n_coeffs));
- __m256i qcoeff256 = _mm256_abs_epi16(coeff256);
-
- __m256i qtmp256;
- qcoeff256 = _mm256_adds_epi16(qcoeff256, round256);
- qtmp256 = _mm256_mulhi_epi16(qcoeff256, quant256);
- qcoeff256 = _mm256_sign_epi16(qtmp256, coeff256);
- _mm256_storeu_si256((__m256i *)(qcoeff_ptr + n_coeffs), qcoeff256);
- coeff256 = _mm256_mullo_epi16(qcoeff256, dequant256);
- _mm256_storeu_si256((__m256i *)(dqcoeff_ptr + n_coeffs), coeff256);
- eob256 = _mm256_max_epi16(
- eob256, scan_eob_256((const __m256i *)(iscan + n_coeffs), &coeff256));
+ quantize_lp_16(coeff_ptr, n_coeffs, iscan, qcoeff_ptr, dqcoeff_ptr,
+ &round256, &quant256, &dequant256, &eob256);
n_coeffs += 8 * 2;
}
- eob = _mm_max_epi16(_mm256_castsi256_si128(eob256),
- _mm256_extracti128_si256(eob256, 1));
+ *eob_ptr = accumulate_eob256(eob256);
+}
- *eob_ptr = accumulate_eob(eob);
+static AOM_FORCE_INLINE __m256i get_max_lane_eob(const int16_t *iscan,
+ __m256i v_eobmax,
+ __m256i v_mask) {
+ const __m256i v_iscan = _mm256_loadu_si256((const __m256i *)iscan);
+ const __m256i v_iscan_perm = _mm256_permute4x64_epi64(v_iscan, 0xD8);
+ const __m256i v_iscan_plus1 = _mm256_sub_epi16(v_iscan_perm, v_mask);
+ const __m256i v_nz_iscan = _mm256_and_si256(v_iscan_plus1, v_mask);
+ return _mm256_max_epi16(v_eobmax, v_nz_iscan);
+}
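
The eob bookkeeping relies on the compare mask being all-ones (-1) for nonzero lanes: iscan - (-1) turns a scan index into a count, the AND discards zero lanes, and a running max accumulates the last nonzero position. The 0xD8 permute aligns the iscan order with the lane order that _mm256_packs_epi32 produced in load_coefficients_avx2(). One lane in scalar form (a sketch):

    #include <stdint.h>

    static int16_t lane_eob(int16_t iscan, int nonzero, int16_t eobmax) {
      const int16_t mask = nonzero ? -1 : 0;                /* compare result */
      const int16_t cnt = (int16_t)((iscan - mask) & mask); /* index -> count */
      return cnt > eobmax ? cnt : eobmax;                   /* running max */
    }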
+
+static AOM_FORCE_INLINE void quantize_fp_16(
+ const __m256i *thr, const __m256i *qp, const tran_low_t *coeff_ptr,
+ const int16_t *iscan_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ __m256i *eob) {
+ const __m256i coeff = load_coefficients_avx2(coeff_ptr);
+ const __m256i abs_coeff = _mm256_abs_epi16(coeff);
+ const __m256i mask = _mm256_cmpgt_epi16(abs_coeff, *thr);
+ const int nzflag = _mm256_movemask_epi8(mask);
+
+ if (nzflag) {
+ const __m256i tmp_rnd = _mm256_adds_epi16(abs_coeff, qp[0]);
+ const __m256i abs_q = _mm256_mulhi_epi16(tmp_rnd, qp[1]);
+ const __m256i q = _mm256_sign_epi16(abs_q, coeff);
+ const __m256i dq = _mm256_mullo_epi16(q, qp[2]);
+ const __m256i nz_mask = _mm256_cmpgt_epi16(abs_q, _mm256_setzero_si256());
+
+ store_coefficients_avx2(q, qcoeff_ptr);
+ store_coefficients_avx2(dq, dqcoeff_ptr);
+
+ *eob = get_max_lane_eob(iscan_ptr, *eob, nz_mask);
+ } else {
+ write_zero(qcoeff_ptr);
+ write_zero(dqcoeff_ptr);
+ }
}
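
nzflag turns the 16-lane compare into one branch: a zero movemask means every |coeff| is at or below the threshold, so the whole group quantizes to zero and the multiply chain and coefficient stores can be skipped. Scalar equivalent of the test (a sketch):

    #include <stdint.h>

    static int any_above_thr(const int16_t *abs_coeff, int n, int16_t thr) {
      for (int i = 0; i < n; ++i)
        if (abs_coeff[i] > thr) return 1; /* movemask(cmpgt) != 0 */
      return 0;
    }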
void av1_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
@@ -245,17 +217,15 @@ void av1_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
(void)scan_ptr;
(void)zbin_ptr;
(void)quant_shift_ptr;
- const unsigned int step = 16;
- __m256i qp[3];
- __m256i coeff, thr;
const int log_scale = 0;
+ const int step = 16;
+ __m256i qp[3], thr;
+ __m256i eob = _mm256_setzero_si256();
init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp);
- read_coeff(coeff_ptr, &coeff);
- __m256i eob = _mm256_setzero_si256();
- quantize(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
+ quantize_fp_16(&thr, qp, coeff_ptr, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
coeff_ptr += step;
qcoeff_ptr += step;
@@ -263,11 +233,11 @@ void av1_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
iscan_ptr += step;
n_coeffs -= step;
- update_qp(log_scale, &thr, qp);
+ update_qp(&thr, qp);
while (n_coeffs > 0) {
- read_coeff(coeff_ptr, &coeff);
- quantize(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
+ quantize_fp_16(&thr, qp, coeff_ptr, iscan_ptr, qcoeff_ptr, dqcoeff_ptr,
+ &eob);
coeff_ptr += step;
qcoeff_ptr += step;
@@ -278,36 +248,31 @@ void av1_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
*eob_ptr = quant_gather_eob(eob);
}
-static INLINE void quantize_32x32(const __m256i *thr, const __m256i *qp,
- __m256i *c, const int16_t *iscan_ptr,
- tran_low_t *qcoeff, tran_low_t *dqcoeff,
- __m256i *eob) {
- const __m256i abs_coeff = _mm256_abs_epi16(*c);
- __m256i mask = _mm256_cmpgt_epi16(abs_coeff, *thr);
- mask = _mm256_or_si256(mask, _mm256_cmpeq_epi16(abs_coeff, *thr));
+static AOM_FORCE_INLINE void quantize_fp_32x32(
+ const __m256i *thr, const __m256i *qp, const tran_low_t *coeff_ptr,
+ const int16_t *iscan_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
+ __m256i *eob) {
+ const __m256i coeff = load_coefficients_avx2(coeff_ptr);
+ const __m256i abs_coeff = _mm256_abs_epi16(coeff);
+ const __m256i mask = _mm256_cmpgt_epi16(abs_coeff, *thr);
const int nzflag = _mm256_movemask_epi8(mask);
if (nzflag) {
- __m256i q = _mm256_adds_epi16(abs_coeff, qp[0]);
- q = _mm256_mulhi_epu16(q, qp[1]);
-
- __m256i dq = _mm256_mullo_epi16(q, qp[2]);
- dq = _mm256_srli_epi16(dq, 1);
-
- q = _mm256_sign_epi16(q, *c);
- dq = _mm256_sign_epi16(dq, *c);
-
- store_two_quan(q, qcoeff, dq, dqcoeff);
- const __m256i zero = _mm256_setzero_si256();
- const __m256i iscan = _mm256_loadu_si256((const __m256i *)iscan_ptr);
- const __m256i zero_coeff = _mm256_cmpeq_epi16(dq, zero);
- const __m256i nzero_coeff = _mm256_cmpeq_epi16(zero_coeff, zero);
- __m256i cur_eob = _mm256_sub_epi16(iscan, nzero_coeff);
- cur_eob = _mm256_and_si256(cur_eob, nzero_coeff);
- *eob = _mm256_max_epi16(*eob, cur_eob);
+ const __m256i tmp_rnd = _mm256_adds_epi16(abs_coeff, qp[0]);
+ const __m256i abs_q = _mm256_mulhi_epu16(tmp_rnd, qp[1]);
+ const __m256i q = _mm256_sign_epi16(abs_q, coeff);
+ const __m256i abs_dq =
+ _mm256_srli_epi16(_mm256_mullo_epi16(abs_q, qp[2]), 1);
+ const __m256i nz_mask = _mm256_cmpgt_epi16(abs_q, _mm256_setzero_si256());
+ const __m256i dq = _mm256_sign_epi16(abs_dq, coeff);
+
+ store_coefficients_avx2(q, qcoeff_ptr);
+ store_coefficients_avx2(dq, dqcoeff_ptr);
+
+ *eob = get_max_lane_eob(iscan_ptr, *eob, nz_mask);
} else {
- write_zero(qcoeff);
- write_zero(dqcoeff);
+ write_zero(qcoeff_ptr);
+ write_zero(dqcoeff_ptr);
}
}
@@ -320,17 +285,16 @@ void av1_quantize_fp_32x32_avx2(
(void)scan_ptr;
(void)zbin_ptr;
(void)quant_shift_ptr;
- const unsigned int step = 16;
- __m256i qp[3];
- __m256i coeff, thr;
const int log_scale = 1;
+ const unsigned int step = 16;
+ __m256i qp[3], thr;
+ __m256i eob = _mm256_setzero_si256();
init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp);
- read_coeff(coeff_ptr, &coeff);
- __m256i eob = _mm256_setzero_si256();
- quantize_32x32(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
+ quantize_fp_32x32(&thr, qp, coeff_ptr, iscan_ptr, qcoeff_ptr, dqcoeff_ptr,
+ &eob);
coeff_ptr += step;
qcoeff_ptr += step;
@@ -338,11 +302,11 @@ void av1_quantize_fp_32x32_avx2(
iscan_ptr += step;
n_coeffs -= step;
- update_qp(log_scale, &thr, qp);
+ update_qp(&thr, qp);
while (n_coeffs > 0) {
- read_coeff(coeff_ptr, &coeff);
- quantize_32x32(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
+ quantize_fp_32x32(&thr, qp, coeff_ptr, iscan_ptr, qcoeff_ptr, dqcoeff_ptr,
+ &eob);
coeff_ptr += step;
qcoeff_ptr += step;
@@ -353,40 +317,44 @@ void av1_quantize_fp_32x32_avx2(
*eob_ptr = quant_gather_eob(eob);
}
-static INLINE void quantize_64x64(const __m256i *thr, const __m256i *qp,
- __m256i *c, const int16_t *iscan_ptr,
- tran_low_t *qcoeff, tran_low_t *dqcoeff,
- __m256i *eob) {
- const __m256i abs_coeff = _mm256_abs_epi16(*c);
- __m256i mask = _mm256_cmpgt_epi16(abs_coeff, *thr);
- mask = _mm256_or_si256(mask, _mm256_cmpeq_epi16(abs_coeff, *thr));
+static INLINE void quantize_fp_64x64(const __m256i *thr, const __m256i *qp,
+ const tran_low_t *coeff_ptr,
+ const int16_t *iscan_ptr,
+ tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, __m256i *eob) {
+ const __m256i coeff = load_coefficients_avx2(coeff_ptr);
+ const __m256i abs_coeff = _mm256_abs_epi16(coeff);
+ const __m256i mask = _mm256_cmpgt_epi16(abs_coeff, *thr);
const int nzflag = _mm256_movemask_epi8(mask);
if (nzflag) {
- __m256i q = _mm256_adds_epi16(abs_coeff, qp[0]);
- __m256i qh = _mm256_mulhi_epi16(q, qp[1]);
- __m256i ql = _mm256_mullo_epi16(q, qp[1]);
- qh = _mm256_slli_epi16(qh, 2);
- ql = _mm256_srli_epi16(ql, 14);
- q = _mm256_or_si256(qh, ql);
- const __m256i dqh = _mm256_slli_epi16(_mm256_mulhi_epi16(q, qp[2]), 14);
- const __m256i dql = _mm256_srli_epi16(_mm256_mullo_epi16(q, qp[2]), 2);
- __m256i dq = _mm256_or_si256(dqh, dql);
-
- q = _mm256_sign_epi16(q, *c);
- dq = _mm256_sign_epi16(dq, *c);
-
- store_two_quan(q, qcoeff, dq, dqcoeff);
- const __m256i zero = _mm256_setzero_si256();
- const __m256i iscan = _mm256_loadu_si256((const __m256i *)iscan_ptr);
- const __m256i zero_coeff = _mm256_cmpeq_epi16(dq, zero);
- const __m256i nzero_coeff = _mm256_cmpeq_epi16(zero_coeff, zero);
- __m256i cur_eob = _mm256_sub_epi16(iscan, nzero_coeff);
- cur_eob = _mm256_and_si256(cur_eob, nzero_coeff);
- *eob = _mm256_max_epi16(*eob, cur_eob);
+ const __m256i tmp_rnd =
+ _mm256_and_si256(_mm256_adds_epi16(abs_coeff, qp[0]), mask);
+ const __m256i qh = _mm256_slli_epi16(_mm256_mulhi_epi16(tmp_rnd, qp[1]), 2);
+ const __m256i ql =
+ _mm256_srli_epi16(_mm256_mullo_epi16(tmp_rnd, qp[1]), 14);
+ const __m256i abs_q = _mm256_or_si256(qh, ql);
+ const __m256i dqh = _mm256_slli_epi16(_mm256_mulhi_epi16(abs_q, qp[2]), 14);
+ const __m256i dql = _mm256_srli_epi16(_mm256_mullo_epi16(abs_q, qp[2]), 2);
+ const __m256i abs_dq = _mm256_or_si256(dqh, dql);
+ const __m256i q = _mm256_sign_epi16(abs_q, coeff);
+ const __m256i dq = _mm256_sign_epi16(abs_dq, coeff);
+ // Check the signed q/dq value here instead of the absolute value. When
+ // dequant equals 4, the dequant threshold (*thr) becomes 0 after being
+ // scaled down by (1 + log_scale). See init_qp(). When *thr is 0 and the
+ // abs_coeff is 0, the nzflag will be set. As a result, the eob will be
+ // incorrectly calculated. The psign instruction corrects the error by
+ // zeroing out q/dq if coeff is zero.
+ const __m256i z_mask = _mm256_cmpeq_epi16(dq, _mm256_setzero_si256());
+ const __m256i nz_mask = _mm256_cmpeq_epi16(z_mask, _mm256_setzero_si256());
+
+ store_coefficients_avx2(q, qcoeff_ptr);
+ store_coefficients_avx2(dq, dqcoeff_ptr);
+
+ *eob = get_max_lane_eob(iscan_ptr, *eob, nz_mask);
} else {
- write_zero(qcoeff);
- write_zero(dqcoeff);
+ write_zero(qcoeff_ptr);
+ write_zero(dqcoeff_ptr);
}
}
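
The 64x64 path needs (x * q) >> 14 (log_scale = 2) from 16-bit lanes, so the 32-bit product is reassembled from its mulhi/mullo halves: ((hi << 16) | lo) >> 14 == (hi << 2) | (lo >> 14), kept modulo 2^16; the dequant path applies the same identity with a shift of 2. A scalar model (a sketch using unsigned arithmetic; the SIMD path uses signed mulhi on non-negative inputs):

    #include <stdint.h>

    static uint16_t mul_shift14(uint16_t x, uint16_t q) {
      const uint32_t prod = (uint32_t)x * q;
      const uint16_t hi = (uint16_t)(prod >> 16); /* mulhi */
      const uint16_t lo = (uint16_t)prod;         /* mullo */
      return (uint16_t)((hi << 2) | (lo >> 14));
    }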
@@ -399,17 +367,16 @@ void av1_quantize_fp_64x64_avx2(
(void)scan_ptr;
(void)zbin_ptr;
(void)quant_shift_ptr;
- const unsigned int step = 16;
- __m256i qp[3];
- __m256i coeff, thr;
const int log_scale = 2;
+ const unsigned int step = 16;
+ __m256i qp[3], thr;
+ __m256i eob = _mm256_setzero_si256();
init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp);
- read_coeff(coeff_ptr, &coeff);
- __m256i eob = _mm256_setzero_si256();
- quantize_64x64(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
+ quantize_fp_64x64(&thr, qp, coeff_ptr, iscan_ptr, qcoeff_ptr, dqcoeff_ptr,
+ &eob);
coeff_ptr += step;
qcoeff_ptr += step;
@@ -417,11 +384,11 @@ void av1_quantize_fp_64x64_avx2(
iscan_ptr += step;
n_coeffs -= step;
- update_qp(log_scale, &thr, qp);
+ update_qp(&thr, qp);
while (n_coeffs > 0) {
- read_coeff(coeff_ptr, &coeff);
- quantize_64x64(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob);
+ quantize_fp_64x64(&thr, qp, coeff_ptr, iscan_ptr, qcoeff_ptr, dqcoeff_ptr,
+ &eob);
coeff_ptr += step;
qcoeff_ptr += step;
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/hash_sse42.c b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/hash_sse42.c
index 2f4e02df1a1..9e06ebe1283 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/hash_sse42.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/hash_sse42.c
@@ -47,5 +47,5 @@ uint32_t av1_get_crc32c_value_sse4_2(void *crc_calculator, uint8_t *p,
CALC_CRC(_mm_crc32_u32, crc, uint32_t, buf, len)
CALC_CRC(_mm_crc32_u16, crc, uint16_t, buf, len)
CALC_CRC(_mm_crc32_u8, crc, uint8_t, buf, len)
- return (crc ^= 0xFFFFFFFF);
+ return (crc ^ 0xFFFFFFFF);
}
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/highbd_block_error_intrin_sse2.c b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/highbd_block_error_intrin_sse2.c
index 4579e4e4a76..0287f01f328 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/highbd_block_error_intrin_sse2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/highbd_block_error_intrin_sse2.c
@@ -33,7 +33,7 @@ int64_t av1_highbd_block_error_sse2(const tran_low_t *coeff,
__m128i mm_dqcoeff2 = _mm_load_si128((__m128i *)(dqcoeff + i + 4));
// Check if any values require more than 15 bit
max = _mm_set1_epi32(0x3fff);
- min = _mm_set1_epi32(0xffffc000);
+ min = _mm_set1_epi32((int)0xffffc000);
cmp0 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff, max),
_mm_cmplt_epi32(mm_coeff, min));
cmp1 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff2, max),
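
Most of the cast changes in the remaining hunks follow this pattern: the _mm_set1_* and _mm256_set1_* intrinsics take signed arguments, so unsigned constants and loads are cast explicitly to avoid implicit-conversion warnings; the bit pattern is unchanged. A minimal sketch:

    #include <immintrin.h>

    static __m128i min_bound(void) {
      /* 0xffffc000 is an unsigned constant; the cast keeps the same
       * 32-bit pattern, which reads back as -16384. */
      return _mm_set1_epi32((int)0xffffc000);
    }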
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/pickrst_avx2.c b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/pickrst_avx2.c
index d53b128567d..e244d5ec386 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/pickrst_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/pickrst_avx2.c
@@ -59,7 +59,7 @@ static INLINE void acc_stat_win7_one_line_avx2(
M_int[k][l] += D1 * X1 + D2 * X2;
const __m256i kl =
- _mm256_cvtepu8_epi16(_mm_set1_epi16(loadu_uint16(dgd_ijk + l)));
+ _mm256_cvtepu8_epi16(_mm_set1_epi16(loadu_int16(dgd_ijk + l)));
acc_stat_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl);
acc_stat_avx2(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl);
acc_stat_avx2(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl);
@@ -88,7 +88,7 @@ static INLINE void acc_stat_win7_one_line_avx2(
// are (effectively) used as inputs to a multiply-accumulate.
// So if we set the extra pixel slot to 0, then it is effectively
// ignored.
- const __m256i kl = _mm256_cvtepu8_epi16(_mm_set1_epi16((uint16_t)D1));
+ const __m256i kl = _mm256_cvtepu8_epi16(_mm_set1_epi16((int16_t)D1));
acc_stat_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl);
acc_stat_avx2(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl);
acc_stat_avx2(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl);
@@ -260,7 +260,7 @@ static INLINE void acc_stat_highbd_win7_one_line_avx2(
// Load two u16 values from dgd_ijkl combined as a u32,
// then broadcast to 8x u32 slots of a 256
- const __m256i dgd_ijkl = _mm256_set1_epi32(loadu_uint32(dgd_ijk + l));
+ const __m256i dgd_ijkl = _mm256_set1_epi32(loadu_int32(dgd_ijk + l));
// dgd_ijkl = [y x y x y x y x] [y x y x y x y x] where each is a u16
acc_stat_highbd_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
@@ -297,7 +297,7 @@ static INLINE void acc_stat_highbd_win7_one_line_avx2(
// interleaved copies of two pixels, but we only have one. However, the
// pixels are (effectively) used as inputs to a multiply-accumulate. So
// if we set the extra pixel slot to 0, then it is effectively ignored.
- const __m256i dgd_ijkl = _mm256_set1_epi32((uint32_t)D1);
+ const __m256i dgd_ijkl = _mm256_set1_epi32((int)D1);
acc_stat_highbd_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
&dgd_ijkl);
@@ -408,7 +408,7 @@ static INLINE void acc_stat_highbd_win5_one_line_avx2(
// Load two u16 values from dgd_ijkl combined as a u32,
// then broadcast to 8x u32 slots of a 256
- const __m256i dgd_ijkl = _mm256_set1_epi32(loadu_uint32(dgd_ijk + l));
+ const __m256i dgd_ijkl = _mm256_set1_epi32(loadu_int32(dgd_ijk + l));
// dgd_ijkl = [x y x y x y x y] [x y x y x y x y] where each is a u16
acc_stat_highbd_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
@@ -441,7 +441,7 @@ static INLINE void acc_stat_highbd_win5_one_line_avx2(
// interleaved copies of two pixels, but we only have one. However, the
// pixels are (effectively) used as inputs to a multiply-accumulate. So
// if we set the extra pixel slot to 0, then it is effectively ignored.
- const __m256i dgd_ijkl = _mm256_set1_epi32((uint32_t)D1);
+ const __m256i dgd_ijkl = _mm256_set1_epi32((int)D1);
acc_stat_highbd_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
&dgd_ijkl);
@@ -569,7 +569,7 @@ static INLINE void acc_stat_win5_one_line_avx2(
M_int[k][l] += D1 * X1 + D2 * X2;
const __m256i kl =
- _mm256_cvtepu8_epi16(_mm_set1_epi16(loadu_uint16(dgd_ijk + l)));
+ _mm256_cvtepu8_epi16(_mm_set1_epi16(loadu_int16(dgd_ijk + l)));
acc_stat_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl);
acc_stat_avx2(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl);
acc_stat_avx2(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl);
@@ -596,7 +596,7 @@ static INLINE void acc_stat_win5_one_line_avx2(
// are (effectively) used as inputs to a multiply-accumulate.
// So if we set the extra pixel slot to 0, then it is effectively
// ignored.
- const __m256i kl = _mm256_cvtepu8_epi16(_mm_set1_epi16((uint16_t)D1));
+ const __m256i kl = _mm256_cvtepu8_epi16(_mm_set1_epi16((int16_t)D1));
acc_stat_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl);
acc_stat_avx2(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl);
acc_stat_avx2(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl);
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/pickrst_sse4.c b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/pickrst_sse4.c
index 3d496ef3cd8..8208cca639b 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/pickrst_sse4.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/pickrst_sse4.c
@@ -62,7 +62,7 @@ static INLINE void acc_stat_win7_one_line_sse4_1(
M_int[k][l] += D1 * X1 + D2 * X2;
const __m128i kl =
- _mm_cvtepu8_epi16(_mm_set1_epi16(*((uint16_t *)(dgd_ijk + l))));
+ _mm_cvtepu8_epi16(_mm_set1_epi16(*((int16_t *)(dgd_ijk + l))));
acc_stat_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl);
acc_stat_sse41(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl);
acc_stat_sse41(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl);
@@ -91,7 +91,7 @@ static INLINE void acc_stat_win7_one_line_sse4_1(
// are (effectively) used as inputs to a multiply-accumulate.
// So if we set the extra pixel slot to 0, then it is effectively
// ignored.
- const __m128i kl = _mm_cvtepu8_epi16(_mm_set1_epi16((uint16_t)D1));
+ const __m128i kl = _mm_cvtepu8_epi16(_mm_set1_epi16((int16_t)D1));
acc_stat_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl);
acc_stat_sse41(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl);
acc_stat_sse41(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl);
@@ -265,7 +265,7 @@ static INLINE void acc_stat_highbd_win7_one_line_sse4_1(
// Load two u16 values from dgd as a single u32
// Then broadcast to 4x u32 slots of a 128
- const __m128i dgd_ijkl = _mm_set1_epi32(*((uint32_t *)(dgd_ijk + l)));
+ const __m128i dgd_ijkl = _mm_set1_epi32(*((int *)(dgd_ijk + l)));
// dgd_ijkl = [y x y x y x y x] as u16
acc_stat_highbd_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
@@ -302,7 +302,7 @@ static INLINE void acc_stat_highbd_win7_one_line_sse4_1(
// interleaved copies of two pixels, but we only have one. However, the
// pixels are (effectively) used as inputs to a multiply-accumulate. So
// if we set the extra pixel slot to 0, then it is effectively ignored.
- const __m128i dgd_ijkl = _mm_set1_epi32((uint32_t)D1);
+ const __m128i dgd_ijkl = _mm_set1_epi32((int)D1);
acc_stat_highbd_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
&dgd_ijkl);
@@ -414,7 +414,7 @@ static INLINE void acc_stat_highbd_win5_one_line_sse4_1(
// Load two u16 values from dgd as a single u32
// then broadcast to 4x u32 slots of a 128
- const __m128i dgd_ijkl = _mm_set1_epi32(*((uint32_t *)(dgd_ijk + l)));
+ const __m128i dgd_ijkl = _mm_set1_epi32(*((int *)(dgd_ijk + l)));
// dgd_ijkl = [y x y x y x y x] as u16
acc_stat_highbd_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
@@ -447,7 +447,7 @@ static INLINE void acc_stat_highbd_win5_one_line_sse4_1(
// interleaved copies of two pixels, but we only have one. However, the
// pixels are (effectively) used as inputs to a multiply-accumulate. So
// if we set the extra pixel slot to 0, then it is effectively ignored.
- const __m128i dgd_ijkl = _mm_set1_epi32((uint32_t)D1);
+ const __m128i dgd_ijkl = _mm_set1_epi32((int)D1);
acc_stat_highbd_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle,
&dgd_ijkl);
@@ -574,7 +574,7 @@ static INLINE void acc_stat_win5_one_line_sse4_1(
M_int[k][l] += D1 * X1 + D2 * X2;
const __m128i kl =
- _mm_cvtepu8_epi16(_mm_set1_epi16(*((uint16_t *)(dgd_ijk + l))));
+ _mm_cvtepu8_epi16(_mm_set1_epi16(*((int16_t *)(dgd_ijk + l))));
acc_stat_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl);
acc_stat_sse41(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl);
acc_stat_sse41(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl);
@@ -601,7 +601,7 @@ static INLINE void acc_stat_win5_one_line_sse4_1(
// are (effectively) used as inputs to a multiply-accumulate.
// So if we set the extra pixel slot to 0, then it is effectively
// ignored.
- const __m128i kl = _mm_cvtepu8_epi16(_mm_set1_epi16((uint16_t)D1));
+ const __m128i kl = _mm_cvtepu8_epi16(_mm_set1_epi16((int16_t)D1));
acc_stat_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl);
acc_stat_sse41(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl);
acc_stat_sse41(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl);
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/rdopt_avx2.c b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/rdopt_avx2.c
index 3bc763c5872..a0ab3940c03 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/rdopt_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/rdopt_avx2.c
@@ -31,8 +31,8 @@ INLINE static void horver_correlation_4x4(const int16_t *diff, int stride,
// [ m n o p ]
const __m256i pixels = _mm256_set_epi64x(
- loadu_uint64(&diff[0 * stride]), loadu_uint64(&diff[1 * stride]),
- loadu_uint64(&diff[2 * stride]), loadu_uint64(&diff[3 * stride]));
+ loadu_int64(&diff[0 * stride]), loadu_int64(&diff[1 * stride]),
+ loadu_int64(&diff[2 * stride]), loadu_int64(&diff[3 * stride]));
// pixels = [d c b a h g f e] [l k j i p o n m] as i16
const __m256i slli = _mm256_slli_epi64(pixels, 16);
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/rdopt_sse4.c b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/rdopt_sse4.c
index 4c4ec1fa7de..12ac1461959 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/rdopt_sse4.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/rdopt_sse4.c
@@ -29,10 +29,10 @@ INLINE static void horver_correlation_4x4(const int16_t *diff, int stride,
// [ i j k l ]
// [ m n o p ]
- const __m128i pixelsa = _mm_set_epi64x(*(uint64_t *)&diff[0 * stride],
- *(uint64_t *)&diff[2 * stride]);
- const __m128i pixelsb = _mm_set_epi64x(*(uint64_t *)&diff[1 * stride],
- *(uint64_t *)&diff[3 * stride]);
+ const __m128i pixelsa = _mm_set_epi64x(*(int64_t *)&diff[0 * stride],
+ *(int64_t *)&diff[2 * stride]);
+ const __m128i pixelsb = _mm_set_epi64x(*(int64_t *)&diff[1 * stride],
+ *(int64_t *)&diff[3 * stride]);
// pixelsa = [d c b a l k j i] as i16
// pixelsb = [h g f e p o n m] as i16
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/reconinter_enc_sse2.c b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/reconinter_enc_sse2.c
index 6455bf3d5ac..d33fec79685 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/reconinter_enc_sse2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/reconinter_enc_sse2.c
@@ -305,13 +305,12 @@ void aom_highbd_dist_wtd_comp_avg_upsampled_pred_sse2(
assert(!(width * height & 7));
n = width * height >> 3;
- const uint16_t wt0 = (uint16_t)jcp_param->fwd_offset;
- const uint16_t wt1 = (uint16_t)jcp_param->bck_offset;
- const __m128i w0 = _mm_set_epi16(wt0, wt0, wt0, wt0, wt0, wt0, wt0, wt0);
- const __m128i w1 = _mm_set_epi16(wt1, wt1, wt1, wt1, wt1, wt1, wt1, wt1);
- const uint16_t round = ((1 << DIST_PRECISION_BITS) >> 1);
- const __m128i r =
- _mm_set_epi16(round, round, round, round, round, round, round, round);
+ const int16_t wt0 = (int16_t)jcp_param->fwd_offset;
+ const int16_t wt1 = (int16_t)jcp_param->bck_offset;
+ const __m128i w0 = _mm_set1_epi16(wt0);
+ const __m128i w1 = _mm_set1_epi16(wt1);
+ const int16_t round = (int16_t)((1 << DIST_PRECISION_BITS) >> 1);
+ const __m128i r = _mm_set1_epi16(round);
uint16_t *comp_pred16 = CONVERT_TO_SHORTPTR(comp_pred8);
for (i = 0; i < n; i++) {
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/reconinter_enc_ssse3.c b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/reconinter_enc_ssse3.c
index 7ac0f0d037b..df7aa958553 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/reconinter_enc_ssse3.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/reconinter_enc_ssse3.c
@@ -48,13 +48,12 @@ void aom_dist_wtd_comp_avg_upsampled_pred_ssse3(
assert(!(width * height & 15));
n = width * height >> 4;
- const uint8_t w0 = (uint8_t)jcp_param->fwd_offset;
- const uint8_t w1 = (uint8_t)jcp_param->bck_offset;
+ const int8_t w0 = (int8_t)jcp_param->fwd_offset;
+ const int8_t w1 = (int8_t)jcp_param->bck_offset;
const __m128i w = _mm_set_epi8(w1, w0, w1, w0, w1, w0, w1, w0, w1, w0, w1, w0,
w1, w0, w1, w0);
- const uint16_t round = ((1 << DIST_PRECISION_BITS) >> 1);
- const __m128i r =
- _mm_set_epi16(round, round, round, round, round, round, round, round);
+ const int16_t round = (int16_t)((1 << DIST_PRECISION_BITS) >> 1);
+ const __m128i r = _mm_set1_epi16(round);
for (i = 0; i < n; i++) {
__m128i p0 = xx_loadu_128(comp_pred);
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/temporal_filter_avx2.c b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/temporal_filter_avx2.c
index 8aa07641aa5..a9c800401aa 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/temporal_filter_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/temporal_filter_avx2.c
@@ -51,7 +51,7 @@ static AOM_FORCE_INLINE void get_squared_error_16x16_avx2(
_mm256_storeu_si256((__m256i *)(dst), vsqdiff1);
// Set zero to uninitialized memory to avoid uninitialized loads later
- *(uint32_t *)(dst + 16) = _mm_cvtsi128_si32(_mm_setzero_si128());
+ *(int *)(dst + 16) = _mm_cvtsi128_si32(_mm_setzero_si128());
src1 += stride, src2 += stride2;
dst += sse_stride;
@@ -85,7 +85,7 @@ static AOM_FORCE_INLINE void get_squared_error_32x32_avx2(
_mm256_storeu_si256((__m256i *)(dst), vres1);
_mm256_storeu_si256((__m256i *)(dst + 16), vres2);
// Set zero to uninitialized memory to avoid uninitialized loads later
- *(uint32_t *)(dst + 32) = _mm_cvtsi128_si32(_mm_setzero_si128());
+ *(int *)(dst + 32) = _mm_cvtsi128_si32(_mm_setzero_si128());
src1 += stride;
src2 += stride2;
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/temporal_filter_sse2.c b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/temporal_filter_sse2.c
index 26c3926dcaa..8be71647aa9 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/temporal_filter_sse2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/temporal_filter_sse2.c
@@ -42,7 +42,7 @@ static void get_squared_error(const uint8_t *frame1, const unsigned int stride,
for (int i = 0; i < block_height; i++) {
for (int j = 0; j < block_width; j += 16) {
// Set zero to uninitialized memory to avoid uninitialized loads later
- *(uint32_t *)(dst) = _mm_cvtsi128_si32(_mm_setzero_si128());
+ *(int *)(dst) = _mm_cvtsi128_si32(_mm_setzero_si128());
__m128i vsrc1 = _mm_loadu_si128((__m128i *)(src1 + j));
__m128i vsrc2 = _mm_loadu_si128((__m128i *)(src2 + j));
@@ -63,8 +63,7 @@ static void get_squared_error(const uint8_t *frame1, const unsigned int stride,
}
// Set zero to uninitialized memory to avoid uninitialized loads later
- *(uint32_t *)(dst + block_width + 2) =
- _mm_cvtsi128_si32(_mm_setzero_si128());
+ *(int *)(dst + block_width + 2) = _mm_cvtsi128_si32(_mm_setzero_si128());
src1 += stride;
src2 += stride2;
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/wedge_utils_avx2.c b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/wedge_utils_avx2.c
index c06bad8f79d..bbc62d5f1f7 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/wedge_utils_avx2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/wedge_utils_avx2.c
@@ -31,7 +31,7 @@ uint64_t av1_wedge_sse_from_residuals_avx2(const int16_t *r1, const int16_t *d,
uint64_t csse;
const __m256i v_mask_max_w = _mm256_set1_epi16(MAX_MASK_VALUE);
- const __m256i v_zext_q = yy_set1_64_from_32i(0xffffffff);
+ const __m256i v_zext_q = yy_set1_64_from_32i(~0);
__m256i v_acc0_q = _mm256_setzero_si256();
@@ -142,7 +142,7 @@ int8_t av1_wedge_sign_from_residuals_avx2(const int16_t *ds, const uint8_t *m,
v_acc_q_0 = _mm_add_epi64(v_acc_q_0, v_acc_q_1);
#if ARCH_X86_64
- acc = (uint64_t)_mm_extract_epi64(v_acc_q_0, 0);
+ acc = _mm_extract_epi64(v_acc_q_0, 0);
#else
xx_storel_64(&acc, v_acc_q_0);
#endif
@@ -155,7 +155,7 @@ int8_t av1_wedge_sign_from_residuals_avx2(const int16_t *ds, const uint8_t *m,
*/
void av1_wedge_compute_delta_squares_avx2(int16_t *d, const int16_t *a,
const int16_t *b, int N) {
- const __m256i v_neg_w = _mm256_set1_epi32(0xffff0001);
+ const __m256i v_neg_w = _mm256_set1_epi32((int)0xffff0001);
assert(N % 64 == 0);
diff --git a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/wedge_utils_sse2.c b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/wedge_utils_sse2.c
index f3f4b8a7501..e665b2e361e 100644
--- a/chromium/third_party/libaom/source/libaom/av1/encoder/x86/wedge_utils_sse2.c
+++ b/chromium/third_party/libaom/source/libaom/av1/encoder/x86/wedge_utils_sse2.c
@@ -31,7 +31,7 @@ uint64_t av1_wedge_sse_from_residuals_sse2(const int16_t *r1, const int16_t *d,
uint64_t csse;
const __m128i v_mask_max_w = _mm_set1_epi16(MAX_MASK_VALUE);
- const __m128i v_zext_q = xx_set1_64_from_32i(0xffffffff);
+ const __m128i v_zext_q = xx_set1_64_from_32i(~0);
__m128i v_acc0_q = _mm_setzero_si128();
@@ -175,7 +175,7 @@ int8_t av1_wedge_sign_from_residuals_sse2(const int16_t *ds, const uint8_t *m,
v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_si128(v_acc_q, 8));
#if ARCH_X86_64
- acc = (uint64_t)_mm_cvtsi128_si64(v_acc_q);
+ acc = _mm_cvtsi128_si64(v_acc_q);
#else
xx_storel_64(&acc, v_acc_q);
#endif
diff --git a/chromium/third_party/libaom/source/libaom/av1/qmode_rc/ducky_encode.cc b/chromium/third_party/libaom/source/libaom/av1/qmode_rc/ducky_encode.cc
new file mode 100644
index 00000000000..a1817dbbc46
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/av1/qmode_rc/ducky_encode.cc
@@ -0,0 +1,588 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include <stdlib.h>
+#include <string.h>
+#include <algorithm>
+#include <memory>
+#include <numeric>
+#include <vector>
+
+#include "av1/common/enums.h"
+#include "config/aom_config.h"
+
+#include "aom/aom_encoder.h"
+
+#include "av1/av1_cx_iface.h"
+#include "av1/av1_iface_common.h"
+#include "av1/encoder/encoder.h"
+#include "av1/encoder/ethread.h"
+#include "av1/encoder/firstpass.h"
+#include "av1/encoder/temporal_filter.h"
+#include "av1/qmode_rc/ducky_encode.h"
+
+#include "common/tools_common.h"
+
+namespace aom {
+struct EncoderResource {
+ FILE *in_file;
+ STATS_BUFFER_CTX *stats_buf_ctx;
+ FIRSTPASS_STATS *stats_buffer;
+ aom_image_t img;
+ AV1_PRIMARY *ppi;
+ int lookahead_push_count;
+ int encode_frame_count; // Used in the second pass only
+};
+
+class DuckyEncode::EncodeImpl {
+ public:
+ VideoInfo video_info;
+ int g_usage;
+ int max_ref_frames;
+ int speed;
+ int base_qindex;
+ enum aom_rc_mode rc_end_usage;
+ aom_rational64_t timestamp_ratio;
+ std::vector<FIRSTPASS_STATS> stats_list;
+ EncoderResource enc_resource;
+};
+
+DuckyEncode::DuckyEncode(const VideoInfo &video_info, int max_ref_frames,
+ int speed, int base_qindex) {
+ impl_ptr_ = std::unique_ptr<EncodeImpl>(new EncodeImpl());
+ impl_ptr_->video_info = video_info;
+ impl_ptr_->g_usage = GOOD;
+ impl_ptr_->max_ref_frames = max_ref_frames;
+ impl_ptr_->speed = speed;
+ impl_ptr_->base_qindex = base_qindex;
+ impl_ptr_->rc_end_usage = AOM_Q;
+ // TODO(angiebird): Set timestamp_ratio properly
+ // timestamp_ratio.den = cfg->g_timebase.den;
+ // timestamp_ratio.num = (int64_t)cfg->g_timebase.num * TICKS_PER_SEC;
+ impl_ptr_->timestamp_ratio = { 1, 1 };
+ // TODO(angiebird): How to set ptsvol and duration?
+}
+
+DuckyEncode::~DuckyEncode() {}
+
+static AV1EncoderConfig GetEncoderConfig(const VideoInfo &video_info,
+ int g_usage, aom_enc_pass pass) {
+ const aom_codec_iface *codec = aom_codec_av1_cx();
+ aom_codec_enc_cfg_t cfg;
+ aom_codec_enc_config_default(codec, &cfg, g_usage);
+ cfg.g_w = video_info.frame_width;
+ cfg.g_h = video_info.frame_height;
+ cfg.g_pass = pass;
+ // g_timebase is the inverse of frame_rate
+ cfg.g_timebase.num = video_info.frame_rate.den;
+ cfg.g_timebase.den = video_info.frame_rate.num;
+ if (pass == AOM_RC_SECOND_PASS) {
+ cfg.rc_twopass_stats_in.sz =
+ (video_info.frame_count + 1) * sizeof(FIRSTPASS_STATS);
+ }
+ AV1EncoderConfig oxcf = av1_get_encoder_config(&cfg);
+ // TODO(angiebird): Why didn't we init use_highbitdepth in
+ // av1_get_encoder_config()?
+ oxcf.use_highbitdepth = 0;
+
+  // TODO(jingning): Change this to 35 when the baseline rate control
+  // logic is in place.
+  // Force the maximum lookahead buffer to 19. This disables the use of the
+  // maximum 32-frame GOP length.
+ oxcf.gf_cfg.lag_in_frames = 19;
+
+ return oxcf;
+}
+
+static STATS_BUFFER_CTX *CreateStatsBufferCtx(int frame_count,
+ FIRSTPASS_STATS **stats_buffer) {
+ STATS_BUFFER_CTX *stats_buf_ctx = new STATS_BUFFER_CTX;
+ // +2 is for total_stats and total_left_stats
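+  // Resulting layout of the (frame_count + 2)-entry buffer:
+  //   [0, frame_count)  : per-frame first pass stats
+  //   [frame_count]     : total_stats
+  //   [frame_count + 1] : total_left_stats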
+ *stats_buffer = new FIRSTPASS_STATS[frame_count + 2];
+ stats_buf_ctx->stats_in_start = *stats_buffer;
+ stats_buf_ctx->stats_in_end = stats_buf_ctx->stats_in_start;
+ stats_buf_ctx->stats_in_buf_end = stats_buf_ctx->stats_in_start + frame_count;
+ stats_buf_ctx->total_stats = stats_buf_ctx->stats_in_buf_end;
+ stats_buf_ctx->total_left_stats =
+ stats_buf_ctx->stats_in_start + frame_count + 1;
+ av1_twopass_zero_stats(stats_buf_ctx->total_left_stats);
+ av1_twopass_zero_stats(stats_buf_ctx->total_stats);
+ return stats_buf_ctx;
+}
+
+static void DestroyStatsBufferCtx(STATS_BUFFER_CTX **stats_buf_context,
+ FIRSTPASS_STATS **stats_buffer) {
+ (*stats_buf_context)->stats_in_start = nullptr;
+ (*stats_buf_context)->stats_in_end = nullptr;
+ (*stats_buf_context)->stats_in_buf_end = nullptr;
+ (*stats_buf_context)->total_stats = nullptr;
+ (*stats_buf_context)->total_left_stats = nullptr;
+ delete *stats_buf_context;
+ *stats_buf_context = nullptr;
+ delete[] (*stats_buffer);
+ *stats_buffer = nullptr;
+}
+
+static FIRSTPASS_STATS ComputeTotalStats(
+ const std::vector<FIRSTPASS_STATS> &stats_list) {
+ FIRSTPASS_STATS total_stats = {};
+ for (size_t i = 0; i < stats_list.size(); ++i) {
+ av1_accumulate_stats(&total_stats, &stats_list[i]);
+ }
+ return total_stats;
+}
+
+static EncoderResource InitEncoder(
+ const VideoInfo &video_info, int g_usage, enum aom_rc_mode rc_end_usage,
+ aom_enc_pass pass, const std::vector<FIRSTPASS_STATS> *stats_list,
+ int max_ref_frames, int speed) {
+ EncoderResource enc_resource = {};
+ enc_resource.in_file = fopen(video_info.file_path.c_str(), "r");
+ enc_resource.lookahead_push_count = 0;
+ aom_img_alloc(&enc_resource.img, video_info.img_fmt, video_info.frame_width,
+ video_info.frame_height, /*align=*/1);
+ AV1EncoderConfig oxcf = GetEncoderConfig(video_info, g_usage, pass);
+ oxcf.dec_model_cfg.decoder_model_info_present_flag = 0;
+ oxcf.dec_model_cfg.display_model_info_present_flag = 0;
+ oxcf.ref_frm_cfg.max_reference_frames = max_ref_frames;
+ oxcf.speed = speed;
+ av1_initialize_enc(g_usage, rc_end_usage);
+ AV1_PRIMARY *ppi =
+ av1_create_primary_compressor(nullptr,
+ /*num_lap_buffers=*/0, &oxcf);
+ enc_resource.ppi = ppi;
+
+ assert(ppi != nullptr);
+ // Turn off ppi->b_calculate_psnr to avoid calling generate_psnr_packet() in
+ // av1_post_encode_updates().
+ // TODO(angiebird): Modify generate_psnr_packet() to handle the case that
+ // cpi->ppi->output_pkt_list = nullptr.
+ ppi->b_calculate_psnr = 0;
+
+ aom_codec_err_t res = AOM_CODEC_OK;
+ (void)res;
+ enc_resource.stats_buf_ctx =
+ CreateStatsBufferCtx(video_info.frame_count, &enc_resource.stats_buffer);
+ if (pass == AOM_RC_SECOND_PASS) {
+ assert(stats_list != nullptr);
+ std::copy(stats_list->begin(), stats_list->end(),
+ enc_resource.stats_buffer);
+ *enc_resource.stats_buf_ctx->total_stats = ComputeTotalStats(*stats_list);
+ oxcf.twopass_stats_in.buf = enc_resource.stats_buffer;
+    // We need +1 here because the av1 encoder assumes
+    // oxcf.twopass_stats_in.buf[video_info.frame_count] holds the total_stats
+ oxcf.twopass_stats_in.sz =
+ (video_info.frame_count + 1) * sizeof(enc_resource.stats_buffer[0]);
+ } else {
+ assert(pass == AOM_RC_FIRST_PASS);
+ // We don't use stats_list for AOM_RC_FIRST_PASS.
+ assert(stats_list == nullptr);
+ }
+ ppi->twopass.stats_buf_ctx = enc_resource.stats_buf_ctx;
+ BufferPool *buffer_pool = nullptr;
+ res = av1_create_context_and_bufferpool(ppi, &ppi->cpi, &buffer_pool, &oxcf,
+ ENCODE_STAGE, -1);
+ // TODO(angiebird): Why didn't we set initial_dimensions in
+ // av1_create_compressor()?
+ ppi->cpi->initial_dimensions.width = oxcf.frm_dim_cfg.width;
+ ppi->cpi->initial_dimensions.height = oxcf.frm_dim_cfg.height;
+  // use_ducky_encode is the flag we use to change AV1 behavior
+  // slightly based on DuckyEncode's needs. We should minimize this kind of
+  // change unless it's necessary.
+ ppi->cpi->use_ducky_encode = 1;
+ assert(res == AOM_CODEC_OK);
+ assert(ppi->cpi != nullptr);
+ assert(buffer_pool != nullptr);
+ const AV1_COMP *cpi = ppi->cpi;
+ SequenceHeader *seq_params = ppi->cpi->common.seq_params;
+
+ ppi->seq_params_locked = 1;
+ assert(ppi->lookahead == nullptr);
+
+ int lag_in_frames = cpi->oxcf.gf_cfg.lag_in_frames;
+ ppi->lookahead = av1_lookahead_init(
+ cpi->oxcf.frm_dim_cfg.width, cpi->oxcf.frm_dim_cfg.height,
+ seq_params->subsampling_x, seq_params->subsampling_y,
+ seq_params->use_highbitdepth, lag_in_frames, cpi->oxcf.border_in_pixels,
+ cpi->common.features.byte_alignment,
+ /*num_lap_buffers=*/0, /*is_all_intra=*/0,
+ cpi->oxcf.tool_cfg.enable_global_motion);
+
+ av1_tf_info_alloc(&cpi->ppi->tf_info, cpi);
+ assert(ppi->lookahead != nullptr);
+ return enc_resource;
+}
+
+static void FreeEncoder(EncoderResource *enc_resource) {
+ fclose(enc_resource->in_file);
+ enc_resource->in_file = nullptr;
+ aom_img_free(&enc_resource->img);
+ DestroyStatsBufferCtx(&enc_resource->stats_buf_ctx,
+ &enc_resource->stats_buffer);
+ BufferPool *buffer_pool = enc_resource->ppi->cpi->common.buffer_pool;
+ av1_destroy_context_and_bufferpool(enc_resource->ppi->cpi, &buffer_pool);
+ av1_remove_primary_compressor(enc_resource->ppi);
+ enc_resource->ppi = nullptr;
+}
+
+std::vector<FIRSTPASS_STATS> DuckyEncode::ComputeFirstPassStats() {
+ aom_enc_pass pass = AOM_RC_FIRST_PASS;
+ EncoderResource enc_resource = InitEncoder(
+ impl_ptr_->video_info, impl_ptr_->g_usage, impl_ptr_->rc_end_usage, pass,
+ nullptr, impl_ptr_->max_ref_frames, impl_ptr_->speed);
+ AV1_PRIMARY *ppi = enc_resource.ppi;
+ struct lookahead_ctx *lookahead = ppi->lookahead;
+ int frame_count = impl_ptr_->video_info.frame_count;
+ FILE *in_file = enc_resource.in_file;
+ aom_rational64_t timestamp_ratio = impl_ptr_->timestamp_ratio;
+  // TODO(angiebird): Ideally, ComputeFirstPassStats() doesn't output a
+  // bitstream. Do we need a bitstream buffer here?
+ std::vector<uint8_t> buf(1000);
+ std::vector<FIRSTPASS_STATS> stats_list;
+ for (int i = 0; i < frame_count; ++i) {
+ if (aom_img_read(&enc_resource.img, in_file)) {
+ // TODO(angiebird): Set ts_start/ts_end properly
+ int64_t ts_start = enc_resource.lookahead_push_count;
+ int64_t ts_end = ts_start + 1;
+ YV12_BUFFER_CONFIG sd;
+ image2yuvconfig(&enc_resource.img, &sd);
+ av1_lookahead_push(lookahead, &sd, ts_start, ts_end,
+ /*use_highbitdepth=*/0, /*flags=*/0);
+ ++enc_resource.lookahead_push_count;
+ AV1_COMP_DATA cpi_data = {};
+ cpi_data.cx_data = buf.data();
+ cpi_data.cx_data_sz = buf.size();
+ cpi_data.frame_size = 0;
+ cpi_data.flush = 1; // Makes av1_get_compressed_data process a frame
+ cpi_data.ts_frame_start = ts_start;
+ cpi_data.ts_frame_end = ts_end;
+ cpi_data.pop_lookahead = 1;
+ cpi_data.timestamp_ratio = &timestamp_ratio;
+      // av1_get_compressed_data only generates first pass stats; it does
+      // not compress data.
+ int res = av1_get_compressed_data(ppi->cpi, &cpi_data);
+ (void)res;
+ assert(res == static_cast<int>(AOM_CODEC_OK));
+ stats_list.push_back(*(ppi->twopass.stats_buf_ctx->stats_in_end - 1));
+ av1_post_encode_updates(ppi->cpi, &cpi_data);
+ }
+ }
+ av1_end_first_pass(ppi->cpi);
+
+ FreeEncoder(&enc_resource);
+ return stats_list;
+}
+
+void DuckyEncode::StartEncode(const std::vector<FIRSTPASS_STATS> &stats_list) {
+ aom_enc_pass pass = AOM_RC_SECOND_PASS;
+ impl_ptr_->stats_list = stats_list;
+ impl_ptr_->enc_resource = InitEncoder(
+ impl_ptr_->video_info, impl_ptr_->g_usage, impl_ptr_->rc_end_usage, pass,
+ &stats_list, impl_ptr_->max_ref_frames, impl_ptr_->speed);
+ write_temp_delimiter_ = true;
+}
+
+static void DuckyEncodeInfoSetGopStruct(AV1_PRIMARY *ppi,
+ GopStruct gop_struct) {
+ GF_GROUP *gf_group = &ppi->gf_group;
+ ppi->p_rc.baseline_gf_interval = gop_struct.show_frame_count;
+ ppi->internal_altref_allowed = 1;
+
+ gf_group->size = static_cast<int>(gop_struct.gop_frame_list.size());
+ gf_group->max_layer_depth = 0;
+
+ int i = 0;
+ for (const auto &frame : gop_struct.gop_frame_list) {
+ gf_group->update_type[i] = (int)frame.update_type;
+ if (frame.update_type == GopFrameType::kRegularArf) gf_group->arf_index = i;
+
+ gf_group->frame_type[i] = !frame.is_key_frame;
+
+ gf_group->cur_frame_idx[i] = 0;
+ gf_group->arf_src_offset[i] = frame.order_idx - frame.display_idx;
+ gf_group->cur_frame_idx[i] = frame.display_idx;
+ gf_group->src_offset[i] = 0;
+
+ // TODO(jingning): Placeholder - update the arf boost.
+ gf_group->arf_boost[i] = 500;
+ gf_group->layer_depth[i] = frame.layer_depth;
+ gf_group->max_layer_depth =
+ AOMMAX(frame.layer_depth, gf_group->max_layer_depth);
+ gf_group->refbuf_state[i] =
+ frame.is_key_frame ? REFBUF_RESET : REFBUF_UPDATE;
+
+ std::fill_n(gf_group->ref_frame_list[i], REF_FRAMES, -1);
+ gf_group->update_ref_idx[i] = -1;
+ for (int ref_idx = 0;
+ ref_idx < static_cast<int>(frame.ref_frame_list.size()); ++ref_idx) {
+ int ref_frame = static_cast<int>(frame.ref_frame_list[ref_idx].name);
+ gf_group->ref_frame_list[i][ref_frame] =
+ static_cast<int8_t>(frame.ref_frame_list[ref_idx].index);
+ }
+ gf_group->update_ref_idx[i] = frame.update_ref_idx;
+ gf_group->primary_ref_idx[i] = frame.primary_ref_frame.index;
+ ++i;
+ }
+ ppi->cpi->gf_frame_index = 0;
+}
+
+static void DuckyEncodeInfoSetEncodeFrameDecision(
+ DuckyEncodeInfo *ducky_encode_info, const EncodeFrameDecision &decision) {
+ DuckyEncodeFrameInfo *frame_info = &ducky_encode_info->frame_info;
+ frame_info->qp_mode = static_cast<DUCKY_ENCODE_FRAME_MODE>(decision.qp_mode);
+ frame_info->gop_mode = static_cast<DUCKY_ENCODE_GOP_MODE>(decision.gop_mode);
+ frame_info->q_index = decision.parameters.q_index;
+ frame_info->rdmult = decision.parameters.rdmult;
+}
+
+static void DuckyEncodeInfoGetEncodeFrameResult(
+ const DuckyEncodeInfo *ducky_encode_info, EncodeFrameResult *result) {
+ const DuckyEncodeFrameResult &frame_result = ducky_encode_info->frame_result;
+ result->global_order_idx = frame_result.global_order_idx;
+ result->q_index = frame_result.q_index;
+ result->rdmult = frame_result.rdmult;
+ result->rate = frame_result.rate;
+ result->dist = frame_result.dist;
+ result->psnr = frame_result.psnr;
+}
+
+static void WriteObu(AV1_PRIMARY *ppi, AV1_COMP_DATA *cpi_data) {
+ AV1_COMP *const cpi = ppi->cpi;
+ uint32_t obu_header_size = 1;
+ const uint32_t obu_payload_size = 0;
+ const size_t length_field_size = aom_uleb_size_in_bytes(obu_payload_size);
+
+ const size_t move_offset = obu_header_size + length_field_size;
+ memmove(cpi_data->cx_data + move_offset, cpi_data->cx_data,
+ cpi_data->frame_size);
+ obu_header_size =
+ av1_write_obu_header(&ppi->level_params, &cpi->frame_header_count,
+ OBU_TEMPORAL_DELIMITER, 0, cpi_data->cx_data);
+
+ // OBUs are preceded/succeeded by an unsigned leb128 coded integer.
+ if (av1_write_uleb_obu_size(obu_header_size, obu_payload_size,
+ cpi_data->cx_data) != AOM_CODEC_OK) {
+ aom_internal_error(&ppi->error, AOM_CODEC_ERROR, NULL);
+ }
+
+ cpi_data->frame_size +=
+ obu_header_size + obu_payload_size + length_field_size;
+}
+
+TplGopStats DuckyEncode::ObtainTplStats(const GopStruct gop_struct) {
+ TplGopStats tpl_gop_stats;
+
+ AV1_PRIMARY *ppi = impl_ptr_->enc_resource.ppi;
+ const uint8_t block_mis_log2 = ppi->tpl_data.tpl_stats_block_mis_log2;
+
+ for (size_t idx = 0; idx < gop_struct.gop_frame_list.size(); ++idx) {
+ TplFrameStats tpl_frame_stats = {};
+ TplDepFrame *tpl_frame = &ppi->tpl_data.tpl_frame[idx];
+ if (gop_struct.gop_frame_list[idx].update_type == GopFrameType::kOverlay ||
+ gop_struct.gop_frame_list[idx].update_type ==
+ GopFrameType::kIntermediateOverlay) {
+ tpl_gop_stats.frame_stats_list.push_back(tpl_frame_stats);
+ continue;
+ }
+
+ int ref_frame_index_mapping[REF_FRAMES] = { 0 };
+ const GopFrame &gop_frame = gop_struct.gop_frame_list[idx];
+
+ for (auto &rf : gop_frame.ref_frame_list) {
+ ref_frame_index_mapping[static_cast<int>(rf.name)] = rf.index;
+ }
+
+ const int mi_rows = tpl_frame->mi_rows;
+ const int mi_cols = tpl_frame->mi_cols;
+ const int tpl_frame_stride = tpl_frame->stride;
+ tpl_frame_stats.frame_height = mi_rows * MI_SIZE;
+ tpl_frame_stats.frame_width = mi_cols * MI_SIZE;
+ tpl_frame_stats.min_block_size = (1 << block_mis_log2) * MI_SIZE;
+
+ const int mi_step = 1 << block_mis_log2;
+ for (int mi_row = 0; mi_row < mi_rows; mi_row += mi_step) {
+ for (int mi_col = 0; mi_col < mi_cols; mi_col += mi_step) {
+ int tpl_blk_pos = (mi_row >> block_mis_log2) * tpl_frame_stride +
+ (mi_col >> block_mis_log2);
+ TplDepStats *tpl_stats_ptr = &tpl_frame->tpl_stats_ptr[tpl_blk_pos];
+
+ TplBlockStats block_stats;
+ block_stats.row = mi_row * MI_SIZE;
+ block_stats.col = mi_col * MI_SIZE;
+ block_stats.height = (1 << block_mis_log2) * MI_SIZE;
+ block_stats.width = (1 << block_mis_log2) * MI_SIZE;
+ block_stats.inter_cost = tpl_stats_ptr->inter_cost
+ << TPL_DEP_COST_SCALE_LOG2;
+ block_stats.intra_cost = tpl_stats_ptr->intra_cost
+ << TPL_DEP_COST_SCALE_LOG2;
+ block_stats.ref_frame_index = { -1, -1 };
+
+ for (int i = 0; i < kBlockRefCount; ++i) {
+ if (tpl_stats_ptr->ref_frame_index[i] >= 0) {
+ block_stats.ref_frame_index[i] =
+ ref_frame_index_mapping[tpl_stats_ptr->ref_frame_index[i] + 1];
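+            // Note: the trailing 3 below appears to be the subpel precision
+            // of the stored motion vector in fractional bits (i.e. 1/8 pel).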
+ block_stats.mv[i] = {
+ tpl_stats_ptr->mv[tpl_stats_ptr->ref_frame_index[i]].as_mv.row,
+ tpl_stats_ptr->mv[tpl_stats_ptr->ref_frame_index[i]].as_mv.col, 3
+ };
+ }
+ }
+ tpl_frame_stats.block_stats_list.push_back(block_stats);
+ }
+ }
+
+ tpl_gop_stats.frame_stats_list.push_back(tpl_frame_stats);
+ }
+
+ return tpl_gop_stats;
+}
+
+// Obtain TPL stats through ducky_encode.
+std::vector<TplGopStats> DuckyEncode::ComputeTplStats(
+ const GopStructList &gop_list) {
+ std::vector<TplGopStats> tpl_gop_stats_list;
+ AV1_PRIMARY *ppi = impl_ptr_->enc_resource.ppi;
+ const VideoInfo &video_info = impl_ptr_->video_info;
+ write_temp_delimiter_ = true;
+ AllocateBitstreamBuffer(video_info);
+
+ // Go through each gop and encode each frame in the gop
+ for (size_t i = 0; i < gop_list.size(); ++i) {
+ const aom::GopStruct &gop_struct = gop_list[i];
+ DuckyEncodeInfoSetGopStruct(ppi, gop_struct);
+
+ aom::TplGopStats tpl_gop_stats;
+ for (auto &frame : gop_struct.gop_frame_list) {
+      // Encode each frame in the GOP with the base q index.
+ aom::EncodeFrameDecision frame_decision = { aom::EncodeFrameMode::kQindex,
+ aom::EncodeGopMode::kGopRcl,
+ { impl_ptr_->base_qindex,
+ -1 } };
+ (void)frame;
+ EncodeFrame(frame_decision);
+ if (ppi->cpi->common.show_frame) pending_ctx_size_ = 0;
+ write_temp_delimiter_ = ppi->cpi->common.show_frame;
+ }
+ tpl_gop_stats = ObtainTplStats(gop_struct);
+ // TODO(jingning): Set the tpl stats file format and populate the stats.
+ tpl_gop_stats_list.push_back(tpl_gop_stats);
+ }
+
+ return tpl_gop_stats_list;
+}
+
+// Obtain TPL stats through ducky_encode.
+std::vector<EncodeFrameResult> DuckyEncode::EncodeVideo(
+ const GopStructList &gop_list,
+ const GopEncodeInfoList &gop_encode_info_list) {
+ AV1_PRIMARY *ppi = impl_ptr_->enc_resource.ppi;
+ std::vector<EncodeFrameResult> encoded_frame_list;
+ const VideoInfo &video_info = impl_ptr_->video_info;
+
+ write_temp_delimiter_ = true;
+ AllocateBitstreamBuffer(video_info);
+
+ // Go through each gop and encode each frame in the gop
+ for (size_t i = 0; i < gop_list.size(); ++i) {
+ const aom::GopStruct &gop_struct = gop_list[i];
+ DuckyEncodeInfoSetGopStruct(ppi, gop_struct);
+ aom::GopEncodeInfo gop_encode_info = gop_encode_info_list[i];
+
+ for (auto &frame_param : gop_encode_info.param_list) {
+ aom::EncodeFrameDecision frame_decision = { aom::EncodeFrameMode::kQindex,
+ aom::EncodeGopMode::kGopRcl,
+ frame_param };
+ EncodeFrame(frame_decision);
+ if (ppi->cpi->common.show_frame) {
+ bitstream_buf_.resize(pending_ctx_size_);
+ EncodeFrameResult encode_frame_result = {};
+ encode_frame_result.bitstream_buf = bitstream_buf_;
+ encoded_frame_list.push_back(encode_frame_result);
+
+ AllocateBitstreamBuffer(video_info);
+ }
+ write_temp_delimiter_ = ppi->cpi->common.show_frame;
+ }
+ }
+
+ return encoded_frame_list;
+}
+
+EncodeFrameResult DuckyEncode::EncodeFrame(
+ const EncodeFrameDecision &decision) {
+ EncodeFrameResult encode_frame_result = {};
+ encode_frame_result.bitstream_buf = bitstream_buf_;
+ AV1_PRIMARY *ppi = impl_ptr_->enc_resource.ppi;
+ aom_image_t *img = &impl_ptr_->enc_resource.img;
+ AV1_COMP *const cpi = ppi->cpi;
+ FILE *in_file = impl_ptr_->enc_resource.in_file;
+ struct lookahead_ctx *lookahead = ppi->lookahead;
+
+ while (!av1_lookahead_full(lookahead)) {
+ if (aom_img_read(img, in_file)) {
+ YV12_BUFFER_CONFIG sd;
+ image2yuvconfig(img, &sd);
+ int64_t ts_start = impl_ptr_->enc_resource.lookahead_push_count;
+ int64_t ts_end = ts_start + 1;
+ av1_lookahead_push(lookahead, &sd, ts_start, ts_end,
+ /*use_highbitdepth=*/0, /*flags=*/0);
+ ++impl_ptr_->enc_resource.lookahead_push_count;
+ } else {
+ break;
+ }
+ }
+
+ AV1_COMP_DATA cpi_data = {};
+ cpi_data.cx_data = bitstream_buf_.data() + pending_ctx_size_;
+ cpi_data.cx_data_sz = bitstream_buf_.size() - pending_ctx_size_;
+ cpi_data.frame_size = 0;
+ cpi_data.flush = 1;
+  // ts_frame_start and ts_frame_end are not as important since we are
+  // focusing on q mode.
+ cpi_data.ts_frame_start = impl_ptr_->enc_resource.encode_frame_count;
+ cpi_data.ts_frame_end = cpi_data.ts_frame_start + 1;
+ cpi_data.pop_lookahead = 1;
+ cpi_data.timestamp_ratio = &impl_ptr_->timestamp_ratio;
+ ++impl_ptr_->enc_resource.encode_frame_count;
+
+ av1_compute_num_workers_for_mt(cpi);
+ av1_init_frame_mt(ppi, cpi);
+
+ DuckyEncodeInfoSetEncodeFrameDecision(&cpi->ducky_encode_info, decision);
+ const int status = av1_get_compressed_data(cpi, &cpi_data);
+
+ if (write_temp_delimiter_) WriteObu(ppi, &cpi_data);
+ (void)status;
+ assert(status == static_cast<int>(AOM_CODEC_OK));
+ DuckyEncodeInfoGetEncodeFrameResult(&cpi->ducky_encode_info,
+ &encode_frame_result);
+ av1_post_encode_updates(cpi, &cpi_data);
+ if (cpi->common.show_frame) {
+ // decrement frames_left counter
+ ppi->frames_left = AOMMAX(0, ppi->frames_left - 1);
+ }
+
+ pending_ctx_size_ += cpi_data.frame_size;
+
+ fprintf(stderr, "frame %d, qp = %d, size %d, PSNR %f\n",
+ encode_frame_result.global_order_idx, encode_frame_result.q_index,
+ encode_frame_result.rate, encode_frame_result.psnr);
+ return encode_frame_result;
+}
+
+void DuckyEncode::EndEncode() { FreeEncoder(&impl_ptr_->enc_resource); }
+
+void DuckyEncode::AllocateBitstreamBuffer(const VideoInfo &video_info) {
+ pending_ctx_size_ = 0;
+  // TODO(angiebird): Set bitstream_buf size to a conservative upper bound.
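+  // For now the buffer is 8x the raw 8-bit 4:4:4 frame size (frame_width *
+  // frame_height * 3 bytes), which should be a generous bound in practice.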
+ bitstream_buf_.assign(
+ video_info.frame_width * video_info.frame_height * 3 * 8, 0);
+}
+} // namespace aom
diff --git a/chromium/third_party/libaom/source/libaom/av1/qmode_rc/ducky_encode.h b/chromium/third_party/libaom/source/libaom/av1/qmode_rc/ducky_encode.h
new file mode 100644
index 00000000000..eece76149d4
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/av1/qmode_rc/ducky_encode.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_AV1_QMODE_RC_DUCKY_ENCODE_H_
+#define AOM_AV1_QMODE_RC_DUCKY_ENCODE_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "aom/aom_encoder.h"
+#include "av1/encoder/firstpass.h"
+#include "av1/qmode_rc/ratectrl_qmode_interface.h"
+
+namespace aom {
+struct VideoInfo {
+ int frame_width;
+ int frame_height;
+ aom_rational_t frame_rate;
+ aom_img_fmt_t img_fmt;
+ int frame_count;
+ std::string file_path;
+};
+
+struct EncodeFrameResult {
+ std::vector<uint8_t> bitstream_buf;
+ // TODO(angiebird): update global_coding_idx and global_order_idx properly.
+ int global_coding_idx;
+ int global_order_idx;
+ int q_index;
+ int rdmult;
+ int rate;
+ int64_t dist;
+ double psnr;
+};
+
+enum class EncodeFrameMode {
+ kNone, // Let native AV1 determine q index and rdmult
+ kQindex, // DuckyEncode determines q index and AV1 determines rdmult
+ kQindexRdmult, // DuckyEncode determines q index and rdmult
+};
+
+enum class EncodeGopMode {
+ kNone, // native AV1 decides GOP
+ kGopRcl, // rate control lib decides GOP
+};
+
+struct EncodeFrameDecision {
+ EncodeFrameMode qp_mode;
+ EncodeGopMode gop_mode;
+ FrameEncodeParameters parameters;
+};
+
+using GopEncodeInfoList = std::vector<GopEncodeInfo>;
+
+// DuckyEncode is an experimental encoder C++ interface for two-pass mode.
+// This object can be used to do zero or more encode passes, where each encode
+// pass consists of:
+// - StartEncode()
+// - Zero or more calls to EncodeFrame()
+// - EndEncode()
+// Encode passes may not overlap, and any other sequence of these calls is
+// invalid.
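+//
+// A minimal usage sketch for one encode pass (parameter values are
+// illustrative only):
+//   DuckyEncode ducky_encode(video_info, /*max_ref_frames=*/7, /*speed=*/3,
+//                            /*base_qindex=*/128);
+//   std::vector<FIRSTPASS_STATS> stats = ducky_encode.ComputeFirstPassStats();
+//   ducky_encode.StartEncode(stats);
+//   ... // ComputeTplStats() / EncodeVideo() / EncodeFrame() as needed
+//   ducky_encode.EndEncode();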
+class DuckyEncode {
+ public:
+ explicit DuckyEncode(const VideoInfo &video_info, int max_ref_frames,
+ int speed, int base_qindex);
+ ~DuckyEncode();
+ std::vector<FIRSTPASS_STATS> ComputeFirstPassStats();
+ void StartEncode(const std::vector<FIRSTPASS_STATS> &stats_list);
+ TplGopStats ObtainTplStats(const GopStruct gop_struct);
+ std::vector<TplGopStats> ComputeTplStats(const GopStructList &gop_list);
+ std::vector<EncodeFrameResult> EncodeVideo(
+ const GopStructList &gop_list,
+ const GopEncodeInfoList &gop_encode_info_list);
+ EncodeFrameResult EncodeFrame(const EncodeFrameDecision &decision);
+ void EndEncode();
+ void AllocateBitstreamBuffer(const VideoInfo &video_info);
+
+ private:
+ class EncodeImpl;
+ std::unique_ptr<EncodeImpl> impl_ptr_;
+ bool write_temp_delimiter_;
+ std::vector<uint8_t> bitstream_buf_;
+ size_t pending_ctx_size_;
+};
+} // namespace aom
+
+#endif // AOM_AV1_QMODE_RC_DUCKY_ENCODE_H_
diff --git a/chromium/third_party/libaom/source/libaom/av1/qmode_rc/ratectrl_qmode.cc b/chromium/third_party/libaom/source/libaom/av1/qmode_rc/ratectrl_qmode.cc
new file mode 100644
index 00000000000..708696fc851
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/av1/qmode_rc/ratectrl_qmode.cc
@@ -0,0 +1,1332 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#include "av1/qmode_rc/ratectrl_qmode.h"
+
+#include <algorithm>
+#include <cassert>
+#include <climits>
+#include <functional>
+#include <numeric>
+#include <sstream>
+#include <vector>
+
+#include "aom/aom_codec.h"
+#include "av1/encoder/pass2_strategy.h"
+#include "av1/encoder/tpl_model.h"
+
+namespace aom {
+
+// This is used before division to ensure that the divisor isn't zero or
+// too close to zero.
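+// For example, ModifyDivisor(0.0) returns kEpsilon (1e-7) and
+// ModifyDivisor(-1e-9) returns -kEpsilon, while values far from zero are
+// passed through unchanged.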
+static double ModifyDivisor(double divisor) {
+ const double kEpsilon = 0.0000001;
+ return (divisor < 0 ? std::min(divisor, -kEpsilon)
+ : std::max(divisor, kEpsilon));
+}
+
+GopFrame GopFrameInvalid() {
+ GopFrame gop_frame = {};
+ gop_frame.is_valid = false;
+ gop_frame.coding_idx = -1;
+ gop_frame.order_idx = -1;
+ return gop_frame;
+}
+
+void SetGopFrameByType(GopFrameType gop_frame_type, GopFrame *gop_frame) {
+ gop_frame->update_type = gop_frame_type;
+ switch (gop_frame_type) {
+ case GopFrameType::kRegularKey:
+ gop_frame->is_key_frame = 1;
+ gop_frame->is_arf_frame = 0;
+ gop_frame->is_show_frame = 1;
+ gop_frame->is_golden_frame = 1;
+ gop_frame->encode_ref_mode = EncodeRefMode::kRegular;
+ break;
+ case GopFrameType::kRegularGolden:
+ gop_frame->is_key_frame = 0;
+ gop_frame->is_arf_frame = 0;
+ gop_frame->is_show_frame = 1;
+ gop_frame->is_golden_frame = 1;
+ gop_frame->encode_ref_mode = EncodeRefMode::kRegular;
+ break;
+ case GopFrameType::kRegularArf:
+ gop_frame->is_key_frame = 0;
+ gop_frame->is_arf_frame = 1;
+ gop_frame->is_show_frame = 0;
+ gop_frame->is_golden_frame = 1;
+ gop_frame->encode_ref_mode = EncodeRefMode::kRegular;
+ break;
+ case GopFrameType::kIntermediateArf:
+ gop_frame->is_key_frame = 0;
+ gop_frame->is_arf_frame = 1;
+ gop_frame->is_show_frame = 0;
+ gop_frame->is_golden_frame = gop_frame->layer_depth <= 2 ? 1 : 0;
+ gop_frame->encode_ref_mode = EncodeRefMode::kRegular;
+ break;
+ case GopFrameType::kRegularLeaf:
+ gop_frame->is_key_frame = 0;
+ gop_frame->is_arf_frame = 0;
+ gop_frame->is_show_frame = 1;
+ gop_frame->is_golden_frame = 0;
+ gop_frame->encode_ref_mode = EncodeRefMode::kRegular;
+ break;
+ case GopFrameType::kIntermediateOverlay:
+ gop_frame->is_key_frame = 0;
+ gop_frame->is_arf_frame = 0;
+ gop_frame->is_show_frame = 1;
+ gop_frame->is_golden_frame = 0;
+ gop_frame->encode_ref_mode = EncodeRefMode::kShowExisting;
+ break;
+ case GopFrameType::kOverlay:
+ gop_frame->is_key_frame = 0;
+ gop_frame->is_arf_frame = 0;
+ gop_frame->is_show_frame = 1;
+ gop_frame->is_golden_frame = 0;
+ gop_frame->encode_ref_mode = EncodeRefMode::kOverlay;
+ break;
+ }
+}
+
+GopFrame GopFrameBasic(int global_coding_idx_offset,
+ int global_order_idx_offset, int coding_idx,
+ int order_idx, int depth, int display_idx,
+ GopFrameType gop_frame_type) {
+ GopFrame gop_frame = {};
+ gop_frame.is_valid = true;
+ gop_frame.coding_idx = coding_idx;
+ gop_frame.order_idx = order_idx;
+ gop_frame.display_idx = display_idx;
+ gop_frame.global_coding_idx = global_coding_idx_offset + coding_idx;
+ gop_frame.global_order_idx = global_order_idx_offset + order_idx;
+ gop_frame.layer_depth = depth + kLayerDepthOffset;
+ gop_frame.colocated_ref_idx = -1;
+ gop_frame.update_ref_idx = -1;
+ SetGopFrameByType(gop_frame_type, &gop_frame);
+ return gop_frame;
+}
+
+// This function creates gop frames with display-order indices from
+// order_start to order_end - 1. The function recursively introduces
+// intermediate ARFs until the maximum depth is met or the number of regular
+// frames in between two ARFs is less than 3. The remaining regular frames
+// are then added into the gop_struct.
+void ConstructGopMultiLayer(GopStruct *gop_struct,
+ RefFrameManager *ref_frame_manager, int max_depth,
+ int depth, int order_start, int order_end) {
+ GopFrame gop_frame;
+ int num_frames = order_end - order_start;
+ const int global_coding_idx_offset = gop_struct->global_coding_idx_offset;
+ const int global_order_idx_offset = gop_struct->global_order_idx_offset;
+  // If there are fewer than kMinIntervalToAddArf frames, stop introducing ARFs.
+ if (depth < max_depth && num_frames >= kMinIntervalToAddArf) {
+ int order_mid = (order_start + order_end) / 2;
+ // intermediate ARF
+ gop_frame = GopFrameBasic(
+ global_coding_idx_offset, global_order_idx_offset,
+ static_cast<int>(gop_struct->gop_frame_list.size()), order_mid, depth,
+ gop_struct->display_tracker, GopFrameType::kIntermediateArf);
+ ref_frame_manager->UpdateRefFrameTable(&gop_frame);
+ gop_struct->gop_frame_list.push_back(gop_frame);
+ ConstructGopMultiLayer(gop_struct, ref_frame_manager, max_depth, depth + 1,
+ order_start, order_mid);
+ // show existing intermediate ARF
+ gop_frame =
+ GopFrameBasic(global_coding_idx_offset, global_order_idx_offset,
+ static_cast<int>(gop_struct->gop_frame_list.size()),
+ order_mid, max_depth, gop_struct->display_tracker,
+ GopFrameType::kIntermediateOverlay);
+ ref_frame_manager->UpdateRefFrameTable(&gop_frame);
+ gop_struct->gop_frame_list.push_back(gop_frame);
+ ++gop_struct->display_tracker;
+ ConstructGopMultiLayer(gop_struct, ref_frame_manager, max_depth, depth + 1,
+ order_mid + 1, order_end);
+ } else {
+ // regular frame
+ for (int i = order_start; i < order_end; ++i) {
+ gop_frame = GopFrameBasic(
+ global_coding_idx_offset, global_order_idx_offset,
+ static_cast<int>(gop_struct->gop_frame_list.size()), i, max_depth,
+ gop_struct->display_tracker, GopFrameType::kRegularLeaf);
+ ref_frame_manager->UpdateRefFrameTable(&gop_frame);
+ gop_struct->gop_frame_list.push_back(gop_frame);
+ ++gop_struct->display_tracker;
+ }
+ }
+}
+
+GopStruct ConstructGop(RefFrameManager *ref_frame_manager, int show_frame_count,
+ bool has_key_frame, int global_coding_idx_offset,
+ int global_order_idx_offset) {
+ GopStruct gop_struct;
+ gop_struct.show_frame_count = show_frame_count;
+ gop_struct.global_coding_idx_offset = global_coding_idx_offset;
+ gop_struct.global_order_idx_offset = global_order_idx_offset;
+ int order_start = 0;
+ int order_end = show_frame_count - 1;
+
+ // TODO(jingning): Re-enable the use of pyramid coding structure.
+ bool has_arf_frame = show_frame_count > kMinIntervalToAddArf;
+
+ gop_struct.display_tracker = 0;
+
+ GopFrame gop_frame;
+ if (has_key_frame) {
+ const int key_frame_depth = -1;
+ ref_frame_manager->Reset();
+ gop_frame = GopFrameBasic(
+ global_coding_idx_offset, global_order_idx_offset,
+ static_cast<int>(gop_struct.gop_frame_list.size()), order_start,
+ key_frame_depth, gop_struct.display_tracker, GopFrameType::kRegularKey);
+ ref_frame_manager->UpdateRefFrameTable(&gop_frame);
+ gop_struct.gop_frame_list.push_back(gop_frame);
+ order_start++;
+ ++gop_struct.display_tracker;
+ }
+
+ const int arf_depth = 0;
+ if (has_arf_frame) {
+    // Use multi-layer pyramid coding structure.
+ gop_frame = GopFrameBasic(
+ global_coding_idx_offset, global_order_idx_offset,
+ static_cast<int>(gop_struct.gop_frame_list.size()), order_end,
+ arf_depth, gop_struct.display_tracker, GopFrameType::kRegularArf);
+ ref_frame_manager->UpdateRefFrameTable(&gop_frame);
+ gop_struct.gop_frame_list.push_back(gop_frame);
+ ConstructGopMultiLayer(&gop_struct, ref_frame_manager,
+ ref_frame_manager->MaxRefFrame() - 1, arf_depth + 1,
+ order_start, order_end);
+ // Overlay
+ gop_frame =
+ GopFrameBasic(global_coding_idx_offset, global_order_idx_offset,
+ static_cast<int>(gop_struct.gop_frame_list.size()),
+ order_end, ref_frame_manager->MaxRefFrame() - 1,
+ gop_struct.display_tracker, GopFrameType::kOverlay);
+ ref_frame_manager->UpdateRefFrameTable(&gop_frame);
+ gop_struct.gop_frame_list.push_back(gop_frame);
+ ++gop_struct.display_tracker;
+ } else {
+ // Use IPPP format.
+ for (int i = order_start; i <= order_end; ++i) {
+ gop_frame = GopFrameBasic(
+ global_coding_idx_offset, global_order_idx_offset,
+ static_cast<int>(gop_struct.gop_frame_list.size()), i, arf_depth + 1,
+ gop_struct.display_tracker, GopFrameType::kRegularLeaf);
+ ref_frame_manager->UpdateRefFrameTable(&gop_frame);
+ gop_struct.gop_frame_list.push_back(gop_frame);
+ ++gop_struct.display_tracker;
+ }
+ }
+
+ return gop_struct;
+}
+
+Status AV1RateControlQMode::SetRcParam(const RateControlParam &rc_param) {
+ std::ostringstream error_message;
+ if (rc_param.max_gop_show_frame_count <
+ std::max(4, rc_param.min_gop_show_frame_count)) {
+ error_message << "max_gop_show_frame_count ("
+ << rc_param.max_gop_show_frame_count
+ << ") must be at least 4 and may not be less than "
+ "min_gop_show_frame_count ("
+ << rc_param.min_gop_show_frame_count << ")";
+ return { AOM_CODEC_INVALID_PARAM, error_message.str() };
+ }
+ if (rc_param.ref_frame_table_size < 1 || rc_param.ref_frame_table_size > 8) {
+ error_message << "ref_frame_table_size (" << rc_param.ref_frame_table_size
+ << ") must be in the range [1, 8].";
+ return { AOM_CODEC_INVALID_PARAM, error_message.str() };
+ }
+ if (rc_param.max_ref_frames < 1 || rc_param.max_ref_frames > 7) {
+ error_message << "max_ref_frames (" << rc_param.max_ref_frames
+ << ") must be in the range [1, 7].";
+ return { AOM_CODEC_INVALID_PARAM, error_message.str() };
+ }
+ if (rc_param.base_q_index < 0 || rc_param.base_q_index > 255) {
+ error_message << "base_q_index (" << rc_param.base_q_index
+ << ") must be in the range [0, 255].";
+ return { AOM_CODEC_INVALID_PARAM, error_message.str() };
+ }
+ if (rc_param.frame_width < 16 || rc_param.frame_width > 16384 ||
+ rc_param.frame_height < 16 || rc_param.frame_height > 16384) {
+ error_message << "frame_width (" << rc_param.frame_width
+ << ") and frame_height (" << rc_param.frame_height
+ << ") must be in the range [16, 16384].";
+ return { AOM_CODEC_INVALID_PARAM, error_message.str() };
+ }
+ rc_param_ = rc_param;
+ return { AOM_CODEC_OK, "" };
+}
+
+// Threshold for use of the lagging second reference frame. High second ref
+// usage may point to a transient event like a flash or occlusion rather than
+// a real scene cut.
+// We adapt the threshold based on the number of frames in this key-frame
+// group so far.
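+// For example, with the constants below, the threshold is 0.085 at frame 0,
+// rises linearly to 0.12 at frame 31, and stays at 0.12 thereafter.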
+static double GetSecondRefUsageThreshold(int frame_count_so_far) {
+ const int adapt_upto = 32;
+ const double min_second_ref_usage_thresh = 0.085;
+ const double second_ref_usage_thresh_max_delta = 0.035;
+ if (frame_count_so_far >= adapt_upto) {
+ return min_second_ref_usage_thresh + second_ref_usage_thresh_max_delta;
+ }
+ return min_second_ref_usage_thresh +
+ ((double)frame_count_so_far / (adapt_upto - 1)) *
+ second_ref_usage_thresh_max_delta;
+}
+
+// Slide show transition detection.
+// Tests for the case where there is very low error on either side of the
+// current frame but much higher just for this frame. This can help detect
+// key frames in slide shows even where the slides are pictures of different
+// sizes.
+// Also requires that intra and inter errors are very similar to help eliminate
+// harmful false positives.
+// It will not help if the transition is a fade or other multi-frame effect.
+static bool DetectSlideTransition(const FIRSTPASS_STATS &this_frame,
+ const FIRSTPASS_STATS &last_frame,
+ const FIRSTPASS_STATS &next_frame) {
+ // Intra / Inter threshold very low
+ constexpr double kVeryLowII = 1.5;
+ // Clean slide transitions we expect a sharp single frame spike in error.
+ constexpr double kErrorSpike = 5.0;
+
+ // TODO(angiebird): Understand the meaning of these conditions.
+ return (this_frame.intra_error < (this_frame.coded_error * kVeryLowII)) &&
+ (this_frame.coded_error > (last_frame.coded_error * kErrorSpike)) &&
+ (this_frame.coded_error > (next_frame.coded_error * kErrorSpike));
+}
+
+// Check if there is a significant intra/inter error change between the current
+// frame and its neighbor. If so, we should further test whether the current
+// frame should be a key frame.
+static bool DetectIntraInterErrorChange(const FIRSTPASS_STATS &this_stats,
+ const FIRSTPASS_STATS &last_stats,
+ const FIRSTPASS_STATS &next_stats) {
+ // Minimum % intra coding observed in first pass (1.0 = 100%)
+ constexpr double kMinIntraLevel = 0.25;
+  // Minimum ratio between the % of intra coding and inter coding in the first
+  // pass after discounting neutral blocks (discounting neutral blocks in this
+  // way helps catch scene cuts in clips with very flat areas or letter box
+  // format clips with image padding).
+ constexpr double kIntraVsInterRatio = 2.0;
+
+ const double modified_pcnt_inter =
+ this_stats.pcnt_inter - this_stats.pcnt_neutral;
+ const double pcnt_intra_min =
+ std::max(kMinIntraLevel, kIntraVsInterRatio * modified_pcnt_inter);
+
+ // In real scene cuts there is almost always a sharp change in the intra
+ // or inter error score.
+ constexpr double kErrorChangeThreshold = 0.4;
+ const double last_this_error_ratio =
+ fabs(last_stats.coded_error - this_stats.coded_error) /
+ ModifyDivisor(this_stats.coded_error);
+
+ const double this_next_error_ratio =
+ fabs(last_stats.intra_error - this_stats.intra_error) /
+ ModifyDivisor(this_stats.intra_error);
+
+ // Maximum threshold for the relative ratio of intra error score vs best
+ // inter error score.
+ constexpr double kThisIntraCodedErrorRatioMax = 1.9;
+ const double this_intra_coded_error_ratio =
+ this_stats.intra_error / ModifyDivisor(this_stats.coded_error);
+
+  // For real scene cuts we expect an improvement in the intra/inter error
+  // ratio in the next frame.
+ constexpr double kNextIntraCodedErrorRatioMin = 3.5;
+ const double next_intra_coded_error_ratio =
+ next_stats.intra_error / ModifyDivisor(next_stats.coded_error);
+
+ double pcnt_intra = 1.0 - this_stats.pcnt_inter;
+ return pcnt_intra > pcnt_intra_min &&
+ this_intra_coded_error_ratio < kThisIntraCodedErrorRatioMax &&
+ (last_this_error_ratio > kErrorChangeThreshold ||
+ this_next_error_ratio > kErrorChangeThreshold ||
+ next_intra_coded_error_ratio > kNextIntraCodedErrorRatioMin);
+}
+
+// Check whether the candidate can be a key frame.
+// This is a rewrite of test_candidate_kf().
+static bool TestCandidateKey(const FirstpassInfo &first_pass_info,
+ int candidate_key_idx, int frames_since_prev_key) {
+ const auto &stats_list = first_pass_info.stats_list;
+ const int stats_count = static_cast<int>(stats_list.size());
+ if (candidate_key_idx + 1 >= stats_count || candidate_key_idx - 1 < 0) {
+ return false;
+ }
+ const auto &last_stats = stats_list[candidate_key_idx - 1];
+ const auto &this_stats = stats_list[candidate_key_idx];
+ const auto &next_stats = stats_list[candidate_key_idx + 1];
+
+ if (frames_since_prev_key < 3) return false;
+ const double second_ref_usage_threshold =
+ GetSecondRefUsageThreshold(frames_since_prev_key);
+ if (this_stats.pcnt_second_ref >= second_ref_usage_threshold) return false;
+ if (next_stats.pcnt_second_ref >= second_ref_usage_threshold) return false;
+
+ // Hard threshold where the first pass chooses intra for almost all blocks.
+ // In such a case even if the frame is not a scene cut coding a key frame
+ // may be a good option.
+ constexpr double kVeryLowInterThreshold = 0.05;
+ if (this_stats.pcnt_inter < kVeryLowInterThreshold ||
+ DetectSlideTransition(this_stats, last_stats, next_stats) ||
+ DetectIntraInterErrorChange(this_stats, last_stats, next_stats)) {
+ double boost_score = 0.0;
+ double decay_accumulator = 1.0;
+
+ // We do "-1" because the candidate key is not counted.
+ int stats_after_this_stats = stats_count - candidate_key_idx - 1;
+
+ // Number of frames required to test for scene cut detection
+ constexpr int kSceneCutKeyTestIntervalMax = 16;
+
+ // Make sure we have enough stats after the candidate key.
+ const int frames_to_test_after_candidate_key =
+ std::min(kSceneCutKeyTestIntervalMax, stats_after_this_stats);
+
+ // Examine how well the key frame predicts subsequent frames.
+ int i;
+ for (i = 1; i <= frames_to_test_after_candidate_key; ++i) {
+ // Get the next frame details
+ const auto &stats = stats_list[candidate_key_idx + i];
+
+ // Cumulative effect of decay in prediction quality.
+ if (stats.pcnt_inter > 0.85) {
+ decay_accumulator *= stats.pcnt_inter;
+ } else {
+ decay_accumulator *= (0.85 + stats.pcnt_inter) / 2.0;
+ }
+
+ constexpr double kBoostFactor = 12.5;
+ double next_iiratio =
+ (kBoostFactor * stats.intra_error / ModifyDivisor(stats.coded_error));
+ next_iiratio = std::min(next_iiratio, 128.0);
+ double boost_score_increment = decay_accumulator * next_iiratio;
+
+ // Keep a running total.
+ boost_score += boost_score_increment;
+
+ // Test various breakout clauses.
+ // TODO(any): Test of intra error should be normalized to an MB.
+ // TODO(angiebird): Investigate the following questions.
+ // Question 1: next_iiratio (intra_error / coded_error) * kBoostFactor
+ // We know intra_error / coded_error >= 1 and kBoostFactor = 12.5,
+      // therefore, (intra_error / coded_error) * kBoostFactor will always be
+      // greater than 1.5. Is "next_iiratio < 1.5" always false?
+ // Question 2: Similar to question 1, is "next_iiratio < 3.0" always true?
+      // Question 3: Why do we need to divide 200 by num_mbs_16x16?
+ if ((stats.pcnt_inter < 0.05) || (next_iiratio < 1.5) ||
+ (((stats.pcnt_inter - stats.pcnt_neutral) < 0.20) &&
+ (next_iiratio < 3.0)) ||
+ (boost_score_increment < 3.0) ||
+ (stats.intra_error <
+ (200.0 / static_cast<double>(first_pass_info.num_mbs_16x16)))) {
+ break;
+ }
+ }
+
+    // If there is tolerable prediction for at least the next 3 frames, then
+    // break out; else discard this potential key frame and move on.
+ const int count_for_tolerable_prediction = 3;
+ if (boost_score > 30.0 && (i > count_for_tolerable_prediction)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+// Compute key frame location from first_pass_info.
+std::vector<int> GetKeyFrameList(const FirstpassInfo &first_pass_info) {
+ std::vector<int> key_frame_list;
+ key_frame_list.push_back(0); // The first frame is always a key frame
+ int candidate_key_idx = 1;
+ while (candidate_key_idx <
+ static_cast<int>(first_pass_info.stats_list.size())) {
+ const int frames_since_prev_key = candidate_key_idx - key_frame_list.back();
+ // Check for a scene cut.
+ const bool scenecut_detected = TestCandidateKey(
+ first_pass_info, candidate_key_idx, frames_since_prev_key);
+ if (scenecut_detected) {
+ key_frame_list.push_back(candidate_key_idx);
+ }
+ ++candidate_key_idx;
+ }
+ return key_frame_list;
+}
+
+// initialize GF_GROUP_STATS
+static void InitGFStats(GF_GROUP_STATS *gf_stats) {
+ gf_stats->gf_group_err = 0.0;
+ gf_stats->gf_group_raw_error = 0.0;
+ gf_stats->gf_group_skip_pct = 0.0;
+ gf_stats->gf_group_inactive_zone_rows = 0.0;
+
+ gf_stats->mv_ratio_accumulator = 0.0;
+ gf_stats->decay_accumulator = 1.0;
+ gf_stats->zero_motion_accumulator = 1.0;
+ gf_stats->loop_decay_rate = 1.0;
+ gf_stats->last_loop_decay_rate = 1.0;
+ gf_stats->this_frame_mv_in_out = 0.0;
+ gf_stats->mv_in_out_accumulator = 0.0;
+ gf_stats->abs_mv_in_out_accumulator = 0.0;
+
+ gf_stats->avg_sr_coded_error = 0.0;
+ gf_stats->avg_pcnt_second_ref = 0.0;
+ gf_stats->avg_new_mv_count = 0.0;
+ gf_stats->avg_wavelet_energy = 0.0;
+ gf_stats->avg_raw_err_stdev = 0.0;
+ gf_stats->non_zero_stdev_count = 0;
+}
+
+static int FindRegionIndex(const std::vector<REGIONS> &regions, int frame_idx) {
+ for (int k = 0; k < static_cast<int>(regions.size()); k++) {
+ if (regions[k].start <= frame_idx && regions[k].last >= frame_idx) {
+ return k;
+ }
+ }
+ return -1;
+}
+
+// This function detects a flash through the high relative pcnt_second_ref
+// score in the frame following a flash frame. The offset passed in should
+// reflect this.
+static bool DetectFlash(const std::vector<FIRSTPASS_STATS> &stats_list,
+ int index) {
+ int next_index = index + 1;
+ if (next_index >= static_cast<int>(stats_list.size())) return false;
+ const FIRSTPASS_STATS &next_frame = stats_list[next_index];
+
+ // What we are looking for here is a situation where there is a
+ // brief break in prediction (such as a flash) but subsequent frames
+ // are reasonably well predicted by an earlier (pre flash) frame.
+ // The recovery after a flash is indicated by a high pcnt_second_ref
+ // compared to pcnt_inter.
+ return next_frame.pcnt_second_ref > next_frame.pcnt_inter &&
+ next_frame.pcnt_second_ref >= 0.5;
+}
+
+#define MIN_SHRINK_LEN 6
+
+// This function takes in a suggested gop interval from cur_start to cur_last,
+// analyzes firstpass stats and region stats, and then returns a better gop
+// cut location.
+// TODO(b/231517281): Simplify the indices once we have a unit test.
+// We are using four indices here, order_index, cur_start, cur_last, and
+// frames_since_key. Ideally, only three indices are needed.
+// 1) start_index = order_index + cur_start
+// 2) end_index = order_index + cur_end
+// 3) key_index
+int FindBetterGopCut(const std::vector<FIRSTPASS_STATS> &stats_list,
+ const std::vector<REGIONS> &regions_list,
+ int min_gop_show_frame_count, int max_gop_show_frame_count,
+ int order_index, int cur_start, int cur_last,
+ int frames_since_key) {
+ // only try shrinking if interval smaller than active_max_gf_interval
+ if (cur_last - cur_start > max_gop_show_frame_count ||
+ cur_start >= cur_last) {
+ return cur_last;
+ }
+ int num_regions = static_cast<int>(regions_list.size());
+ int num_stats = static_cast<int>(stats_list.size());
+ const int min_shrink_int = std::max(MIN_SHRINK_LEN, min_gop_show_frame_count);
+
+ // find the region indices of where the first and last frame belong.
+ int k_start = FindRegionIndex(regions_list, cur_start + frames_since_key);
+ int k_last = FindRegionIndex(regions_list, cur_last + frames_since_key);
+ if (cur_start + frames_since_key == 0) k_start = 0;
+
+ int scenecut_idx = -1;
+ // See if we have a scenecut in between
+ for (int r = k_start + 1; r <= k_last; r++) {
+ if (regions_list[r].type == SCENECUT_REGION &&
+ regions_list[r].last - frames_since_key - cur_start >
+ min_gop_show_frame_count) {
+ scenecut_idx = r;
+ break;
+ }
+ }
+
+ // if the found scenecut is very close to the end, ignore it.
+ if (scenecut_idx >= 0 &&
+ regions_list[num_regions - 1].last - regions_list[scenecut_idx].last <
+ 4) {
+ scenecut_idx = -1;
+ }
+
+ if (scenecut_idx != -1) {
+ // If we have a scenecut, then stop at it.
+    // TODO(bohanli): add logic here to stop before the scenecut and have
+    // the next gop start from the scenecut with GF
+ int is_minor_sc =
+ (regions_list[scenecut_idx].avg_cor_coeff *
+ (1 - stats_list[order_index + regions_list[scenecut_idx].start -
+ frames_since_key]
+ .noise_var /
+ regions_list[scenecut_idx].avg_intra_err) >
+ 0.6);
+ cur_last =
+ regions_list[scenecut_idx].last - frames_since_key - !is_minor_sc;
+ } else {
+ int is_last_analysed =
+ (k_last == num_regions - 1) &&
+ (cur_last + frames_since_key == regions_list[k_last].last);
+ int not_enough_regions =
+ k_last - k_start <= 1 + (regions_list[k_start].type == SCENECUT_REGION);
+ // if we are very close to the end, then do not shrink since it may
+ // introduce intervals that are too short
+ if (!(is_last_analysed && not_enough_regions)) {
+ const double arf_length_factor = 0.1;
+ double best_score = 0;
+ int best_j = -1;
+ const int first_frame = regions_list[0].start - frames_since_key;
+ const int last_frame =
+ regions_list[num_regions - 1].last - frames_since_key;
+ // score of how much the arf helps the whole GOP
+ double base_score = 0.0;
+      // Accumulate base_score over the frames before the earliest allowed
+      // cut; each step applies base_score = (base_score + 1) * cor_coeff, so
+      // runs of well-predicted frames build up a high score.
+ for (int j = cur_start + 1; j < cur_start + min_shrink_int; j++) {
+ if (order_index + j >= num_stats) break;
+ base_score = (base_score + 1.0) * stats_list[order_index + j].cor_coeff;
+ }
+ int met_blending = 0; // Whether we have met blending areas before
+      int last_blending = 0;  // Whether the previous frame is blending
+ for (int j = cur_start + min_shrink_int; j <= cur_last; j++) {
+ if (order_index + j >= num_stats) break;
+ base_score = (base_score + 1.0) * stats_list[order_index + j].cor_coeff;
+ int this_reg = FindRegionIndex(regions_list, j + frames_since_key);
+ if (this_reg < 0) continue;
+ // A GOP should include at most 1 blending region.
+ if (regions_list[this_reg].type == BLENDING_REGION) {
+ last_blending = 1;
+ if (met_blending) {
+ break;
+ } else {
+ base_score = 0;
+ continue;
+ }
+ } else {
+ if (last_blending) met_blending = 1;
+ last_blending = 0;
+ }
+
+ // Add the factor of how good the neighborhood is for this
+ // candidate arf.
+ double this_score = arf_length_factor * base_score;
+ double temp_accu_coeff = 1.0;
+ // following frames
+ int count_f = 0;
+ for (int n = j + 1; n <= j + 3 && n <= last_frame; n++) {
+ if (order_index + n >= num_stats) break;
+ temp_accu_coeff *= stats_list[order_index + n].cor_coeff;
+ this_score +=
+ temp_accu_coeff *
+ (1 - stats_list[order_index + n].noise_var /
+ AOMMAX(regions_list[this_reg].avg_intra_err, 0.001));
+ count_f++;
+ }
+ // preceding frames
+ temp_accu_coeff = 1.0;
+ for (int n = j; n > j - 3 * 2 + count_f && n > first_frame; n--) {
+ if (order_index + n < 0) break;
+ temp_accu_coeff *= stats_list[order_index + n].cor_coeff;
+ this_score +=
+ temp_accu_coeff *
+ (1 - stats_list[order_index + n].noise_var /
+ AOMMAX(regions_list[this_reg].avg_intra_err, 0.001));
+ }
+
+ if (this_score > best_score) {
+ best_score = this_score;
+ best_j = j;
+ }
+ }
+
+ // For blending areas, move one more frame in case we missed the
+ // first blending frame.
+ int best_reg = FindRegionIndex(regions_list, best_j + frames_since_key);
+ if (best_reg < num_regions - 1 && best_reg > 0) {
+ if (regions_list[best_reg - 1].type == BLENDING_REGION &&
+ regions_list[best_reg + 1].type == BLENDING_REGION) {
+ if (best_j + frames_since_key == regions_list[best_reg].start &&
+ best_j + frames_since_key < regions_list[best_reg].last) {
+ best_j += 1;
+ } else if (best_j + frames_since_key == regions_list[best_reg].last &&
+ best_j + frames_since_key > regions_list[best_reg].start) {
+ best_j -= 1;
+ }
+ }
+ }
+
+ if (cur_last - best_j < 2) best_j = cur_last;
+ if (best_j > 0 && best_score > 0.1) cur_last = best_j;
+      // If we cannot find anything better, just cut at the original place.
+ }
+ }
+
+ return cur_last;
+}
+
+// Function to test for a condition where a complex transition is followed
+// by a static section. For example in slide shows where there is a fade
+// between slides. This is to help with more optimal kf and gf positioning.
+static bool DetectTransitionToStill(
+ const std::vector<FIRSTPASS_STATS> &stats_list, int next_stats_index,
+ int min_gop_show_frame_count, int frame_interval, int still_interval,
+ double loop_decay_rate, double last_decay_rate) {
+ // Break clause to detect very still sections after motion
+ // For example a static image after a fade or other transition
+ // instead of a clean scene cut.
+ if (frame_interval > min_gop_show_frame_count && loop_decay_rate >= 0.999 &&
+ last_decay_rate < 0.9) {
+ int stats_count = static_cast<int>(stats_list.size());
+ int stats_left = stats_count - next_stats_index;
+ if (stats_left >= still_interval) {
+ // Look ahead a few frames to see if static condition persists...
+ int j;
+ for (j = 0; j < still_interval; ++j) {
+ const FIRSTPASS_STATS &stats = stats_list[next_stats_index + j];
+ if (stats.pcnt_inter - stats.pcnt_motion < 0.999) break;
+ }
+ // Only if it does do we signal a transition to still.
+ return j == still_interval;
+ }
+ }
+ return false;
+}
+
+static int DetectGopCut(const std::vector<FIRSTPASS_STATS> &stats_list,
+ int start_idx, int candidate_cut_idx, int next_key_idx,
+ int flash_detected, int min_gop_show_frame_count,
+ int max_gop_show_frame_count, int frame_width,
+ int frame_height, const GF_GROUP_STATS &gf_stats) {
+ (void)max_gop_show_frame_count;
+ const int candidate_gop_size = candidate_cut_idx - start_idx;
+
+ if (!flash_detected) {
+ // Break clause to detect very still sections after motion. For example,
+ // a static image after a fade or other transition.
+ if (DetectTransitionToStill(stats_list, start_idx, min_gop_show_frame_count,
+ candidate_gop_size, 5, gf_stats.loop_decay_rate,
+ gf_stats.last_loop_decay_rate)) {
+ return 1;
+ }
+ const double arf_abs_zoom_thresh = 4.4;
+ // Motion breakout threshold for loop below depends on image size.
+ const double mv_ratio_accumulator_thresh =
+ (frame_height + frame_width) / 4.0;
+ // Some conditions to breakout after min interval.
+ if (candidate_gop_size >= min_gop_show_frame_count &&
+ // If possible don't break very close to a kf
+ (next_key_idx - candidate_cut_idx >= min_gop_show_frame_count) &&
+ (candidate_gop_size & 0x01) &&
+ (gf_stats.mv_ratio_accumulator > mv_ratio_accumulator_thresh ||
+ gf_stats.abs_mv_in_out_accumulator > arf_abs_zoom_thresh)) {
+ return 1;
+ }
+ }
+
+ // TODO(b/231489624): Check if we need this part.
+  // If almost totally static, we will not use the max GF length later,
+ // so we can continue for more frames.
+ // if ((candidate_gop_size >= active_max_gf_interval + 1) &&
+ // !is_almost_static(gf_stats->zero_motion_accumulator,
+ // twopass->kf_zeromotion_pct, cpi->ppi->lap_enabled)) {
+ // return 0;
+ // }
+ return 0;
+}
+
+/*!\brief Determine the length of future GF groups.
+ *
+ * \ingroup gf_group_algo
+ * This function decides the gf group length of future frames in batch
+ *
+ * \param[in] rc_param Rate control parameters
+ * \param[in] stats_list List of first pass stats
+ * \param[in] regions_list List of regions from av1_identify_regions
+ * \param[in] order_index Index of current frame in stats_list
+ * \param[in] frames_since_key Number of frames since the last key frame
+ * \param[in] frames_to_key Number of frames to the next key frame
+ *
+ * \return Returns a vector of decided GF group lengths.
+ */
+static std::vector<int> PartitionGopIntervals(
+ const RateControlParam &rc_param,
+ const std::vector<FIRSTPASS_STATS> &stats_list,
+ const std::vector<REGIONS> &regions_list, int order_index,
+ int frames_since_key, int frames_to_key) {
+ int i = 0;
+ // If cpi->gf_state.arf_gf_boost_lst is 0, we are starting with a KF or GF.
+ int cur_start = 0;
+  // Each element is the index of the last frame of a GOP. If there are n
+  // GOPs, you need n + 1 cut positions to recover the durations, so cut_pos
+  // starts out with -1, the frame just before the first GOP.
+ std::vector<int> cut_pos(1, -1);
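+  // For example, cut_pos == {-1, 3, 8} yields two GOPs of lengths 4 and 5.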
+ int cut_here = 0;
+ GF_GROUP_STATS gf_stats;
+ InitGFStats(&gf_stats);
+ int num_stats = static_cast<int>(stats_list.size());
+
+ while (i + order_index < num_stats) {
+    // Reached the next key frame; break here.
+ if (i >= frames_to_key - 1) {
+ cut_here = 2;
+ } else if (i - cur_start >= rc_param.max_gop_show_frame_count) {
+ // reached maximum len, but nothing special yet (almost static)
+ // let's look at the next interval
+ cut_here = 2;
+ } else {
+      // Test for the case where there is a brief flash but prediction
+      // quality relative to an earlier frame is then restored.
+ const int gop_start_idx = cur_start + order_index;
+ const int candidate_gop_cut_idx = i + order_index;
+ const int next_key_idx = frames_to_key + order_index;
+ const bool flash_detected =
+ DetectFlash(stats_list, candidate_gop_cut_idx);
+
+ // TODO(bohanli): remove redundant accumulations here, or unify
+ // this and the ones in define_gf_group
+ const FIRSTPASS_STATS *stats = &stats_list[candidate_gop_cut_idx];
+ av1_accumulate_next_frame_stats(stats, flash_detected, frames_since_key,
+ i, &gf_stats, rc_param.frame_width,
+ rc_param.frame_height);
+
+ // TODO(angiebird): Can we simplify this part? Looks like we are going to
+ // change the gop cut index with FindBetterGopCut() anyway.
+ cut_here = DetectGopCut(
+ stats_list, gop_start_idx, candidate_gop_cut_idx, next_key_idx,
+ flash_detected, rc_param.min_gop_show_frame_count,
+ rc_param.max_gop_show_frame_count, rc_param.frame_width,
+ rc_param.frame_height, gf_stats);
+ }
+
+ if (!cut_here) {
+ ++i;
+ continue;
+ }
+
+ // the current last frame in the gf group
+ int original_last = cut_here > 1 ? i : i - 1;
+ int cur_last = FindBetterGopCut(
+ stats_list, regions_list, rc_param.min_gop_show_frame_count,
+ rc_param.max_gop_show_frame_count, order_index, cur_start,
+ original_last, frames_since_key);
+ // only try shrinking if interval smaller than active_max_gf_interval
+ cut_pos.push_back(cur_last);
+
+ // reset pointers to the shrunken location
+ cur_start = cur_last;
+ int cur_region_idx =
+ FindRegionIndex(regions_list, cur_start + 1 + frames_since_key);
+ if (cur_region_idx >= 0)
+ if (regions_list[cur_region_idx].type == SCENECUT_REGION) cur_start++;
+
+ // reset accumulators
+ InitGFStats(&gf_stats);
+ i = cur_last + 1;
+
+ if (cut_here == 2 && i >= frames_to_key) break;
+ }
+
+ std::vector<int> gf_intervals;
+ // save intervals
+ for (size_t n = 1; n < cut_pos.size(); n++) {
+ gf_intervals.push_back(cut_pos[n] - cut_pos[n - 1]);
+ }
+
+ return gf_intervals;
+}
+
+StatusOr<GopStructList> AV1RateControlQMode::DetermineGopInfo(
+ const FirstpassInfo &firstpass_info) {
+ const int stats_size = static_cast<int>(firstpass_info.stats_list.size());
+ GopStructList gop_list;
+ RefFrameManager ref_frame_manager(rc_param_.ref_frame_table_size,
+ rc_param_.max_ref_frames);
+
+ int global_coding_idx_offset = 0;
+ int global_order_idx_offset = 0;
+ std::vector<int> key_frame_list = GetKeyFrameList(firstpass_info);
+ key_frame_list.push_back(stats_size); // a sentinel value
+ for (size_t ki = 0; ki + 1 < key_frame_list.size(); ++ki) {
+ int frames_to_key = key_frame_list[ki + 1] - key_frame_list[ki];
+ int key_order_index = key_frame_list[ki]; // The key frame's display order
+
+ std::vector<REGIONS> regions_list(MAX_FIRSTPASS_ANALYSIS_FRAMES);
+ int total_regions = 0;
+ av1_identify_regions(firstpass_info.stats_list.data() + key_order_index,
+ frames_to_key, 0, regions_list.data(), &total_regions);
+ regions_list.resize(total_regions);
+ std::vector<int> gf_intervals = PartitionGopIntervals(
+ rc_param_, firstpass_info.stats_list, regions_list, key_order_index,
+ /*frames_since_key=*/0, frames_to_key);
+ for (size_t gi = 0; gi < gf_intervals.size(); ++gi) {
+ const bool has_key_frame = gi == 0;
+ const int show_frame_count = gf_intervals[gi];
+ GopStruct gop =
+ ConstructGop(&ref_frame_manager, show_frame_count, has_key_frame,
+ global_coding_idx_offset, global_order_idx_offset);
+ assert(gop.show_frame_count == show_frame_count);
+ global_coding_idx_offset += static_cast<int>(gop.gop_frame_list.size());
+ global_order_idx_offset += gop.show_frame_count;
+ gop_list.push_back(gop);
+ }
+ }
+ return gop_list;
+}
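+
+// Minimal usage sketch for the GOP-structure pass above (illustrative only;
+// rc_param and firstpass_info are assumed to be populated by the caller):
+//   AV1RateControlQMode rc;
+//   if (!rc.SetRcParam(rc_param).ok()) { /* handle invalid parameters */ }
+//   const StatusOr<GopStructList> gop_list =
+//       rc.DetermineGopInfo(firstpass_info);
+//   if (!gop_list.ok()) { /* inspect gop_list.status().message */ }
+//   // On success, *gop_list holds one GopStruct per GOP in coding order.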
+
+TplFrameDepStats CreateTplFrameDepStats(int frame_height, int frame_width,
+ int min_block_size) {
+ const int unit_rows = (frame_height + min_block_size - 1) / min_block_size;
+ const int unit_cols = (frame_width + min_block_size - 1) / min_block_size;
+ TplFrameDepStats frame_dep_stats;
+ frame_dep_stats.unit_size = min_block_size;
+ frame_dep_stats.unit_stats.resize(unit_rows);
+ for (auto &row : frame_dep_stats.unit_stats) {
+ row.resize(unit_cols);
+ }
+ return frame_dep_stats;
+}
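+
+// Example (assumed dimensions): for a 1920x1080 frame with
+// min_block_size = 16, unit_rows = (1080 + 15) / 16 = 68 and
+// unit_cols = (1920 + 15) / 16 = 120, so the returned grid holds
+// 68 * 120 default-initialized TplUnitDepStats entries.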
+
+TplUnitDepStats TplBlockStatsToDepStats(const TplBlockStats &block_stats,
+ int unit_count) {
+ TplUnitDepStats dep_stats = {};
+ dep_stats.intra_cost = block_stats.intra_cost * 1.0 / unit_count;
+ dep_stats.inter_cost = block_stats.inter_cost * 1.0 / unit_count;
+  // In rare cases, inter_cost may be greater than intra_cost.
+  // If so, cap inter_cost so that inter_cost <= intra_cost, as required by
+  // GetPropagationFraction().
+ dep_stats.inter_cost = std::min(dep_stats.intra_cost, dep_stats.inter_cost);
+ dep_stats.mv = block_stats.mv;
+ dep_stats.ref_frame_index = block_stats.ref_frame_index;
+ return dep_stats;
+}
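+
+// Example (hypothetical costs): a 32x32 block over 16x16 units has
+// unit_count = 4; intra_cost = 400 and inter_cost = 500 become 100 and 125
+// per unit, and the per-unit inter cost is then capped at 100 so that
+// GetPropagationFraction() never sees inter_cost > intra_cost.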
+
+namespace {
+Status ValidateBlockStats(const TplFrameStats &frame_stats,
+ const TplBlockStats &block_stats,
+ int min_block_size) {
+ if (block_stats.col >= frame_stats.frame_width ||
+ block_stats.row >= frame_stats.frame_height) {
+ std::ostringstream error_message;
+ error_message << "Block position (" << block_stats.col << ", "
+ << block_stats.row
+ << ") is out of range; frame dimensions are "
+ << frame_stats.frame_width << " x "
+ << frame_stats.frame_height;
+ return { AOM_CODEC_INVALID_PARAM, error_message.str() };
+ }
+ if (block_stats.col % min_block_size != 0 ||
+ block_stats.row % min_block_size != 0 ||
+ block_stats.width % min_block_size != 0 ||
+ block_stats.height % min_block_size != 0) {
+ std::ostringstream error_message;
+ error_message
+ << "Invalid block position or dimension, must be a multiple of "
+ << min_block_size << "; col = " << block_stats.col
+ << ", row = " << block_stats.row << ", width = " << block_stats.width
+ << ", height = " << block_stats.height;
+ return { AOM_CODEC_INVALID_PARAM, error_message.str() };
+ }
+ return { AOM_CODEC_OK, "" };
+}
+
+Status ValidateTplStats(const GopStruct &gop_struct,
+ const TplGopStats &tpl_gop_stats) {
+ constexpr char kAdvice[] =
+ "Do the current RateControlParam settings match those used to generate "
+ "the TPL stats?";
+ if (gop_struct.gop_frame_list.size() !=
+ tpl_gop_stats.frame_stats_list.size()) {
+ std::ostringstream error_message;
+ error_message << "Frame count of GopStruct ("
+ << gop_struct.gop_frame_list.size()
+ << ") doesn't match frame count of TPL stats ("
+ << tpl_gop_stats.frame_stats_list.size() << "). " << kAdvice;
+ return { AOM_CODEC_INVALID_PARAM, error_message.str() };
+ }
+ for (int i = 0; i < static_cast<int>(gop_struct.gop_frame_list.size()); ++i) {
+ const bool is_ref_frame = gop_struct.gop_frame_list[i].update_ref_idx >= 0;
+ const bool has_tpl_stats =
+ !tpl_gop_stats.frame_stats_list[i].block_stats_list.empty();
+ if (is_ref_frame && !has_tpl_stats) {
+ std::ostringstream error_message;
+ error_message << "The frame with global_coding_idx "
+ << gop_struct.gop_frame_list[i].global_coding_idx
+ << " is a reference frame, but has no TPL stats. "
+ << kAdvice;
+ return { AOM_CODEC_INVALID_PARAM, error_message.str() };
+ }
+ }
+ return { AOM_CODEC_OK, "" };
+}
+} // namespace
+
+StatusOr<TplFrameDepStats> CreateTplFrameDepStatsWithoutPropagation(
+ const TplFrameStats &frame_stats) {
+ if (frame_stats.block_stats_list.empty()) {
+ return TplFrameDepStats();
+ }
+ const int min_block_size = frame_stats.min_block_size;
+ const int unit_rows =
+ (frame_stats.frame_height + min_block_size - 1) / min_block_size;
+ const int unit_cols =
+ (frame_stats.frame_width + min_block_size - 1) / min_block_size;
+ TplFrameDepStats frame_dep_stats = CreateTplFrameDepStats(
+ frame_stats.frame_height, frame_stats.frame_width, min_block_size);
+ for (const TplBlockStats &block_stats : frame_stats.block_stats_list) {
+ Status status =
+ ValidateBlockStats(frame_stats, block_stats, min_block_size);
+ if (!status.ok()) {
+ return status;
+ }
+ const int block_unit_row = block_stats.row / min_block_size;
+ const int block_unit_col = block_stats.col / min_block_size;
+ // The block must start within the frame boundaries, but it may extend past
+ // the right edge or bottom of the frame. Find the number of unit rows and
+ // columns in the block which are fully within the frame.
+ const int block_unit_rows = std::min(block_stats.height / min_block_size,
+ unit_rows - block_unit_row);
+ const int block_unit_cols = std::min(block_stats.width / min_block_size,
+ unit_cols - block_unit_col);
+ const int unit_count = block_unit_rows * block_unit_cols;
+ TplUnitDepStats unit_stats =
+ TplBlockStatsToDepStats(block_stats, unit_count);
+ for (int r = 0; r < block_unit_rows; r++) {
+ for (int c = 0; c < block_unit_cols; c++) {
+ frame_dep_stats.unit_stats[block_unit_row + r][block_unit_col + c] =
+ unit_stats;
+ }
+ }
+ }
+ return frame_dep_stats;
+}
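+
+// Example of the edge clamping above (assumed 1080-pixel-high frame with
+// min_block_size = 16, i.e. 68 unit rows): a 64x64 block at row 1056 maps to
+// block_unit_row = 66, so block_unit_rows = min(64 / 16, 68 - 66) = 2; only
+// the two unit rows that lie inside the frame receive stats.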
+
+int GetRefCodingIdxList(const TplUnitDepStats &unit_dep_stats,
+ const RefFrameTable &ref_frame_table,
+ int *ref_coding_idx_list) {
+ int ref_frame_count = 0;
+ for (int i = 0; i < kBlockRefCount; ++i) {
+ ref_coding_idx_list[i] = -1;
+ int ref_frame_index = unit_dep_stats.ref_frame_index[i];
+ if (ref_frame_index != -1) {
+ assert(ref_frame_index < static_cast<int>(ref_frame_table.size()));
+ ref_coding_idx_list[i] = ref_frame_table[ref_frame_index].coding_idx;
+ ref_frame_count++;
+ }
+ }
+ return ref_frame_count;
+}
+
+int GetBlockOverlapArea(int r0, int c0, int r1, int c1, int size) {
+ const int r_low = std::max(r0, r1);
+ const int r_high = std::min(r0 + size, r1 + size);
+ const int c_low = std::max(c0, c1);
+ const int c_high = std::min(c0 + size, c1 + size);
+ if (r_high >= r_low && c_high >= c_low) {
+ return (r_high - r_low) * (c_high - c_low);
+ }
+ return 0;
+}
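+
+// Example: two 16x16 units whose origins differ by (4, 4) overlap in a
+// 12x12 region, so GetBlockOverlapArea(0, 0, 4, 4, 16) returns 144,
+// i.e. an overlap ratio of 144 / 256 = 0.5625 of one unit.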
+
+// TODO(angiebird): Merge TplFrameDepStatsAccumulateIntraCost and
+// TplFrameDepStatsAccumulate.
+double TplFrameDepStatsAccumulateIntraCost(
+ const TplFrameDepStats &frame_dep_stats) {
+ auto getIntraCost = [](double sum, const TplUnitDepStats &unit) {
+ return sum + unit.intra_cost;
+ };
+ double sum = 0;
+ for (const auto &row : frame_dep_stats.unit_stats) {
+ sum = std::accumulate(row.begin(), row.end(), sum, getIntraCost);
+ }
+ return std::max(sum, 1.0);
+}
+
+double TplFrameDepStatsAccumulate(const TplFrameDepStats &frame_dep_stats) {
+ auto getOverallCost = [](double sum, const TplUnitDepStats &unit) {
+ return sum + unit.propagation_cost + unit.intra_cost;
+ };
+ double sum = 0;
+ for (const auto &row : frame_dep_stats.unit_stats) {
+ sum = std::accumulate(row.begin(), row.end(), sum, getOverallCost);
+ }
+ return std::max(sum, 1.0);
+}
+
+// This is a generalization of GET_MV_RAWPEL that allows for an arbitrary
+// number of fractional bits.
+// TODO(angiebird): Add unit test to this function
+int GetFullpelValue(int subpel_value, int subpel_bits) {
+ const int subpel_scale = (1 << subpel_bits);
+ const int sign = subpel_value >= 0 ? 1 : -1;
+ int fullpel_value = (abs(subpel_value) + subpel_scale / 2) >> subpel_bits;
+ fullpel_value *= sign;
+ return fullpel_value;
+}
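+
+// Example: with subpel_bits = 3 (1/8-pel units, subpel_scale = 8),
+// GetFullpelValue(13, 3) computes (13 + 4) >> 3 = 2, and
+// GetFullpelValue(-13, 3) returns -2 since the sign is reapplied after
+// rounding the magnitude.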
+
+double GetPropagationFraction(const TplUnitDepStats &unit_dep_stats) {
+ assert(unit_dep_stats.intra_cost >= unit_dep_stats.inter_cost);
+ return (unit_dep_stats.intra_cost - unit_dep_stats.inter_cost) /
+ ModifyDivisor(unit_dep_stats.intra_cost);
+}
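+
+// Example (hypothetical costs): intra_cost = 100 and inter_cost = 40 yield
+// a propagation fraction of (100 - 40) / 100 = 0.6, i.e. 60% of this unit's
+// accumulated cost is pushed back to its reference units.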
+
+void TplFrameDepStatsPropagate(int coding_idx,
+ const RefFrameTable &ref_frame_table,
+ TplGopDepStats *tpl_gop_dep_stats) {
+ assert(!tpl_gop_dep_stats->frame_dep_stats_list.empty());
+ TplFrameDepStats *frame_dep_stats =
+ &tpl_gop_dep_stats->frame_dep_stats_list[coding_idx];
+
+ if (frame_dep_stats->unit_stats.empty()) return;
+
+ const int unit_size = frame_dep_stats->unit_size;
+ const int frame_unit_rows =
+ static_cast<int>(frame_dep_stats->unit_stats.size());
+ const int frame_unit_cols =
+ static_cast<int>(frame_dep_stats->unit_stats[0].size());
+ for (int unit_row = 0; unit_row < frame_unit_rows; ++unit_row) {
+ for (int unit_col = 0; unit_col < frame_unit_cols; ++unit_col) {
+ TplUnitDepStats &unit_dep_stats =
+ frame_dep_stats->unit_stats[unit_row][unit_col];
+ int ref_coding_idx_list[kBlockRefCount] = { -1, -1 };
+ int ref_frame_count = GetRefCodingIdxList(unit_dep_stats, ref_frame_table,
+ ref_coding_idx_list);
+ if (ref_frame_count == 0) continue;
+ for (int i = 0; i < kBlockRefCount; ++i) {
+ if (ref_coding_idx_list[i] == -1) continue;
+ assert(
+ ref_coding_idx_list[i] <
+ static_cast<int>(tpl_gop_dep_stats->frame_dep_stats_list.size()));
+ TplFrameDepStats &ref_frame_dep_stats =
+ tpl_gop_dep_stats->frame_dep_stats_list[ref_coding_idx_list[i]];
+ assert(!ref_frame_dep_stats.unit_stats.empty());
+ const auto &mv = unit_dep_stats.mv[i];
+ const int mv_row = GetFullpelValue(mv.row, mv.subpel_bits);
+ const int mv_col = GetFullpelValue(mv.col, mv.subpel_bits);
+ const int ref_pixel_r = unit_row * unit_size + mv_row;
+ const int ref_pixel_c = unit_col * unit_size + mv_col;
+ const int ref_unit_row_low =
+ (unit_row * unit_size + mv_row) / unit_size;
+ const int ref_unit_col_low =
+ (unit_col * unit_size + mv_col) / unit_size;
+
+ for (int j = 0; j < 2; ++j) {
+ for (int k = 0; k < 2; ++k) {
+ const int ref_unit_row = ref_unit_row_low + j;
+ const int ref_unit_col = ref_unit_col_low + k;
+ if (ref_unit_row >= 0 && ref_unit_row < frame_unit_rows &&
+ ref_unit_col >= 0 && ref_unit_col < frame_unit_cols) {
+ const int overlap_area = GetBlockOverlapArea(
+ ref_pixel_r, ref_pixel_c, ref_unit_row * unit_size,
+ ref_unit_col * unit_size, unit_size);
+ const double overlap_ratio =
+ overlap_area * 1.0 / (unit_size * unit_size);
+ const double propagation_fraction =
+ GetPropagationFraction(unit_dep_stats);
+ const double propagation_ratio =
+ 1.0 / ref_frame_count * overlap_ratio * propagation_fraction;
+ TplUnitDepStats &ref_unit_stats =
+ ref_frame_dep_stats.unit_stats[ref_unit_row][ref_unit_col];
+ ref_unit_stats.propagation_cost +=
+ (unit_dep_stats.intra_cost +
+ unit_dep_stats.propagation_cost) *
+ propagation_ratio;
+ }
+ }
+ }
+ }
+ }
+ }
+}
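+
+// Example of the 2x2 split above (assumed unit_size = 16): a motion vector
+// landing 4 pixels right of and 4 pixels below a unit boundary overlaps four
+// reference units with areas 144, 48, 48 and 16 pixels; the overlap_ratio
+// terms (144 + 48 + 48 + 16) / 256 sum to 1, so the overlap split conserves
+// the propagated amount.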
+
+std::vector<RefFrameTable> AV1RateControlQMode::GetRefFrameTableList(
+ const GopStruct &gop_struct,
+ const std::vector<LookaheadStats> &lookahead_stats,
+ RefFrameTable ref_frame_table) {
+ if (gop_struct.global_coding_idx_offset == 0) {
+ // For the first GOP, ref_frame_table need not be initialized. This is fine,
+ // because the first frame (a key frame) will fully initialize it.
+ ref_frame_table.assign(rc_param_.ref_frame_table_size, GopFrameInvalid());
+ } else {
+ // It's not the first GOP, so ref_frame_table must be valid.
+ assert(static_cast<int>(ref_frame_table.size()) ==
+ rc_param_.ref_frame_table_size);
+ assert(std::all_of(ref_frame_table.begin(), ref_frame_table.end(),
+ std::mem_fn(&GopFrame::is_valid)));
+ // Reset the frame processing order of the initial ref_frame_table.
+ for (GopFrame &gop_frame : ref_frame_table) gop_frame.coding_idx = -1;
+ }
+
+ std::vector<RefFrameTable> ref_frame_table_list;
+ ref_frame_table_list.push_back(ref_frame_table);
+ for (const GopFrame &gop_frame : gop_struct.gop_frame_list) {
+ if (gop_frame.is_key_frame) {
+ ref_frame_table.assign(rc_param_.ref_frame_table_size, gop_frame);
+ } else if (gop_frame.update_ref_idx != -1) {
+ assert(gop_frame.update_ref_idx <
+ static_cast<int>(ref_frame_table.size()));
+ ref_frame_table[gop_frame.update_ref_idx] = gop_frame;
+ }
+ ref_frame_table_list.push_back(ref_frame_table);
+ }
+
+ int gop_size_offset = static_cast<int>(gop_struct.gop_frame_list.size());
+
+ for (const auto &lookahead_stat : lookahead_stats) {
+ for (GopFrame gop_frame : lookahead_stat.gop_struct->gop_frame_list) {
+ if (gop_frame.is_key_frame) {
+ ref_frame_table.assign(rc_param_.ref_frame_table_size, gop_frame);
+ } else if (gop_frame.update_ref_idx != -1) {
+ assert(gop_frame.update_ref_idx <
+ static_cast<int>(ref_frame_table.size()));
+ gop_frame.coding_idx += gop_size_offset;
+ ref_frame_table[gop_frame.update_ref_idx] = gop_frame;
+ }
+ ref_frame_table_list.push_back(ref_frame_table);
+ }
+ gop_size_offset +=
+ static_cast<int>(lookahead_stat.gop_struct->gop_frame_list.size());
+ }
+
+ return ref_frame_table_list;
+}
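+
+// Example (illustrative): for a GOP of 5 frames with empty lookahead_stats,
+// the returned list holds 6 snapshots: the initial table followed by the
+// table state after each of the 5 frames updates its reference slot.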
+
+StatusOr<TplGopDepStats> ComputeTplGopDepStats(
+ const TplGopStats &tpl_gop_stats,
+ const std::vector<LookaheadStats> &lookahead_stats,
+ const std::vector<RefFrameTable> &ref_frame_table_list) {
+ std::vector<const TplFrameStats *> tpl_frame_stats_list_with_lookahead;
+ for (const auto &tpl_frame_stats : tpl_gop_stats.frame_stats_list) {
+ tpl_frame_stats_list_with_lookahead.push_back(&tpl_frame_stats);
+ }
+ for (auto &lookahead_stat : lookahead_stats) {
+ for (const auto &tpl_frame_stats :
+ lookahead_stat.tpl_gop_stats->frame_stats_list) {
+ tpl_frame_stats_list_with_lookahead.push_back(&tpl_frame_stats);
+ }
+ }
+
+ const int frame_count =
+ static_cast<int>(tpl_frame_stats_list_with_lookahead.size());
+
+ // Create the struct to store TPL dependency stats
+ TplGopDepStats tpl_gop_dep_stats;
+
+ tpl_gop_dep_stats.frame_dep_stats_list.reserve(frame_count);
+ for (int coding_idx = 0; coding_idx < frame_count; coding_idx++) {
+ const StatusOr<TplFrameDepStats> tpl_frame_dep_stats =
+ CreateTplFrameDepStatsWithoutPropagation(
+ *tpl_frame_stats_list_with_lookahead[coding_idx]);
+ if (!tpl_frame_dep_stats.ok()) {
+ return tpl_frame_dep_stats.status();
+ }
+ tpl_gop_dep_stats.frame_dep_stats_list.push_back(
+ std::move(*tpl_frame_dep_stats));
+ }
+
+ // Back propagation
+ for (int coding_idx = frame_count - 1; coding_idx >= 0; coding_idx--) {
+ auto &ref_frame_table = ref_frame_table_list[coding_idx];
+ // TODO(angiebird): Handle/test the case where reference frame
+ // is in the previous GOP
+ TplFrameDepStatsPropagate(coding_idx, ref_frame_table, &tpl_gop_dep_stats);
+ }
+ return tpl_gop_dep_stats;
+}
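+
+// Note on ordering: because the loop above walks coding_idx in reverse, a
+// frame's propagation_cost is fully accumulated from every later-coded frame
+// that references it before that frame's own cost is propagated further back.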
+
+static int GetRDMult(const GopFrame &gop_frame, int qindex) {
+ // TODO(angiebird):
+ // 1) Check if these rdmult rules are good in our use case.
+ // 2) Support high-bit-depth mode
+ if (gop_frame.is_golden_frame) {
+    // Assume ARF_UPDATE/GF_UPDATE share the same rdmult rule.
+ return av1_compute_rd_mult_based_on_qindex(AOM_BITS_8, GF_UPDATE, qindex);
+ } else if (gop_frame.is_key_frame) {
+ return av1_compute_rd_mult_based_on_qindex(AOM_BITS_8, KF_UPDATE, qindex);
+ } else {
+ // Assume LF_UPDATE/OVERLAY_UPDATE/INTNL_OVERLAY_UPDATE/INTNL_ARF_UPDATE
+    // share the same rdmult rule.
+ return av1_compute_rd_mult_based_on_qindex(AOM_BITS_8, LF_UPDATE, qindex);
+ }
+}
+
+StatusOr<GopEncodeInfo> AV1RateControlQMode::GetGopEncodeInfo(
+ const GopStruct &gop_struct, const TplGopStats &tpl_gop_stats,
+ const std::vector<LookaheadStats> &lookahead_stats,
+ const RefFrameTable &ref_frame_table_snapshot_init) {
+ Status status = ValidateTplStats(gop_struct, tpl_gop_stats);
+ if (!status.ok()) {
+ return status;
+ }
+
+ for (auto &lookahead_stat : lookahead_stats) {
+ Status status = ValidateTplStats(*lookahead_stat.gop_struct,
+ *lookahead_stat.tpl_gop_stats);
+ if (!status.ok()) {
+ return status;
+ }
+ }
+
+ const std::vector<RefFrameTable> ref_frame_table_list = GetRefFrameTableList(
+ gop_struct, lookahead_stats, ref_frame_table_snapshot_init);
+
+ GopEncodeInfo gop_encode_info;
+ gop_encode_info.final_snapshot = ref_frame_table_list.back();
+ StatusOr<TplGopDepStats> gop_dep_stats = ComputeTplGopDepStats(
+ tpl_gop_stats, lookahead_stats, ref_frame_table_list);
+ if (!gop_dep_stats.ok()) {
+ return gop_dep_stats.status();
+ }
+ const int frame_count =
+ static_cast<int>(tpl_gop_stats.frame_stats_list.size());
+ const int active_worst_quality = rc_param_.base_q_index;
+ int active_best_quality = rc_param_.base_q_index;
+ for (int i = 0; i < frame_count; i++) {
+ FrameEncodeParameters param;
+ const GopFrame &gop_frame = gop_struct.gop_frame_list[i];
+
+ if (gop_frame.update_type == GopFrameType::kOverlay ||
+ gop_frame.update_type == GopFrameType::kIntermediateOverlay ||
+ gop_frame.update_type == GopFrameType::kRegularLeaf) {
+ param.q_index = rc_param_.base_q_index;
+ } else if (gop_frame.update_type == GopFrameType::kRegularGolden ||
+ gop_frame.update_type == GopFrameType::kRegularKey ||
+ gop_frame.update_type == GopFrameType::kRegularArf) {
+ const TplFrameDepStats &frame_dep_stats =
+ gop_dep_stats->frame_dep_stats_list[i];
+ const double cost_without_propagation =
+ TplFrameDepStatsAccumulateIntraCost(frame_dep_stats);
+ const double cost_with_propagation =
+ TplFrameDepStatsAccumulate(frame_dep_stats);
+ const double frame_importance =
+ cost_with_propagation / cost_without_propagation;
+ // Imitate the behavior of av1_tpl_get_qstep_ratio()
+ const double qstep_ratio = sqrt(1 / frame_importance);
+ param.q_index = av1_get_q_index_from_qstep_ratio(rc_param_.base_q_index,
+ qstep_ratio, AOM_BITS_8);
+ if (rc_param_.base_q_index) param.q_index = AOMMAX(param.q_index, 1);
+ active_best_quality = param.q_index;
+ } else {
+ // Intermediate ARFs
+ assert(gop_frame.layer_depth >= 1);
+ const int depth_factor = 1 << (gop_frame.layer_depth - 1);
+ param.q_index =
+ (active_worst_quality * (depth_factor - 1) + active_best_quality) /
+ depth_factor;
+ }
+ param.rdmult = GetRDMult(gop_frame, param.q_index);
+ gop_encode_info.param_list.push_back(param);
+ }
+ return gop_encode_info;
+}
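+
+// Worked example for the ARF/key/golden branch above (hypothetical stats):
+// if cost_with_propagation is 4x cost_without_propagation, then
+// frame_importance = 4 and qstep_ratio = sqrt(1 / 4) = 0.5, so the frame is
+// assigned a q_index whose quantizer step is roughly half that implied by
+// rc_param_.base_q_index.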
+
+} // namespace aom
diff --git a/chromium/third_party/libaom/source/libaom/av1/qmode_rc/ratectrl_qmode.h b/chromium/third_party/libaom/source/libaom/av1/qmode_rc/ratectrl_qmode.h
new file mode 100644
index 00000000000..6d33bfd7a6a
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/av1/qmode_rc/ratectrl_qmode.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_AV1_QMODE_RC_RATECTRL_QMODE_H_
+#define AOM_AV1_QMODE_RC_RATECTRL_QMODE_H_
+
+#include <deque>
+#include <queue>
+#include <vector>
+#include "av1/encoder/firstpass.h"
+#include "av1/qmode_rc/ratectrl_qmode_interface.h"
+#include "av1/qmode_rc/reference_manager.h"
+
+namespace aom {
+
+constexpr int kLayerDepthOffset = 1;
+constexpr int kMinIntervalToAddArf = 3;
+constexpr int kMinArfInterval = (kMinIntervalToAddArf + 1) / 2;
+
+struct TplUnitDepStats {
+ double propagation_cost;
+ double intra_cost;
+ double inter_cost;
+ std::array<MotionVector, kBlockRefCount> mv;
+ std::array<int, kBlockRefCount> ref_frame_index;
+};
+
+struct TplFrameDepStats {
+ int unit_size; // equivalent to min_block_size
+ std::vector<std::vector<TplUnitDepStats>> unit_stats;
+};
+
+struct TplGopDepStats {
+ std::vector<TplFrameDepStats> frame_dep_stats_list;
+};
+
+GopFrame GopFrameInvalid();
+
+// Set up is_key_frame, is_arf_frame, is_show_frame, is_golden_frame and
+// encode_ref_mode in GopFrame based on gop_frame_type
+void SetGopFrameByType(GopFrameType gop_frame_type, GopFrame *gop_frame);
+
+GopFrame GopFrameBasic(int global_coding_idx_offset,
+ int global_order_idx_offset, int coding_idx,
+ int order_idx, int depth, int display_idx,
+ GopFrameType gop_frame_type);
+
+GopStruct ConstructGop(RefFrameManager *ref_frame_manager, int show_frame_count,
+ bool has_key_frame, int global_coding_idx_offset,
+ int global_order_idx_offset);
+
+// Creates a TplFrameDepStats containing a 2D array of default-initialized
+// TplUnitDepStats, with dimensions of
+// ceil(frame_height / min_block_size) x ceil(frame_width / min_block_size).
+// i.e., there will be one entry for each square block of size min_block_size,
+// and blocks along the bottom or right edge of the frame may extend beyond the
+// edges of the frame.
+TplFrameDepStats CreateTplFrameDepStats(int frame_height, int frame_width,
+ int min_block_size);
+
+TplUnitDepStats TplBlockStatsToDepStats(const TplBlockStats &block_stats,
+ int unit_count);
+
+StatusOr<TplFrameDepStats> CreateTplFrameDepStatsWithoutPropagation(
+ const TplFrameStats &frame_stats);
+
+std::vector<int> GetKeyFrameList(const FirstpassInfo &first_pass_info);
+
+double TplFrameDepStatsAccumulateIntraCost(
+ const TplFrameDepStats &frame_dep_stats);
+
+double TplFrameDepStatsAccumulate(const TplFrameDepStats &frame_dep_stats);
+
+void TplFrameDepStatsPropagate(int coding_idx,
+ const RefFrameTable &ref_frame_table,
+ TplGopDepStats *tpl_gop_dep_stats);
+
+int GetBlockOverlapArea(int r0, int c0, int r1, int c1, int size);
+
+StatusOr<TplGopDepStats> ComputeTplGopDepStats(
+ const TplGopStats &tpl_gop_stats,
+ const std::vector<LookaheadStats> &lookahead_stats,
+ const std::vector<RefFrameTable> &ref_frame_table_list);
+
+class AV1RateControlQMode : public AV1RateControlQModeInterface {
+ public:
+ Status SetRcParam(const RateControlParam &rc_param) override;
+ StatusOr<GopStructList> DetermineGopInfo(
+ const FirstpassInfo &firstpass_info) override;
+ StatusOr<GopEncodeInfo> GetGopEncodeInfo(
+ const GopStruct &gop_struct, const TplGopStats &tpl_gop_stats,
+ const std::vector<LookaheadStats> &lookahead_stats,
+ const RefFrameTable &ref_frame_table_snapshot) override;
+
+ // Public for testing only.
+  // Returns snapshots of the ref frame table before and after each frame in
+  // gop_struct. The returned list will have n+1 entries for n frames.
+  // If this is the first GOP, ref_frame_table is ignored and all refs are
+  // assumed invalid; otherwise ref_frame_table is used as the initial state.
+ std::vector<RefFrameTable> GetRefFrameTableList(
+ const GopStruct &gop_struct,
+ const std::vector<LookaheadStats> &lookahead_stats,
+ RefFrameTable ref_frame_table);
+
+ private:
+ RateControlParam rc_param_;
+};
+} // namespace aom
+
+#endif // AOM_AV1_QMODE_RC_RATECTRL_QMODE_H_
diff --git a/chromium/third_party/libaom/source/libaom/av1/ratectrl_qmode_interface.cc b/chromium/third_party/libaom/source/libaom/av1/qmode_rc/ratectrl_qmode_interface.cc
index eb29e433034..1f03e0c1333 100644
--- a/chromium/third_party/libaom/source/libaom/av1/ratectrl_qmode_interface.cc
+++ b/chromium/third_party/libaom/source/libaom/av1/qmode_rc/ratectrl_qmode_interface.cc
@@ -9,7 +9,7 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
-#include "av1/ratectrl_qmode_interface.h"
+#include "av1/qmode_rc/ratectrl_qmode_interface.h"
namespace aom {
diff --git a/chromium/third_party/libaom/source/libaom/av1/qmode_rc/ratectrl_qmode_interface.h b/chromium/third_party/libaom/source/libaom/av1/qmode_rc/ratectrl_qmode_interface.h
new file mode 100644
index 00000000000..3ce5fcdbe52
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/av1/qmode_rc/ratectrl_qmode_interface.h
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_AV1_QMODE_RC_RATECTRL_QMODE_INTERFACE_H_
+#define AOM_AV1_QMODE_RC_RATECTRL_QMODE_INTERFACE_H_
+
+#include <array>
+#include <string>
+#include <vector>
+
+#include "aom/aom_codec.h"
+#include "av1/encoder/firstpass.h"
+
+namespace aom {
+
+constexpr int kBlockRefCount = 2;
+
+struct MotionVector {
+ int row; // subpel row
+ int col; // subpel col
+ // TODO(b/241589513): Move this to TPLFrameStats; it's wasteful to code it
+ // separately for each block.
+ int subpel_bits; // number of fractional bits used by row/col
+};
+
+struct RateControlParam {
+ // Range of allowed GOP sizes (number of displayed frames).
+ int max_gop_show_frame_count;
+ int min_gop_show_frame_count;
+ // Number of reference frame buffers, i.e., size of the DPB.
+ int ref_frame_table_size;
+ // Maximum number of references a single frame may use.
+ int max_ref_frames;
+
+ int base_q_index;
+
+ int frame_width;
+ int frame_height;
+};
+
+struct TplBlockStats {
+ int16_t height; // Pixel height.
+ int16_t width; // Pixel width.
+ int16_t row; // Pixel row of the top left corner.
+  int16_t col;     // Pixel col of the top left corner.
+ int64_t intra_cost;
+ int64_t inter_cost;
+
+ // Valid only if TplFrameStats::rate_dist_present is true:
+ int64_t recrf_rate; // Bits when using recon as reference.
+ int64_t recrf_dist; // Distortion when using recon as reference.
+
+ std::array<MotionVector, kBlockRefCount> mv;
+ std::array<int, kBlockRefCount> ref_frame_index;
+};
+
+// GOP frame type used to facilitate setting up GopFrame.
+// TODO(angiebird): Define names for forward key frame and
+// key frame with overlay
+enum class GopFrameType {
+ kRegularKey, // High quality key frame without overlay
+ kRegularLeaf, // Regular leaf frame
+ kRegularGolden, // Regular golden frame
+ kRegularArf, // High quality arf with strong filtering followed by an overlay
+ // later
+ kOverlay, // Overlay frame
+ kIntermediateOverlay, // Intermediate overlay frame
+ kIntermediateArf, // Good quality arf with weak or no filtering followed by a
+ // show_existing later
+};
+
+enum class EncodeRefMode {
+ kRegular,
+ kOverlay,
+ kShowExisting,
+};
+
+enum class ReferenceName {
+ kNoneFrame = -1,
+ kIntraFrame = 0,
+ kLastFrame = 1,
+ kLast2Frame = 2,
+ kLast3Frame = 3,
+ kGoldenFrame = 4,
+ kBwdrefFrame = 5,
+ kAltref2Frame = 6,
+ kAltrefFrame = 7,
+};
+
+struct Status {
+ aom_codec_err_t code;
+ std::string message; // Empty if code == AOM_CODEC_OK.
+ bool ok() const { return code == AOM_CODEC_OK; }
+};
+
+// A very simple imitation of absl::StatusOr, this is conceptually a union of a
+// Status struct and an object of type T. It models an object that is either a
+// usable object, or an error explaining why such an object is not present. A
+// StatusOr<T> may never hold a status with a code of AOM_CODEC_OK.
+template <typename T>
+class StatusOr {
+ public:
+ StatusOr(const T &value) : value_(value) {}
+ StatusOr(T &&value) : value_(std::move(value)) {}
+ StatusOr(Status status) : status_(std::move(status)) {
+ assert(status_.code != AOM_CODEC_OK);
+ }
+
+ const Status &status() const { return status_; }
+ bool ok() const { return status().ok(); }
+
+ // operator* returns the value; it should only be called after checking that
+ // ok() returns true.
+ const T &operator*() const & { return value_; }
+ T &operator*() & { return value_; }
+ const T &&operator*() const && { return value_; }
+ T &&operator*() && { return std::move(value_); }
+
+ // sor->field is equivalent to (*sor).field.
+ const T *operator->() const & { return &value_; }
+ T *operator->() & { return &value_; }
+
+ // value() is equivalent to operator*, but asserts that ok() is true.
+ const T &value() const & {
+ assert(ok());
+ return value_;
+ }
+ T &value() & {
+ assert(ok());
+ return value_;
+ }
+ const T &&value() const && {
+ assert(ok());
+ return value_;
+ }
+ T &&value() && {
+ assert(ok());
+ return std::move(value_);
+ }
+
+ private:
+ T value_; // This could be std::optional<T> if it were available.
+ Status status_ = { AOM_CODEC_OK, "" };
+};
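+
+// Usage sketch (illustrative; ComputeSomething and LogError are
+// placeholders, not part of this API):
+//   StatusOr<int> result = ComputeSomething();
+//   if (!result.ok()) {
+//     LogError(result.status().message);
+//     return;
+//   }
+//   const int value = *result;  // Safe only after checking ok().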
+
+struct ReferenceFrame {
+ int index; // Index of reference slot containing the reference frame
+ ReferenceName name;
+};
+
+struct GopFrame {
+ // basic info
+ bool is_valid;
+ int order_idx; // Index in display order in a GOP
+ int coding_idx; // Index in coding order in a GOP
+ int display_idx; // The number of displayed frames preceding this frame in
+ // a GOP
+
+ int global_order_idx; // Index in display order in the whole video chunk
+ int global_coding_idx; // Index in coding order in the whole video chunk
+
+  bool is_key_frame;  // If this is a key frame, the reference buffers must
+                      // be reset
+  bool is_arf_frame;  // Is this a forward frame, i.e., a frame with
+                      // order_idx higher than the current display order
+ bool is_show_frame; // Is this frame a show frame after coding
+ bool is_golden_frame; // Is this a high quality frame
+
+ GopFrameType update_type; // This is a redundant field. It is only used for
+ // easy conversion in SW integration.
+
+ // reference frame info
+ EncodeRefMode encode_ref_mode;
+ int colocated_ref_idx; // colocated_ref_idx == -1 when encode_ref_mode ==
+ // EncodeRefMode::kRegular
+ int update_ref_idx; // The reference index that this frame should be
+ // updated to. update_ref_idx == -1 when this frame
+ // will not serve as a reference frame
+ std::vector<ReferenceFrame>
+      ref_frame_list;  // A list of available reference frames in priority
+                       // order for the current to-be-coded frame. The list
+                       // size should be less than or equal to
+                       // ref_frame_table_size. The reference frames with
+                       // smaller indices are more likely to be good
+                       // reference frames. Therefore, they should
+ // be prioritized when the reference frame count is
+ // limited. For example, if we plan to use 3 reference
+ // frames, we should choose ref_frame_list[0],
+ // ref_frame_list[1] and ref_frame_list[2].
+ int layer_depth; // Layer depth in the GOP structure
+ ReferenceFrame primary_ref_frame; // We will use the primary reference frame
+ // to update current frame's initial
+ // probability model
+};
+
+struct GopStruct {
+ int show_frame_count;
+ int global_coding_idx_offset;
+ int global_order_idx_offset;
+ // TODO(jingning): This can be removed once the framework is up running.
+  int display_tracker;  // Tracks the number of frames displayed preceding
+                        // the current coding frame.
+ std::vector<GopFrame> gop_frame_list;
+};
+
+using GopStructList = std::vector<GopStruct>;
+
+struct FrameEncodeParameters {
+ int q_index;
+ int rdmult;
+};
+
+struct FirstpassInfo {
+ int num_mbs_16x16; // Count of 16x16 unit blocks in each frame.
+ // FIRSTPASS_STATS's unit block size is 16x16
+ std::vector<FIRSTPASS_STATS> stats_list;
+};
+
+// In general, the number of elements in RefFrameTable must always equal
+// ref_frame_table_size (as specified in RateControlParam), but see
+// GetGopEncodeInfo for the one exception.
+using RefFrameTable = std::vector<GopFrame>;
+
+struct GopEncodeInfo {
+ std::vector<FrameEncodeParameters> param_list;
+ RefFrameTable final_snapshot; // RefFrameTable snapshot after coding this GOP
+};
+
+struct TplFrameStats {
+ int min_block_size;
+ int frame_width;
+ int frame_height;
+ bool rate_dist_present; // True if recrf_rate and recrf_dist are populated.
+ std::vector<TplBlockStats> block_stats_list;
+};
+
+struct TplGopStats {
+ std::vector<TplFrameStats> frame_stats_list;
+};
+
+// Structure and TPL stats for a single GOP, to be used for lookahead.
+struct LookaheadStats {
+ const GopStruct *gop_struct; // Not owned, may not be nullptr.
+ const TplGopStats *tpl_gop_stats; // Not owned, may not be nullptr.
+};
+
+class AV1RateControlQModeInterface {
+ public:
+ AV1RateControlQModeInterface();
+ virtual ~AV1RateControlQModeInterface();
+
+ virtual Status SetRcParam(const RateControlParam &rc_param) = 0;
+ virtual StatusOr<GopStructList> DetermineGopInfo(
+ const FirstpassInfo &firstpass_info) = 0;
+
+ // Accepts GOP structure and TPL info from the encoder and returns q index and
+ // rdmult for each frame. This should be called with consecutive GOPs as
+ // returned by DetermineGopInfo.
+ //
+ // GOP structure and TPL info from zero or more subsequent GOPs may optionally
+ // be passed in lookahead_stats.
+ //
+ // For the first GOP, a default-constructed RefFrameTable may be passed in as
+ // ref_frame_table_snapshot_init; for subsequent GOPs, it should be the
+ // final_snapshot returned on the previous call.
+ //
+ virtual StatusOr<GopEncodeInfo> GetGopEncodeInfo(
+ const GopStruct &gop_struct, const TplGopStats &tpl_gop_stats,
+ const std::vector<LookaheadStats> &lookahead_stats,
+ const RefFrameTable &ref_frame_table_snapshot_init) = 0;
+};
+} // namespace aom
+
+#endif // AOM_AV1_QMODE_RC_RATECTRL_QMODE_INTERFACE_H_
diff --git a/chromium/third_party/libaom/source/libaom/av1/qmode_rc/reference_manager.cc b/chromium/third_party/libaom/source/libaom/av1/qmode_rc/reference_manager.cc
new file mode 100644
index 00000000000..eea7b7d6338
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/av1/qmode_rc/reference_manager.cc
@@ -0,0 +1,339 @@
+/*
+ * Copyright (c) 2022, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <algorithm>
+#include <set>
+#include <utility>
+#include <tuple>
+#include <vector>
+
+#include "av1/qmode_rc/reference_manager.h"
+#include "av1/qmode_rc/ratectrl_qmode.h"
+
+namespace aom {
+
+void RefFrameManager::Reset() {
+ free_ref_idx_list_.clear();
+ for (int i = 0; i < static_cast<int>(ref_frame_table_.size()); ++i) {
+ free_ref_idx_list_.push_back(i);
+ ref_frame_table_[i] = GopFrameInvalid();
+ }
+ forward_stack_.clear();
+ backward_queue_.clear();
+ last_queue_.clear();
+}
+
+int RefFrameManager::AllocateRefIdx() {
+ if (free_ref_idx_list_.empty()) {
+ size_t backward_size = backward_queue_.size();
+ size_t last_size = last_queue_.size();
+ if (last_size >= backward_size) {
+ int ref_idx = last_queue_.front();
+ last_queue_.pop_front();
+ free_ref_idx_list_.push_back(ref_idx);
+ } else {
+ int ref_idx = backward_queue_.front();
+ backward_queue_.pop_front();
+ free_ref_idx_list_.push_back(ref_idx);
+ }
+ }
+
+ int ref_idx = free_ref_idx_list_.front();
+ free_ref_idx_list_.pop_front();
+ return ref_idx;
+}
+
+int RefFrameManager::GetRefFrameCountByType(
+ RefUpdateType ref_update_type) const {
+ size_t cnt = 0;
+ switch (ref_update_type) {
+ case RefUpdateType::kForward: cnt = forward_stack_.size(); break;
+ case RefUpdateType::kBackward: cnt = backward_queue_.size(); break;
+ case RefUpdateType::kLast: cnt = last_queue_.size(); break;
+ case RefUpdateType::kNone: cnt = 0; break;
+ }
+ return static_cast<int>(cnt);
+}
+
+int RefFrameManager::GetRefFrameCount() const {
+ return GetRefFrameCountByType(RefUpdateType::kForward) +
+ GetRefFrameCountByType(RefUpdateType::kBackward) +
+ GetRefFrameCountByType(RefUpdateType::kLast);
+}
+
+// TODO(angiebird): Add unit test.
+// Find the ref_idx corresponding to a ref_update_type.
+// Return -1 if no ref frame is found.
+// The priority_idx indicates closeness between the current frame and
+// the ref frame in display order.
+// For example, ref_update_type == kForward and priority_idx == 0 means
+// find the closest ref frame in forward_stack_.
+int RefFrameManager::GetRefFrameIdxByPriority(RefUpdateType ref_update_type,
+ int priority_idx) const {
+ if (ref_update_type == RefUpdateType::kForward) {
+ int size = static_cast<int>(forward_stack_.size());
+    // When two or more forward reference frames can be used, first take
+    // the highest quality one as the ARF, then go from the nearest to
+    // the more distant ones in the forward reference frame list.
+ if (priority_idx < size) {
+ if (allow_two_fwd_frames_) {
+ if (priority_idx == 0) return forward_stack_[0];
+ return forward_stack_[size - priority_idx];
+ }
+
+ // Handle the special case where only one forward reference frame
+ // can be used. In this setting, we prefer the nearest frame.
+ return forward_stack_[size - 1 - priority_idx];
+ }
+ } else if (ref_update_type == RefUpdateType::kBackward) {
+ int size = static_cast<int>(backward_queue_.size());
+ if (priority_idx < size) {
+ return backward_queue_[size - priority_idx - 1];
+ }
+ } else if (ref_update_type == RefUpdateType::kLast) {
+ int size = static_cast<int>(last_queue_.size());
+ if (priority_idx < size) {
+ return last_queue_[size - priority_idx - 1];
+ }
+ }
+ return -1;
+}
+
+// The priority_idx indicates closeness between the current frame and
+// the ref frame in display order.
+// For example, ref_update_type == kForward and priority_idx == 0 means
+// find the closest ref frame in forward_stack_.
+GopFrame RefFrameManager::GetRefFrameByPriority(RefUpdateType ref_update_type,
+ int priority_idx) const {
+ int ref_idx = GetRefFrameIdxByPriority(ref_update_type, priority_idx);
+ if (ref_idx == -1) {
+ return GopFrameInvalid();
+ }
+ assert(ref_frame_table_[ref_idx].update_ref_idx == ref_idx);
+ return ref_frame_table_[ref_idx];
+}
+
+GopFrame RefFrameManager::GetRefFrameByIndex(int ref_idx) const {
+ return ref_frame_table_[ref_idx];
+}
+
+ReferenceName get_ref_name(RefUpdateType ref_update_type, int priority_idx,
+ const std::set<ReferenceName> &used_name_set) {
+  // TODO(angiebird): Find a better way to assign name lists.
+  // Maybe sort the names based on how frequently each name has been used in
+  // the past?
+ const std::vector<ReferenceName> forward_name_list{
+ ReferenceName::kAltrefFrame, ReferenceName::kBwdrefFrame,
+ ReferenceName::kAltref2Frame, ReferenceName::kGoldenFrame,
+ ReferenceName::kLast3Frame, ReferenceName::kLast2Frame,
+ ReferenceName::kLastFrame
+ };
+ const std::vector<ReferenceName> backward_name_list{
+ ReferenceName::kGoldenFrame, ReferenceName::kLastFrame,
+ ReferenceName::kLast2Frame, ReferenceName::kLast3Frame,
+ ReferenceName::kBwdrefFrame, ReferenceName::kAltref2Frame,
+ ReferenceName::kAltrefFrame
+ };
+ const std::vector<ReferenceName> last_name_list{
+ ReferenceName::kLastFrame, ReferenceName::kLast2Frame,
+ ReferenceName::kLast3Frame, ReferenceName::kGoldenFrame,
+ ReferenceName::kBwdrefFrame, ReferenceName::kAltref2Frame,
+ ReferenceName::kAltrefFrame
+ };
+
+ const std::vector<ReferenceName> *name_list = nullptr;
+ switch (ref_update_type) {
+ case RefUpdateType::kForward: name_list = &forward_name_list; break;
+ case RefUpdateType::kBackward: name_list = &backward_name_list; break;
+ case RefUpdateType::kLast: name_list = &last_name_list; break;
+ case RefUpdateType::kNone: break;
+ }
+
+ if (name_list) {
+ const int name_list_size = static_cast<int>(name_list->size());
+ for (int idx = priority_idx; idx < name_list_size; ++idx) {
+ ReferenceName ref_name = name_list->at(idx);
+ bool not_used = used_name_set.find(ref_name) == used_name_set.end();
+ if (not_used) return ref_name;
+ }
+ }
+ return ReferenceName::kNoneFrame;
+}
+
+// Generate a list of available reference frames in priority order for the
+// current to-be-coded frame. The list size should be less than or equal to
+// the size of ref_frame_table_. The reference frames with smaller indices
+// are more likely to be good reference frames; therefore, they should be
+// prioritized
+// when the reference frame count is limited. For example, if we plan to use 3
+// reference frames, we should choose ref_frame_list[0], ref_frame_list[1] and
+// ref_frame_list[2].
+std::vector<ReferenceFrame> RefFrameManager::GetRefFrameListByPriority() const {
+ constexpr int round_robin_size = 3;
+ const std::vector<RefUpdateType> round_robin_list{ RefUpdateType::kForward,
+ RefUpdateType::kBackward,
+ RefUpdateType::kLast };
+ std::vector<int> priority_idx_list(round_robin_size, 0);
+ int available_ref_frames = GetRefFrameCount();
+ std::vector<ReferenceFrame> ref_frame_list;
+ int ref_frame_count = 0;
+ int round_robin_idx = 0;
+
+ std::set<ReferenceName> used_name_set;
+ while (ref_frame_count < available_ref_frames &&
+ ref_frame_count < max_ref_frames_) {
+ const RefUpdateType ref_update_type = round_robin_list[round_robin_idx];
+ int priority_idx = priority_idx_list[round_robin_idx];
+ int ref_idx = GetRefFrameIdxByPriority(ref_update_type, priority_idx);
+ if (ref_idx != -1) {
+ const ReferenceName name =
+ get_ref_name(ref_update_type, priority_idx, used_name_set);
+ assert(name != ReferenceName::kNoneFrame);
+ used_name_set.insert(name);
+ ReferenceFrame ref_frame = { ref_idx, name };
+ ref_frame_list.push_back(ref_frame);
+ ++ref_frame_count;
+ ++priority_idx_list[round_robin_idx];
+ }
+ round_robin_idx = (round_robin_idx + 1) % round_robin_size;
+ }
+ return ref_frame_list;
+}
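+
+// Example (hypothetical manager state): with two forward refs, one backward
+// ref and one last ref available and max_ref_frames_ = 3,
+// allow_two_fwd_frames_ is false (3 - 1 - 1 < 2), so the round-robin selects
+// the nearest forward ref (named kAltrefFrame), the most recent backward ref
+// (kGoldenFrame) and the most recent last ref (kLastFrame), in that priority
+// order.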
+
+void RefFrameManager::UpdateOrder(int global_order_idx) {
+ cur_global_order_idx_ = global_order_idx;
+ if (forward_stack_.empty()) {
+ return;
+ }
+ int ref_idx = forward_stack_.back();
+ const GopFrame &gf_frame = ref_frame_table_[ref_idx];
+
+ // If the current processing frame is an overlay / show existing frame.
+ if (gf_frame.global_order_idx == global_order_idx) {
+ forward_stack_.pop_back();
+ if (gf_frame.is_golden_frame) {
+ // high quality frame
+ backward_queue_.push_back(ref_idx);
+ } else {
+ last_queue_.push_back(ref_idx);
+ }
+ }
+}
+
+int RefFrameManager::ColocatedRefIdx(int global_order_idx) {
+ if (forward_stack_.empty()) return -1;
+ int ref_idx = forward_stack_.back();
+ int arf_global_order_idx = ref_frame_table_[ref_idx].global_order_idx;
+ if (arf_global_order_idx == global_order_idx) {
+ return ref_idx;
+ }
+ return -1;
+}
+
+static RefUpdateType infer_ref_update_type(const GopFrame &gop_frame,
+ int cur_global_order_idx) {
+ if (gop_frame.global_order_idx > cur_global_order_idx) {
+ return RefUpdateType::kForward;
+ }
+ if (gop_frame.is_golden_frame) {
+ return RefUpdateType::kBackward;
+ }
+ if (gop_frame.encode_ref_mode == EncodeRefMode::kShowExisting ||
+ gop_frame.encode_ref_mode == EncodeRefMode::kOverlay) {
+ return RefUpdateType::kNone;
+ }
+ return RefUpdateType::kLast;
+}
+
+using PrimaryRefKey = std::tuple<int, // abs layer_depth delta
+ bool, // is_key_frame differs
+ bool, // is_golden_frame differs
+ bool, // is_arf_frame differs
+ bool, // is_show_frame differs
+ bool, // encode_ref_mode differs
+ int>; // abs order_idx delta
+
+// Generate PrimaryRefKey based on abs layer_depth delta,
+// frame flags and abs order_idx delta. These are the fields that will
+// be used to pick the primary reference frame for the probability model.
+static PrimaryRefKey get_primary_ref_key(const GopFrame &cur_frame,
+ const GopFrame &ref_frame) {
+ return std::make_tuple(abs(cur_frame.layer_depth - ref_frame.layer_depth),
+ cur_frame.is_key_frame != ref_frame.is_key_frame,
+ cur_frame.is_golden_frame != ref_frame.is_golden_frame,
+ cur_frame.is_arf_frame != ref_frame.is_arf_frame,
+ cur_frame.is_show_frame != ref_frame.is_show_frame,
+ cur_frame.encode_ref_mode != ref_frame.encode_ref_mode,
+ abs(cur_frame.order_idx - ref_frame.order_idx));
+}
+
+// Pick primary_ref_idx for probability model.
+ReferenceFrame RefFrameManager::GetPrimaryRefFrame(
+ const GopFrame &gop_frame) const {
+ assert(gop_frame.is_valid);
+ std::vector<std::pair<PrimaryRefKey, int>> candidate_list;
+ for (auto &ref_frame_in_gop_frame : gop_frame.ref_frame_list) {
+ const GopFrame &ref_frame = ref_frame_table_[ref_frame_in_gop_frame.index];
+ if (ref_frame.is_valid) {
+ assert(ref_frame_in_gop_frame.index == ref_frame.update_ref_idx);
+ PrimaryRefKey key = get_primary_ref_key(gop_frame, ref_frame);
+ std::pair<PrimaryRefKey, int> candidate = {
+ key, ref_frame_in_gop_frame.index
+ };
+ candidate_list.push_back(candidate);
+ }
+ }
+
+ std::sort(candidate_list.begin(), candidate_list.end());
+
+ ReferenceFrame ref_frame = { -1, ReferenceName::kNoneFrame };
+ assert(candidate_list.size() == gop_frame.ref_frame_list.size());
+ if (!candidate_list.empty()) {
+ int ref_idx = candidate_list[0].second;
+ for (const auto &frame : gop_frame.ref_frame_list) {
+ if (frame.index == ref_idx) {
+ ref_frame = frame;
+ }
+ }
+ }
+ return ref_frame;
+}
+
+void RefFrameManager::UpdateRefFrameTable(GopFrame *gop_frame) {
+ allow_two_fwd_frames_ =
+ (max_ref_frames_ - !!GetRefFrameCountByType(RefUpdateType::kBackward) -
+ !!GetRefFrameCountByType(RefUpdateType::kLast)) >= 2;
+ gop_frame->ref_frame_list = GetRefFrameListByPriority();
+ gop_frame->primary_ref_frame = GetPrimaryRefFrame(*gop_frame);
+ gop_frame->colocated_ref_idx = ColocatedRefIdx(gop_frame->global_order_idx);
+
+ if (gop_frame->is_show_frame) {
+ UpdateOrder(gop_frame->global_order_idx);
+ }
+ // Call infer_ref_update_type() after UpdateOrder() so that
+ // cur_global_order_idx_ is up-to-date
+ RefUpdateType ref_update_type =
+ infer_ref_update_type(*gop_frame, cur_global_order_idx_);
+ if (ref_update_type == RefUpdateType::kNone) {
+ gop_frame->update_ref_idx = -1;
+ } else {
+ const int ref_idx = AllocateRefIdx();
+ gop_frame->update_ref_idx = ref_idx;
+ switch (ref_update_type) {
+ case RefUpdateType::kForward: forward_stack_.push_back(ref_idx); break;
+ case RefUpdateType::kBackward: backward_queue_.push_back(ref_idx); break;
+ case RefUpdateType::kLast: last_queue_.push_back(ref_idx); break;
+ case RefUpdateType::kNone: break;
+ }
+ ref_frame_table_[ref_idx] = *gop_frame;
+ }
+}
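+
+// Example of the update flow above (illustrative): an intermediate ARF whose
+// global_order_idx is ahead of cur_global_order_idx_ is classified as
+// RefUpdateType::kForward, so it is allocated a free ref slot, pushed onto
+// forward_stack_, and stored into ref_frame_table_[update_ref_idx]; an
+// overlay frame is kNone and gets update_ref_idx = -1.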
+
+} // namespace aom
diff --git a/chromium/third_party/libaom/source/libaom/av1/reference_manager.h b/chromium/third_party/libaom/source/libaom/av1/qmode_rc/reference_manager.h
index 50c8369aab8..37b50381d89 100644
--- a/chromium/third_party/libaom/source/libaom/av1/reference_manager.h
+++ b/chromium/third_party/libaom/source/libaom/av1/qmode_rc/reference_manager.h
@@ -9,14 +9,14 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
-#ifndef AOM_AV1_REFERENCE_MANAGER_H_
-#define AOM_AV1_REFERENCE_MANAGER_H_
+#ifndef AOM_AV1_QMODE_RC_REFERENCE_MANAGER_H_
+#define AOM_AV1_QMODE_RC_REFERENCE_MANAGER_H_
#include <deque>
#include <iostream>
#include <vector>
-#include "av1/ratectrl_qmode_interface.h"
+#include "av1/qmode_rc/ratectrl_qmode_interface.h"
namespace aom {
@@ -24,9 +24,14 @@ enum class RefUpdateType { kForward, kBackward, kLast, kNone };
class RefFrameManager {
public:
- explicit RefFrameManager(int max_ref_frames)
- : max_ref_frames_(max_ref_frames) {
- forward_max_size_ = max_ref_frames - 2;
+ explicit RefFrameManager(int ref_frame_table_size, int max_ref_frames)
+ : ref_frame_table_(ref_frame_table_size),
+ max_ref_frames_(max_ref_frames) {
+    // forward_max_size_ defines the max number of ARF frames that can exist
+    // at the same time. In other words, it's the max size of forward_stack_.
+ // TODO(angiebird): Figure out if this number is optimal.
+ forward_max_size_ = ref_frame_table_size - 2;
+ cur_global_order_idx_ = 0;
Reset();
}
~RefFrameManager() = default;
@@ -57,18 +62,28 @@ class RefFrameManager {
void Reset();
int AllocateRefIdx();
- void UpdateOrder(int order_idx);
- int ColocatedRefIdx(int order_idx);
+ int GetRefFrameCountByType(RefUpdateType ref_update_type) const;
+ int GetRefFrameCount() const;
+ std::vector<ReferenceFrame> GetRefFrameListByPriority() const;
+ int GetRefFrameIdxByPriority(RefUpdateType ref_update_type,
+ int priority_idx) const;
+ GopFrame GetRefFrameByPriority(RefUpdateType ref_update_type,
+ int priority_idx) const;
+ GopFrame GetRefFrameByIndex(int ref_idx) const;
+ void UpdateOrder(int global_order_idx);
+ int ColocatedRefIdx(int global_order_idx);
int ForwardMaxSize() const { return forward_max_size_; }
- int MaxRefFrames() const { return max_ref_frames_; }
- void UpdateFrame(GopFrame *gop_frame, RefUpdateType ref_update_type,
- EncodeRefMode encode_ref_mode);
+ int MaxRefFrame() const { return max_ref_frames_; }
+ int CurGlobalOrderIdx() const { return cur_global_order_idx_; }
+ void UpdateRefFrameTable(GopFrame *gop_frame);
+ ReferenceFrame GetPrimaryRefFrame(const GopFrame &gop_frame) const;
private:
- // TODO(angiebird): // Make RefFrameTable comply with max_ref_frames_
- int max_ref_frames_;
int forward_max_size_;
+ int cur_global_order_idx_;
RefFrameTable ref_frame_table_;
+ int max_ref_frames_;
+ bool allow_two_fwd_frames_;
std::deque<int> free_ref_idx_list_;
std::vector<int> forward_stack_;
std::deque<int> backward_queue_;
@@ -77,4 +92,4 @@ class RefFrameManager {
} // namespace aom
-#endif // AOM_AV1_REFERENCE_MANAGER_H_
+#endif // AOM_AV1_QMODE_RC_REFERENCE_MANAGER_H_
diff --git a/chromium/third_party/libaom/source/libaom/av1/ratectrl_qmode.cc b/chromium/third_party/libaom/source/libaom/av1/ratectrl_qmode.cc
deleted file mode 100644
index 4c69a563063..00000000000
--- a/chromium/third_party/libaom/source/libaom/av1/ratectrl_qmode.cc
+++ /dev/null
@@ -1,653 +0,0 @@
-/*
- * Copyright (c) 2022, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-#include "av1/ratectrl_qmode.h"
-
-#include <algorithm>
-#include <cassert>
-#include <climits>
-#include <numeric>
-#include <vector>
-
-#include "av1/encoder/pass2_strategy.h"
-#include "av1/encoder/tpl_model.h"
-
-namespace aom {
-
-GopFrame gop_frame_invalid() {
- GopFrame gop_frame = {};
- gop_frame.is_valid = false;
- gop_frame.coding_idx = -1;
- gop_frame.order_idx = -1;
- return gop_frame;
-}
-
-GopFrame gop_frame_basic(int coding_idx, int order_idx, bool is_key_frame,
- bool is_arf_frame, bool is_golden_frame,
- bool is_show_frame, int depth) {
- GopFrame gop_frame;
- gop_frame.is_valid = true;
- gop_frame.coding_idx = coding_idx;
- gop_frame.order_idx = order_idx;
- gop_frame.is_key_frame = is_key_frame;
- gop_frame.is_arf_frame = is_arf_frame;
- gop_frame.is_golden_frame = is_golden_frame;
- gop_frame.is_show_frame = is_show_frame;
- gop_frame.encode_ref_mode = EncodeRefMode::kRegular;
- gop_frame.colocated_ref_idx = -1;
- gop_frame.update_ref_idx = -1;
- gop_frame.layer_depth = depth + kLayerDepthOffset;
- return gop_frame;
-}
-
-// This function create gop frames with indices of display order from
-// order_start to order_end - 1. The function will recursively introduce
-// intermediate ARF untill maximum depth is met or the number of regular frames
-// in between two ARFs are less than 3. Than the regular frames will be added
-// into the gop_struct.
-void construct_gop_multi_layer(GopStruct *gop_struct,
- RefFrameManager *ref_frame_manager,
- int max_depth, int depth, int order_start,
- int order_end) {
- int coding_idx = static_cast<int>(gop_struct->gop_frame_list.size());
- GopFrame gop_frame;
- int num_frames = order_end - order_start;
- // If there are less than kMinIntervalToAddArf frames, stop introducing ARF
- if (depth < max_depth && num_frames >= kMinIntervalToAddArf) {
- int order_mid = (order_start + order_end) / 2;
- // intermediate ARF
- gop_frame = gop_frame_basic(coding_idx, order_mid, 0, 1, 0, 0, depth);
- ref_frame_manager->UpdateFrame(&gop_frame, RefUpdateType::kForward,
- EncodeRefMode::kRegular);
- gop_struct->gop_frame_list.push_back(gop_frame);
- construct_gop_multi_layer(gop_struct, ref_frame_manager, max_depth,
- depth + 1, order_start, order_mid);
- // show existing intermediate ARF
- gop_frame = gop_frame_basic(coding_idx, order_mid, 0, 0, 0, 1, max_depth);
- ref_frame_manager->UpdateFrame(&gop_frame, RefUpdateType::kNone,
- EncodeRefMode::kShowExisting);
- gop_struct->gop_frame_list.push_back(gop_frame);
- construct_gop_multi_layer(gop_struct, ref_frame_manager, max_depth,
- depth + 1, order_mid + 1, order_end);
- } else {
- // regular frame
- for (int i = order_start; i < order_end; ++i) {
- coding_idx = static_cast<int>(gop_struct->gop_frame_list.size());
- gop_frame = gop_frame_basic(coding_idx, i, 0, 0, 0, 1, max_depth);
- ref_frame_manager->UpdateFrame(&gop_frame, RefUpdateType::kLast,
- EncodeRefMode::kRegular);
- gop_struct->gop_frame_list.push_back(gop_frame);
- }
- }
-}
-
-GopStruct construct_gop(RefFrameManager *ref_frame_manager,
- int show_frame_count, bool has_key_frame) {
- GopStruct gop_struct;
- gop_struct.show_frame_count = show_frame_count;
- int order_start = 0;
- int order_arf = show_frame_count - 1;
- int coding_idx;
- GopFrame gop_frame;
- if (has_key_frame) {
- const int key_frame_depth = -1;
- ref_frame_manager->Reset();
- coding_idx = static_cast<int>(gop_struct.gop_frame_list.size());
- gop_frame =
- gop_frame_basic(coding_idx, order_start, 1, 0, 1, 1, key_frame_depth);
- ref_frame_manager->UpdateFrame(&gop_frame, RefUpdateType::kBackward,
- EncodeRefMode::kRegular);
- gop_struct.gop_frame_list.push_back(gop_frame);
- order_start++;
- }
- // ARF
- const int arf_depth = 0;
- coding_idx = static_cast<int>(gop_struct.gop_frame_list.size());
- gop_frame = gop_frame_basic(coding_idx, order_arf, 0, 1, 1, 0, arf_depth);
- ref_frame_manager->UpdateFrame(&gop_frame, RefUpdateType::kForward,
- EncodeRefMode::kRegular);
- gop_struct.gop_frame_list.push_back(gop_frame);
- construct_gop_multi_layer(&gop_struct, ref_frame_manager,
- ref_frame_manager->ForwardMaxSize(), arf_depth + 1,
- order_start, order_arf);
- // Overlay
- coding_idx = static_cast<int>(gop_struct.gop_frame_list.size());
- gop_frame = gop_frame_basic(coding_idx, order_arf, 0, 0, 0, 1,
- ref_frame_manager->ForwardMaxSize());
- ref_frame_manager->UpdateFrame(&gop_frame, RefUpdateType::kNone,
- EncodeRefMode::kOverlay);
- gop_struct.gop_frame_list.push_back(gop_frame);
- return gop_struct;
-}
-
-void AV1RateControlQMode::SetRcParam(const RateControlParam &rc_param) {
- rc_param_ = rc_param;
-}
-
-// initialize GF_GROUP_STATS
-static void init_gf_stats(GF_GROUP_STATS *gf_stats) {
- gf_stats->gf_group_err = 0.0;
- gf_stats->gf_group_raw_error = 0.0;
- gf_stats->gf_group_skip_pct = 0.0;
- gf_stats->gf_group_inactive_zone_rows = 0.0;
-
- gf_stats->mv_ratio_accumulator = 0.0;
- gf_stats->decay_accumulator = 1.0;
- gf_stats->zero_motion_accumulator = 1.0;
- gf_stats->loop_decay_rate = 1.0;
- gf_stats->last_loop_decay_rate = 1.0;
- gf_stats->this_frame_mv_in_out = 0.0;
- gf_stats->mv_in_out_accumulator = 0.0;
- gf_stats->abs_mv_in_out_accumulator = 0.0;
-
- gf_stats->avg_sr_coded_error = 0.0;
- gf_stats->avg_pcnt_second_ref = 0.0;
- gf_stats->avg_new_mv_count = 0.0;
- gf_stats->avg_wavelet_energy = 0.0;
- gf_stats->avg_raw_err_stdev = 0.0;
- gf_stats->non_zero_stdev_count = 0;
-}
-
-static int find_regions_index(const std::vector<REGIONS> &regions,
- int frame_idx) {
- for (int k = 0; k < static_cast<int>(regions.size()); k++) {
- if (regions[k].start <= frame_idx && regions[k].last >= frame_idx) {
- return k;
- }
- }
- return -1;
-}
-
-#define MIN_SHRINK_LEN 6
-
-/*!\brief Determine the length of future GF groups.
- *
- * \ingroup gf_group_algo
- * This function decides the gf group length of future frames in batch
- *
- * \param[in] rc_param Rate control parameters
- * \param[in] stats_list List of first pass stats
- * \param[in] regions_list List of regions from av1_identify_regions
- * \param[in] order_index Index of current frame in stats_list
- * \param[in] frames_since_key Number of frames since the last key frame
- * \param[in] frames_to_key Number of frames to the next key frame
- *
- * \return Returns a vector of decided GF group lengths.
- */
-static std::vector<int> partition_gop_intervals(
- const RateControlParam &rc_param,
- const std::vector<FIRSTPASS_STATS> &stats_list,
- const std::vector<REGIONS> &regions_list, int order_index,
- int frames_since_key, int frames_to_key) {
- const int min_shrink_int =
- std::max(MIN_SHRINK_LEN, rc_param.min_gop_show_frame_count);
- int i = (frames_since_key == 0) ? 1 : 0;
- // If cpi->gf_state.arf_gf_boost_lst is 0, we are starting with a KF or GF.
- int cur_start = 0, cur_last;
- // Each element is the index of the last frame in a GOP. If there are n
- // GOPs, n + 1 cut points are needed to recover the durations, so cut_pos
- // is seeded with -1, the last frame of the (virtual) previous GOP.
- std::vector<int> cut_pos(1, -1);
- int cut_here = 0;
- GF_GROUP_STATS gf_stats;
- init_gf_stats(&gf_stats);
- int num_regions = static_cast<int>(regions_list.size());
- int num_stats = static_cast<int>(stats_list.size());
- int stats_in_loop_index = order_index;
- while (i + order_index < num_stats) {
- // reaches next key frame, break here
- if (i >= frames_to_key) {
- cut_here = 2;
- } else if (i - cur_start >= rc_param.max_gop_show_frame_count) {
- // reached maximum len, but nothing special yet (almost static)
- // let's look at the next interval
- cut_here = 1;
- } else if (stats_in_loop_index >= num_stats) {
- // reaches last frame, break
- cut_here = 2;
- }
-
- if (!cut_here) {
- ++i;
- continue;
- }
- cur_last = i - 1; // the current last frame in the gf group
- int ori_last = cur_last;
- int scenecut_idx = -1;
- // Only try shrinking if the interval is no longer than max_gop_show_frame_count.
- if (cur_last - cur_start <= rc_param.max_gop_show_frame_count &&
- cur_last > cur_start) {
- // find the region indices of where the first and last frame belong.
- int k_start =
- find_regions_index(regions_list, cur_start + frames_since_key);
- int k_last =
- find_regions_index(regions_list, cur_last + frames_since_key);
- if (cur_start + frames_since_key == 0) k_start = 0;
-
- // See if we have a scenecut in between
- for (int r = k_start + 1; r <= k_last; r++) {
- if (regions_list[r].type == SCENECUT_REGION &&
- regions_list[r].last - frames_since_key - cur_start >
- rc_param.min_gop_show_frame_count) {
- scenecut_idx = r;
- break;
- }
- }
-
- // If the found scenecut is very close to the end, ignore it. (Guard
- // against scenecut_idx == -1 before indexing regions_list with it.)
- if (scenecut_idx != -1 &&
- regions_list[num_regions - 1].last - regions_list[scenecut_idx].last <
- 4) {
- scenecut_idx = -1;
- }
-
- if (scenecut_idx != -1) {
- // If we have a scenecut, then stop at it.
- // TODO(bohanli): add logic here to stop before the scenecut and for
- // the next gop start from the scenecut with GF
- int is_minor_sc =
- (regions_list[scenecut_idx].avg_cor_coeff *
- (1 -
- stats_list[order_index + regions_list[scenecut_idx].start -
- frames_since_key]
- .noise_var /
- regions_list[scenecut_idx].avg_intra_err) >
- 0.6);
- cur_last =
- regions_list[scenecut_idx].last - frames_since_key - !is_minor_sc;
- } else {
- int is_last_analysed =
- (k_last == num_regions - 1) &&
- (cur_last + frames_since_key == regions_list[k_last].last);
- int not_enough_regions =
- k_last - k_start <=
- 1 + (regions_list[k_start].type == SCENECUT_REGION);
- // if we are very close to the end, then do not shrink since it may
- // introduce intervals that are too short
- if (!(is_last_analysed && not_enough_regions)) {
- const double arf_length_factor = 0.1;
- double best_score = 0;
- int best_j = -1;
- const int first_frame = regions_list[0].start - frames_since_key;
- const int last_frame =
- regions_list[num_regions - 1].last - frames_since_key;
- // score of how much the arf helps the whole GOP
- double base_score = 0.0;
- // Accumulate base_score over the first min_shrink_int frames of the interval.
- for (int j = cur_start + 1; j < cur_start + min_shrink_int; j++) {
- if (order_index + j >= num_stats) break;
- base_score =
- (base_score + 1.0) * stats_list[order_index + j].cor_coeff;
- }
- int met_blending = 0; // Whether we have met blending areas before
- int last_blending = 0; // Whether the previous frame is blending
- for (int j = cur_start + min_shrink_int; j <= cur_last; j++) {
- if (order_index + j >= num_stats) break;
- base_score =
- (base_score + 1.0) * stats_list[order_index + j].cor_coeff;
- int this_reg =
- find_regions_index(regions_list, j + frames_since_key);
- if (this_reg < 0) continue;
- // A GOP should include at most 1 blending region.
- if (regions_list[this_reg].type == BLENDING_REGION) {
- last_blending = 1;
- if (met_blending) {
- break;
- } else {
- base_score = 0;
- continue;
- }
- } else {
- if (last_blending) met_blending = 1;
- last_blending = 0;
- }
-
- // Add the factor of how good the neighborhood is for this
- // candidate arf.
- double this_score = arf_length_factor * base_score;
- double temp_accu_coeff = 1.0;
- // following frames
- int count_f = 0;
- for (int n = j + 1; n <= j + 3 && n <= last_frame; n++) {
- if (order_index + n >= num_stats) break;
- temp_accu_coeff *= stats_list[order_index + n].cor_coeff;
- this_score +=
- temp_accu_coeff *
- (1 - stats_list[order_index + n].noise_var /
- AOMMAX(regions_list[this_reg].avg_intra_err, 0.001));
- count_f++;
- }
- // preceding frames
- temp_accu_coeff = 1.0;
- for (int n = j; n > j - 3 * 2 + count_f && n > first_frame; n--) {
- if (order_index + n < 0) break;
- temp_accu_coeff *= stats_list[order_index + n].cor_coeff;
- this_score +=
- temp_accu_coeff *
- (1 - stats_list[order_index + n].noise_var /
- AOMMAX(regions_list[this_reg].avg_intra_err, 0.001));
- }
-
- if (this_score > best_score) {
- best_score = this_score;
- best_j = j;
- }
- }
-
- // For blending areas, move one more frame in case we missed the
- // first blending frame.
- int best_reg =
- find_regions_index(regions_list, best_j + frames_since_key);
- if (best_reg < num_regions - 1 && best_reg > 0) {
- if (regions_list[best_reg - 1].type == BLENDING_REGION &&
- regions_list[best_reg + 1].type == BLENDING_REGION) {
- if (best_j + frames_since_key == regions_list[best_reg].start &&
- best_j + frames_since_key < regions_list[best_reg].last) {
- best_j += 1;
- } else if (best_j + frames_since_key ==
- regions_list[best_reg].last &&
- best_j + frames_since_key >
- regions_list[best_reg].start) {
- best_j -= 1;
- }
- }
- }
-
- if (cur_last - best_j < 2) best_j = cur_last;
- if (best_j > 0 && best_score > 0.1) cur_last = best_j;
- // If no good candidate was found, the cut stays at the original place.
- }
- }
- }
- cut_pos.push_back(cur_last);
-
- // reset pointers to the shrunken location
- stats_in_loop_index = order_index + cur_last;
- cur_start = cur_last;
- int cur_region_idx =
- find_regions_index(regions_list, cur_start + 1 + frames_since_key);
- if (cur_region_idx >= 0)
- if (regions_list[cur_region_idx].type == SCENECUT_REGION) cur_start++;
-
- if (cut_here > 1 && cur_last == ori_last) break;
- // reset accumulators
- init_gf_stats(&gf_stats);
- i = cur_last + 1;
- }
- std::vector<int> gf_intervals;
- // save intervals
- for (size_t n = 1; n < cut_pos.size(); n++) {
- gf_intervals.push_back(cut_pos[n] - cut_pos[n - 1]);
- }
-
- return gf_intervals;
-}
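
The tail of the function turns cut points into lengths by adjacent differences; because cut_pos is seeded with -1, the first interval comes out correct. A tiny numeric check (cut positions hypothetical):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    int main() {
      // Two GOPs ending at frames 15 and 31 (hypothetical cut positions).
      const std::vector<int> cut_pos = { -1, 15, 31 };
      std::vector<int> gf_intervals;
      for (std::size_t n = 1; n < cut_pos.size(); n++)
        gf_intervals.push_back(cut_pos[n] - cut_pos[n - 1]);
      assert(gf_intervals[0] == 16 && gf_intervals[1] == 16);
      return 0;
    }
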
-
-GopStructList AV1RateControlQMode::DetermineGopInfo(
- const FirstpassInfo &firstpass_info) {
- std::vector<REGIONS> regions_list(MAX_FIRSTPASS_ANALYSIS_FRAMES);
- int total_regions = 0;
- // TODO(jianj): firstpass_info.size() should eventually be replaced
- // by the number of frames to the next KF.
- av1_identify_regions(firstpass_info.data(),
- std::min(static_cast<int>(firstpass_info.size()),
- MAX_FIRSTPASS_ANALYSIS_FRAMES),
- 0, regions_list.data(), &total_regions);
- regions_list.resize(total_regions);
- int order_index = 0, frames_since_key = 0, frames_to_key = 0;
- std::vector<int> gf_intervals =
- partition_gop_intervals(rc_param_, firstpass_info, regions_list,
- order_index, frames_since_key, frames_to_key);
- // A temporary simple implementation
- const int max_gop_show_frame_count = 16;
- int remaining_show_frame_count = static_cast<int>(firstpass_info.size());
- GopStructList gop_list;
-
- RefFrameManager ref_frame_manager(rc_param_.max_ref_frames);
-
- while (remaining_show_frame_count > 0) {
- int show_frame_count =
- std::min(remaining_show_frame_count, max_gop_show_frame_count);
- // TODO(angiebird): determine gop show frame count based on first pass stats
- // here.
- bool has_key_frame = gop_list.size() == 0;
- GopStruct gop =
- construct_gop(&ref_frame_manager, show_frame_count, has_key_frame);
- gop_list.push_back(gop);
- remaining_show_frame_count -= show_frame_count;
- }
- return gop_list;
-}
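
Note that gf_intervals is computed but not yet consumed: the "temporary simple implementation" below it just splits the clip greedily into GOPs of at most 16 show frames, placing the key frame only in the first GOP. The greedy split in isolation (37 is a hypothetical clip length):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    int main() {
      const int max_gop_show_frame_count = 16;
      int remaining = 37;  // hypothetical clip length
      std::vector<int> gop_sizes;
      while (remaining > 0) {
        const int n = std::min(remaining, max_gop_show_frame_count);
        gop_sizes.push_back(n);
        remaining -= n;
      }
      assert((gop_sizes == std::vector<int>{ 16, 16, 5 }));
      return 0;
    }
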
-
-TplFrameDepStats create_tpl_frame_dep_stats_empty(int frame_height,
- int frame_width,
- int min_block_size) {
- const int unit_rows =
- frame_height / min_block_size + !!(frame_height % min_block_size);
- const int unit_cols =
- frame_width / min_block_size + !!(frame_width % min_block_size);
- TplFrameDepStats frame_dep_stats;
- frame_dep_stats.unit_size = min_block_size;
- frame_dep_stats.unit_stats = std::vector<std::vector<double>>(
- unit_rows, std::vector<double>(unit_cols, 0));
- return frame_dep_stats;
-}
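
The unit_rows/unit_cols expressions are ceiling division: "h / b + !!(h % b)" counts a partial trailing unit and equals the usual "(h + b - 1) / b" for positive operands. A quick check:

    #include <cassert>

    int ceil_div(int h, int b) { return h / b + !!(h % b); }

    int main() {
      assert(ceil_div(720, 16) == 45);  // exact multiple
      assert(ceil_div(724, 16) == 46);  // partial trailing unit still counted
      assert(ceil_div(724, 16) == (724 + 16 - 1) / 16);
      return 0;
    }
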
-
-TplFrameDepStats create_tpl_frame_dep_stats_wo_propagation(
- const TplFrameStats &frame_stats) {
- const int min_block_size = frame_stats.min_block_size;
- TplFrameDepStats frame_dep_stats = create_tpl_frame_dep_stats_empty(
- frame_stats.frame_height, frame_stats.frame_width, min_block_size);
- for (const TplBlockStats &block_stats : frame_stats.block_stats_list) {
- const int block_unit_rows = block_stats.height / min_block_size;
- const int block_unit_cols = block_stats.width / min_block_size;
- const int unit_count = block_unit_rows * block_unit_cols;
- const int block_unit_row = block_stats.row / min_block_size;
- const int block_unit_col = block_stats.col / min_block_size;
- const double cost_diff =
- (block_stats.inter_cost - block_stats.intra_cost) * 1.0 / unit_count;
- for (int r = 0; r < block_unit_rows; r++) {
- for (int c = 0; c < block_unit_cols; c++) {
- frame_dep_stats.unit_stats[block_unit_row + r][block_unit_col + c] =
- cost_diff;
- }
- }
- }
- return frame_dep_stats;
-}
-
-int get_ref_coding_idx_list(const TplBlockStats &block_stats,
- const RefFrameTable &ref_frame_table,
- int *ref_coding_idx_list) {
- int ref_frame_count = 0;
- for (int i = 0; i < kBlockRefCount; ++i) {
- ref_coding_idx_list[i] = -1;
- int ref_frame_index = block_stats.ref_frame_index[i];
- if (ref_frame_index != -1) {
- ref_coding_idx_list[i] = ref_frame_table[ref_frame_index].coding_idx;
- ref_frame_count++;
- }
- }
- return ref_frame_count;
-}
-
-int get_block_overlap_area(int r0, int c0, int r1, int c1, int size) {
- const int r_low = std::max(r0, r1);
- const int r_high = std::min(r0 + size, r1 + size);
- const int c_low = std::max(c0, c1);
- const int c_high = std::min(c0 + size, c1 + size);
- if (r_high >= r_low && c_high >= c_low) {
- return (r_high - r_low) * (c_high - c_low);
- }
- return 0;
-}
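
A quick numeric check of the overlap computation: two 16x16 blocks whose origins differ by (4, 6) overlap in a 12x10 window:

    #include <algorithm>
    #include <cassert>

    int overlap(int r0, int c0, int r1, int c1, int size) {
      const int r_low = std::max(r0, r1), r_high = std::min(r0 + size, r1 + size);
      const int c_low = std::max(c0, c1), c_high = std::min(c0 + size, c1 + size);
      return (r_high >= r_low && c_high >= c_low)
                 ? (r_high - r_low) * (c_high - c_low)
                 : 0;
    }

    int main() {
      assert(overlap(0, 0, 4, 6, 16) == 12 * 10);  // partial overlap
      assert(overlap(0, 0, 0, 0, 16) == 16 * 16);  // identical blocks
      assert(overlap(0, 0, 32, 0, 16) == 0);       // disjoint
      return 0;
    }
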
-
-double tpl_frame_stats_accumulate(const TplFrameStats &frame_stats) {
- double ref_sum_cost_diff = 0;
- for (auto &block_stats : frame_stats.block_stats_list) {
- ref_sum_cost_diff += block_stats.inter_cost - block_stats.intra_cost;
- }
- return ref_sum_cost_diff;
-}
-
-double tpl_frame_dep_stats_accumulate(const TplFrameDepStats &frame_dep_stats) {
- double sum = 0;
- for (const auto &row : frame_dep_stats.unit_stats) {
- sum = std::accumulate(row.begin(), row.end(), sum);
- }
- return sum;
-}
-
-// This is a generalization of GET_MV_RAWPEL that allows for an arbitrary number
-// of fractional bits.
-// TODO(angiebird): Add unit test to this function
-int get_fullpel_value(int subpel_value, int subpel_bits) {
- const int subpel_scale = (1 << subpel_bits);
- const int sign = subpel_value >= 0 ? 1 : -1;
- int fullpel_value = (abs(subpel_value) + subpel_scale / 2) >> subpel_bits;
- fullpel_value *= sign;
- return fullpel_value;
-}
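
With subpel_bits = 3 (1/8-pel motion vectors) the function rounds to the nearest full pel, handling the sign separately so negative values round symmetrically:

    #include <cassert>
    #include <cstdlib>

    int get_fullpel(int subpel_value, int subpel_bits) {
      const int scale = 1 << subpel_bits;
      const int sign = subpel_value >= 0 ? 1 : -1;
      return sign * ((std::abs(subpel_value) + scale / 2) >> subpel_bits);
    }

    int main() {
      assert(get_fullpel(13, 3) == 2);    // 13/8 = 1.625 -> 2
      assert(get_fullpel(-13, 3) == -2);  // symmetric rounding for negatives
      assert(get_fullpel(11, 3) == 1);    // 11/8 = 1.375 -> 1
      return 0;
    }
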
-
-void tpl_frame_dep_stats_propagate(const TplFrameStats &frame_stats,
- const RefFrameTable &ref_frame_table,
- TplGopDepStats *tpl_gop_dep_stats) {
- const int min_block_size = frame_stats.min_block_size;
- const int frame_unit_rows =
- frame_stats.frame_height / frame_stats.min_block_size;
- const int frame_unit_cols =
- frame_stats.frame_width / frame_stats.min_block_size;
- for (const TplBlockStats &block_stats : frame_stats.block_stats_list) {
- int ref_coding_idx_list[kBlockRefCount] = { -1, -1 };
- int ref_frame_count = get_ref_coding_idx_list(block_stats, ref_frame_table,
- ref_coding_idx_list);
- if (ref_frame_count > 0) {
- double propagation_ratio = 1.0 / ref_frame_count;
- for (int i = 0; i < kBlockRefCount; ++i) {
- if (ref_coding_idx_list[i] != -1) {
- auto &ref_frame_dep_stats =
- tpl_gop_dep_stats->frame_dep_stats_list[ref_coding_idx_list[i]];
- const auto &mv = block_stats.mv[i];
- const int mv_row = get_fullpel_value(mv.row, mv.subpel_bits);
- const int mv_col = get_fullpel_value(mv.col, mv.subpel_bits);
- const int block_unit_rows = block_stats.height / min_block_size;
- const int block_unit_cols = block_stats.width / min_block_size;
- const int unit_count = block_unit_rows * block_unit_cols;
- const double cost_diff =
- (block_stats.inter_cost - block_stats.intra_cost) * 1.0 /
- unit_count;
- for (int r = 0; r < block_unit_rows; r++) {
- for (int c = 0; c < block_unit_cols; c++) {
- const int ref_block_row =
- block_stats.row + r * min_block_size + mv_row;
- const int ref_block_col =
- block_stats.col + c * min_block_size + mv_col;
- const int ref_unit_row_low = ref_block_row / min_block_size;
- const int ref_unit_col_low = ref_block_col / min_block_size;
- for (int j = 0; j < 2; ++j) {
- for (int k = 0; k < 2; ++k) {
- const int unit_row = ref_unit_row_low + j;
- const int unit_col = ref_unit_col_low + k;
- if (unit_row >= 0 && unit_row < frame_unit_rows &&
- unit_col >= 0 && unit_col < frame_unit_cols) {
- const int overlap_area = get_block_overlap_area(
- unit_row * min_block_size, unit_col * min_block_size,
- ref_block_row, ref_block_col, min_block_size);
- const double overlap_ratio =
- overlap_area * 1.0 / (min_block_size * min_block_size);
- ref_frame_dep_stats.unit_stats[unit_row][unit_col] +=
- cost_diff * overlap_ratio * propagation_ratio;
- }
- }
- }
- }
- }
- }
- }
- }
- }
-}
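
Each unit of a block pushes cost_diff back to its reference frame weighted by the spatial overlap of the motion-compensated position (overlap_area / unit_area) and split evenly across references (1 / ref_frame_count). A worked number with hypothetical values:

    #include <cassert>

    int main() {
      const double cost_diff = -512.0;           // hypothetical inter - intra
      const double propagation_ratio = 1.0 / 2;  // block has two references
      const double overlap_ratio = 120.0 / (16 * 16);  // 120-px overlap, 16x16 unit
      const double contribution = cost_diff * overlap_ratio * propagation_ratio;
      assert(contribution == -120.0);  // what one reference unit accumulates
      return 0;
    }
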
-
-// TODO(angiebird): Add unit test for this function
-std::vector<RefFrameTable> get_ref_frame_table_list(
- const GopStruct &gop_struct, RefFrameTable ref_frame_table) {
- const int frame_count = static_cast<int>(gop_struct.gop_frame_list.size());
- std::vector<RefFrameTable> ref_frame_table_list;
- ref_frame_table_list.push_back(ref_frame_table);
- for (int coding_idx = 0; coding_idx < frame_count; coding_idx++) {
- const auto &gop_frame = gop_struct.gop_frame_list[coding_idx];
- if (gop_frame.update_ref_idx != -1) {
- ref_frame_table[gop_frame.update_ref_idx] = gop_frame;
- }
- ref_frame_table_list.push_back(ref_frame_table);
- }
- return ref_frame_table_list;
-}
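
The returned list holds frame_count + 1 snapshots: the table as it stands before each coded frame, plus the final state after the last update. A minimal sketch of the same fold over a simplified table (the real RefFrameTable is an array of GopFrame):

    #include <array>
    #include <cassert>
    #include <utility>
    #include <vector>

    int main() {
      using Table = std::array<int, 8>;  // stand-in for RefFrameTable
      Table table{};                     // initial snapshot (all zero)
      // Each pair is (update_ref_idx, payload); -1 means "no table update",
      // mirroring gop_frame.update_ref_idx above. Values are hypothetical.
      const std::vector<std::pair<int, int>> frames = { { 2, 7 }, { -1, 0 },
                                                        { 5, 9 } };
      std::vector<Table> snapshots = { table };  // state before the first frame
      for (const auto &f : frames) {
        if (f.first != -1) table[f.first] = f.second;
        snapshots.push_back(table);  // state after coding this frame
      }
      assert(snapshots.size() == frames.size() + 1);
      assert(snapshots[1][2] == 7 && snapshots.back()[5] == 9);
      return 0;
    }
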
-
-TplGopDepStats compute_tpl_gop_dep_stats(
- const TplGopStats &tpl_gop_stats,
- const std::vector<RefFrameTable> &ref_frame_table_list) {
- const int frame_count = static_cast<int>(ref_frame_table_list.size());
-
- // Create the struct to store TPL dependency stats
- TplGopDepStats tpl_gop_dep_stats;
- for (int coding_idx = 0; coding_idx < frame_count; coding_idx++) {
- tpl_gop_dep_stats.frame_dep_stats_list.push_back(
- create_tpl_frame_dep_stats_wo_propagation(
- tpl_gop_stats.frame_stats_list[coding_idx]));
- }
-
- // Back propagation
- for (int coding_idx = frame_count - 1; coding_idx >= 0; coding_idx--) {
- auto &ref_frame_table = ref_frame_table_list[coding_idx];
- // TODO(angiebird): Handle/test the case where reference frame
- // is in the previous GOP
- tpl_frame_dep_stats_propagate(tpl_gop_stats.frame_stats_list[coding_idx],
- ref_frame_table, &tpl_gop_dep_stats);
- }
- return tpl_gop_dep_stats;
-}
-
-GopEncodeInfo AV1RateControlQMode::GetGopEncodeInfo(
- const GopStruct &gop_struct, const TplGopStats &tpl_gop_stats,
- const RefFrameTable &ref_frame_table_snapshot_init) {
- const std::vector<RefFrameTable> ref_frame_table_list =
- get_ref_frame_table_list(gop_struct, ref_frame_table_snapshot_init);
-
- GopEncodeInfo gop_encode_info;
- gop_encode_info.final_snapshot = ref_frame_table_list.back();
- TplGopDepStats gop_dep_stats =
- compute_tpl_gop_dep_stats(tpl_gop_stats, ref_frame_table_list);
- const int frame_count =
- static_cast<int>(tpl_gop_stats.frame_stats_list.size());
- for (int i = 0; i < frame_count; i++) {
- const TplFrameStats &frame_stats = tpl_gop_stats.frame_stats_list[i];
- const TplFrameDepStats &frame_dep_stats =
- gop_dep_stats.frame_dep_stats_list[i];
- const double cost_without_propagation =
- tpl_frame_stats_accumulate(frame_stats);
- const double cost_with_propagation =
- tpl_frame_dep_stats_accumulate(frame_dep_stats);
- // TODO(angiebird): This part is still a draft. Check whether this makes
- // sense mathematically.
- const double frame_importance =
- cost_with_propagation / cost_without_propagation;
- // Imitate the behavior of av1_tpl_get_qstep_ratio()
- const double qstep_ratio = sqrt(1 / frame_importance);
- FrameEncodeParameters param;
- param.q_index = av1_get_q_index_from_qstep_ratio(rc_param_.base_q_index,
- qstep_ratio, AOM_BITS_8);
- // TODO(angiebird): Determine rdmult based on q_index
- param.rdmult = 1;
- gop_encode_info.param_list.push_back(param);
- }
- return gop_encode_info;
-}
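
The mapping above gives frames whose coded cost propagates widely (large frame_importance) a smaller quantization step and hence a lower q_index. A sanity check of the arithmetic with hypothetical costs:

    #include <cassert>
    #include <cmath>

    int main() {
      const double cost_without_propagation = 1000.0;  // hypothetical
      const double cost_with_propagation = 4000.0;     // heavily referenced
      const double frame_importance =
          cost_with_propagation / cost_without_propagation;
      const double qstep_ratio = std::sqrt(1.0 / frame_importance);
      assert(std::fabs(qstep_ratio - 0.5) < 1e-12);  // 4x importance -> half qstep
      return 0;
    }
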
-
-} // namespace aom
diff --git a/chromium/third_party/libaom/source/libaom/av1/ratectrl_qmode.h b/chromium/third_party/libaom/source/libaom/av1/ratectrl_qmode.h
deleted file mode 100644
index c53f4e11a29..00000000000
--- a/chromium/third_party/libaom/source/libaom/av1/ratectrl_qmode.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2022, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_RATECTRL_QMODE_H_
-#define AOM_AV1_RATECTRL_QMODE_H_
-
-#include <deque>
-#include <queue>
-#include <vector>
-#include "av1/encoder/firstpass.h"
-#include "av1/ratectrl_qmode_interface.h"
-#include "av1/reference_manager.h"
-
-namespace aom {
-
-constexpr int kLayerDepthOffset = 1;
-constexpr int kMinIntervalToAddArf = 3;
-constexpr int kMinArfInterval = (kMinIntervalToAddArf + 1) / 2;
-
-struct TplFrameDepStats {
- int unit_size; // equivalent to min_block_size
- std::vector<std::vector<double>> unit_stats;
-};
-
-struct TplGopDepStats {
- std::vector<TplFrameDepStats> frame_dep_stats_list;
-};
-
-GopFrame gop_frame_invalid();
-
-GopFrame gop_frame_basic(int coding_idx, int order_idx, bool is_key_frame,
- bool is_arf_frame, bool is_golden_frame,
- bool is_show_frame, int depth);
-
-GopStruct construct_gop(RefFrameManager *ref_frame_manager,
- int show_frame_count, bool has_key_frame);
-
-TplFrameDepStats create_tpl_frame_dep_stats_empty(int frame_height,
- int frame_width,
- int min_block_size);
-TplFrameDepStats create_tpl_frame_dep_stats_wo_propagation(
- const TplFrameStats &frame_stats);
-
-double tpl_frame_stats_accumulate(const TplFrameStats &frame_stats);
-
-double tpl_frame_dep_stats_accumulate(const TplFrameDepStats &frame_dep_stats);
-
-void tpl_frame_dep_stats_propagate(const TplFrameStats &frame_stats,
- const RefFrameTable &ref_frame_table,
- TplGopDepStats *tpl_gop_dep_stats);
-
-int get_block_overlap_area(int r0, int c0, int r1, int c1, int size);
-
-TplGopDepStats compute_tpl_gop_dep_stats(
- const TplGopStats &tpl_gop_stats,
- const std::vector<RefFrameTable> &ref_frame_table_list);
-
-class AV1RateControlQMode : public AV1RateControlQModeInterface {
- public:
- void SetRcParam(const RateControlParam &rc_param) override;
- GopStructList DetermineGopInfo(const FirstpassInfo &firstpass_info) override;
- GopEncodeInfo GetGopEncodeInfo(
- const GopStruct &gop_struct, const TplGopStats &tpl_gop_stats,
- const RefFrameTable &ref_frame_table_snapshot) override;
-
- private:
- RateControlParam rc_param_;
-};
-} // namespace aom
-
-#endif // AOM_AV1_RATECTRL_QMODE_H_
diff --git a/chromium/third_party/libaom/source/libaom/av1/ratectrl_qmode_interface.h b/chromium/third_party/libaom/source/libaom/av1/ratectrl_qmode_interface.h
deleted file mode 100644
index e7dac8340e5..00000000000
--- a/chromium/third_party/libaom/source/libaom/av1/ratectrl_qmode_interface.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2022, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#ifndef AOM_AV1_RATECTRL_QMODE_INTERFACE_H_
-#define AOM_AV1_RATECTRL_QMODE_INTERFACE_H_
-
-#include <array>
-#include <vector>
-
-#include "av1/encoder/firstpass.h"
-
-namespace aom {
-
-constexpr int kBlockRefCount = 2;
-constexpr int kRefFrameTableSize = 8;
-
-struct MotionVector {
- int row; // subpel row
- int col; // subpel col
- int subpel_bits; // number of fractional bits used by row/col
-};
-
-struct RateControlParam {
- int max_gop_show_frame_count;
- int min_gop_show_frame_count;
- int max_ref_frames;
- int base_q_index;
-};
-
-struct TplBlockStats {
- int height; // pixel height
- int width; // pixel width
- int row; // pixel row of the top left corner
- int col; // pixel col of the top left corner
- int64_t intra_cost;
- int64_t inter_cost;
- std::array<MotionVector, kBlockRefCount> mv;
- std::array<int, kBlockRefCount> ref_frame_index;
-};
-
-enum class EncodeRefMode {
- kRegular,
- kOverlay,
- kShowExisting,
-};
-
-struct GopFrame {
- // basic info
- bool is_valid;
- int order_idx; // Index in display order in a GOP
- int coding_idx; // Index in coding order in a GOP
-
- int global_order_idx; // Index in display order in the whole video chunk
- int global_coding_idx; // Index in coding order in the whole video chunk
-
- bool is_key_frame; // If this is a key frame, resetting the reference
- // buffers is required
- bool is_arf_frame; // Is this a forward (alt-ref) frame, i.e. one whose
- // order_idx is higher than the current display order
- bool is_show_frame; // Is this frame a show frame after coding
- bool is_golden_frame; // Is this a high quality frame
-
- // reference frame info
- EncodeRefMode encode_ref_mode;
- int colocated_ref_idx; // colocated_ref_idx == -1 when encode_ref_mode ==
- // EncodeRefMode::kRegular
- int update_ref_idx; // The reference index that this frame should be updated
- // to. update_ref_idx == -1 when this frame will not
- // serve as a reference frame
- std::vector<int>
- ref_idx_list; // The indices of reference frames.
- // The size should be less than or equal to max_ref_frames.
- int layer_depth; // Layer depth in the GOP structure
- int primary_ref_idx; // We will use the primary reference to update current
- // frame's initial probability model
-};
-
-struct GopStruct {
- int show_frame_count;
- std::vector<GopFrame> gop_frame_list;
-};
-
-using GopStructList = std::vector<GopStruct>;
-
-struct FrameEncodeParameters {
- int q_index;
- int rdmult;
-};
-
-using FirstpassInfo = std::vector<FIRSTPASS_STATS>;
-using RefFrameTable = std::array<GopFrame, kRefFrameTableSize>;
-
-struct GopEncodeInfo {
- std::vector<FrameEncodeParameters> param_list;
- RefFrameTable final_snapshot; // RefFrameTable snapshot after coding this GOP
-};
-
-struct TplFrameStats {
- int min_block_size;
- int frame_width;
- int frame_height;
- std::vector<TplBlockStats> block_stats_list;
-};
-
-struct TplGopStats {
- std::vector<TplFrameStats> frame_stats_list;
-};
-
-class AV1RateControlQModeInterface {
- public:
- AV1RateControlQModeInterface();
- virtual ~AV1RateControlQModeInterface();
-
- virtual void SetRcParam(const RateControlParam &rc_param) = 0;
- virtual GopStructList DetermineGopInfo(
- const FirstpassInfo &firstpass_stats_list) = 0;
- // Accept firstpass and tpl info from the encoder and return q index and
- // rdmult. This needs to be called with consecutive GOPs as returned by
- // DetermineGopInfo.
- virtual GopEncodeInfo GetGopEncodeInfo(
- const GopStruct &gop_struct, const TplGopStats &tpl_gop_stats,
- const RefFrameTable &ref_frame_table_snapshot_init) = 0;
-}; // class AV1RateControlQModeInterface
-} // namespace aom
-
-#endif // AOM_AV1_RATECTRL_QMODE_INTERFACE_H_
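
For orientation, the intended call sequence of this interface (as implemented by the AV1RateControlQMode class whose removal is shown above): set the rate-control parameters once, derive the GOP layout from first-pass stats, then request per-frame q_index/rdmult GOP by GOP, threading the returned final_snapshot into the next call. A hedged driver sketch against the headers this patch deletes (all parameter values are placeholders):

    // Hedged driver sketch; assumes the headers deleted by this patch.
    #include <cstddef>
    #include <vector>
    #include "av1/ratectrl_qmode.h"

    void drive(aom::AV1RateControlQMode &rc, const aom::FirstpassInfo &fp_info,
               const std::vector<aom::TplGopStats> &tpl_stats_per_gop) {
      aom::RateControlParam rc_param = {};
      rc_param.max_gop_show_frame_count = 32;  // placeholder values
      rc_param.min_gop_show_frame_count = 4;
      rc_param.max_ref_frames = 7;
      rc_param.base_q_index = 128;
      rc.SetRcParam(rc_param);

      aom::GopStructList gop_list = rc.DetermineGopInfo(fp_info);
      aom::RefFrameTable snapshot = {};  // empty table before the first GOP
      for (std::size_t i = 0; i < gop_list.size(); ++i) {
        aom::GopEncodeInfo info =
            rc.GetGopEncodeInfo(gop_list[i], tpl_stats_per_gop[i], snapshot);
        snapshot = info.final_snapshot;  // carry the table into the next GOP
        // info.param_list[k].q_index / .rdmult drive the encoder per frame.
      }
    }
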
diff --git a/chromium/third_party/libaom/source/libaom/av1/ratectrl_rtc.cc b/chromium/third_party/libaom/source/libaom/av1/ratectrl_rtc.cc
index b68c4bd2d9f..db6fbcdd34c 100644
--- a/chromium/third_party/libaom/source/libaom/av1/ratectrl_rtc.cc
+++ b/chromium/third_party/libaom/source/libaom/av1/ratectrl_rtc.cc
@@ -65,8 +65,11 @@ std::unique_ptr<AV1RateControlRTC> AV1RateControlRTC::Create(
rc_api->cpi_->ppi =
static_cast<AV1_PRIMARY *>(aom_memalign(32, sizeof(AV1_PRIMARY)));
if (!rc_api->cpi_->ppi) return nullptr;
+ av1_zero(*rc_api->cpi_->ppi);
rc_api->cpi_->common.seq_params = &rc_api->cpi_->ppi->seq_params;
av1_zero(*rc_api->cpi_->common.seq_params);
+ const int num_layers = cfg.ss_number_layers * cfg.ts_number_layers;
+ av1_alloc_layer_context(rc_api->cpi_, num_layers);
rc_api->InitRateControl(cfg);
if (cfg.aq_mode) {
AV1_COMP *const cpi = rc_api->cpi_;
@@ -94,6 +97,9 @@ AV1RateControlRTC::~AV1RateControlRTC() {
}
}
}
+ aom_free(cpi_->svc.layer_context);
+ cpi_->svc.layer_context = nullptr;
+
if (cpi_->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ) {
aom_free(cpi_->enc_seg.map);
cpi_->enc_seg.map = nullptr;
@@ -112,6 +118,7 @@ void AV1RateControlRTC::InitRateControl(const AV1RateControlRtcConfig &rc_cfg) {
cm->seq_params->bit_depth = AOM_BITS_8;
cm->show_frame = 1;
oxcf->profile = cm->seq_params->profile;
+ oxcf->mode = REALTIME;
oxcf->rc_cfg.mode = AOM_CBR;
oxcf->pass = AOM_RC_ONE_PASS;
oxcf->q_cfg.aq_mode = rc_cfg.aq_mode ? CYCLIC_REFRESH_AQ : NO_AQ;
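
The added av1_zero/av1_alloc_layer_context calls make Create() safe for SVC configs: the layer-context array is sized from ss_number_layers * ts_number_layers and freed in the destructor. A hedged sketch of constructing the controller (field names follow av1/ratectrl_rtc.h in this tree; treat them and the values as assumptions):

    // Hedged sketch; all values below are placeholders.
    #include <memory>
    #include "av1/ratectrl_rtc.h"

    std::unique_ptr<aom::AV1RateControlRTC> make_rtc_controller() {
      aom::AV1RateControlRtcConfig cfg;
      cfg.width = 640;
      cfg.height = 360;
      cfg.target_bandwidth = 800;  // kbps
      cfg.ss_number_layers = 1;    // these two size the layer-context array
      cfg.ts_number_layers = 1;
      return aom::AV1RateControlRTC::Create(cfg);  // nullptr on alloc failure
    }
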
diff --git a/chromium/third_party/libaom/source/libaom/av1/reference_manager.cc b/chromium/third_party/libaom/source/libaom/av1/reference_manager.cc
deleted file mode 100644
index 71075965127..00000000000
--- a/chromium/third_party/libaom/source/libaom/av1/reference_manager.cc
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2022, Alliance for Open Media. All rights reserved
- *
- * This source code is subject to the terms of the BSD 2 Clause License and
- * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
- * was not distributed with this source code in the LICENSE file, you can
- * obtain it at www.aomedia.org/license/software. If the Alliance for Open
- * Media Patent License 1.0 was not distributed with this source code in the
- * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
- */
-
-#include "av1/reference_manager.h"
-
-namespace aom {
-
-void RefFrameManager::Reset() {
- free_ref_idx_list_.clear();
- for (int i = 0; i < kRefFrameTableSize; ++i) {
- free_ref_idx_list_.push_back(i);
- }
- forward_stack_.clear();
- backward_queue_.clear();
- last_queue_.clear();
-}
-
-int RefFrameManager::AllocateRefIdx() {
- if (free_ref_idx_list_.empty()) {
- size_t backward_size = backward_queue_.size();
- size_t last_size = last_queue_.size();
- if (last_size >= backward_size) {
- int ref_idx = last_queue_.front();
- last_queue_.pop_front();
- free_ref_idx_list_.push_back(ref_idx);
- } else {
- int ref_idx = backward_queue_.front();
- backward_queue_.pop_front();
- free_ref_idx_list_.push_back(ref_idx);
- }
- }
-
- int ref_idx = free_ref_idx_list_.front();
- free_ref_idx_list_.pop_front();
- return ref_idx;
-}
-
-void RefFrameManager::UpdateOrder(int order_idx) {
- if (forward_stack_.empty()) {
- return;
- }
- int ref_idx = forward_stack_.back();
- const GopFrame &gf_frame = ref_frame_table_[ref_idx];
- if (gf_frame.order_idx <= order_idx) {
- forward_stack_.pop_back();
- if (gf_frame.is_golden_frame) {
- // high quality frame
- backward_queue_.push_back(ref_idx);
- } else {
- last_queue_.push_back(ref_idx);
- }
- }
-}
-
-int RefFrameManager::ColocatedRefIdx(int order_idx) {
- if (forward_stack_.size() == 0) return -1;
- int ref_idx = forward_stack_.back();
- int arf_order_idx = ref_frame_table_[ref_idx].order_idx;
- if (arf_order_idx == order_idx) {
- return ref_idx;
- }
- return -1;
-}
-
-void RefFrameManager::UpdateFrame(GopFrame *gop_frame,
- RefUpdateType ref_update_type,
- EncodeRefMode encode_ref_mode) {
- gop_frame->colocated_ref_idx = ColocatedRefIdx(gop_frame->order_idx);
- if (gop_frame->is_show_frame) {
- UpdateOrder(gop_frame->order_idx);
- }
- if (ref_update_type == RefUpdateType::kNone) {
- gop_frame->update_ref_idx = -1;
- } else {
- const int ref_idx = AllocateRefIdx();
- gop_frame->update_ref_idx = ref_idx;
- switch (ref_update_type) {
- case RefUpdateType::kForward: forward_stack_.push_back(ref_idx); break;
- case RefUpdateType::kBackward: backward_queue_.push_back(ref_idx); break;
- case RefUpdateType::kLast: last_queue_.push_back(ref_idx); break;
- case RefUpdateType::kNone: break;
- }
- ref_frame_table_[ref_idx] = *gop_frame;
- }
- gop_frame->encode_ref_mode = encode_ref_mode;
-}
-
-} // namespace aom
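
The AllocateRefIdx eviction policy above frees a slot from whichever of the two recycling queues is longer, preferring last_queue_ on ties, and always evicts the oldest entry. A simplified standalone model of that choice:

    #include <cassert>
    #include <deque>

    // Simplified model of RefFrameManager::AllocateRefIdx's eviction choice.
    int evict(std::deque<int> &last_queue, std::deque<int> &backward_queue) {
      std::deque<int> &victim_queue =
          last_queue.size() >= backward_queue.size() ? last_queue : backward_queue;
      const int ref_idx = victim_queue.front();  // oldest entry goes first
      victim_queue.pop_front();
      return ref_idx;
    }

    int main() {
      std::deque<int> last = { 3, 5 }, backward = { 1 };
      assert(evict(last, backward) == 3);  // last is longer, evict its oldest
      assert(evict(last, backward) == 5);  // sizes tie (1 vs 1), last still wins
      return 0;
    }
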
diff --git a/chromium/third_party/libaom/source/libaom/build/cmake/aom_config_defaults.cmake b/chromium/third_party/libaom/source/libaom/build/cmake/aom_config_defaults.cmake
index 4c39a3a05a2..eafd26cf585 100644
--- a/chromium/third_party/libaom/source/libaom/build/cmake/aom_config_defaults.cmake
+++ b/chromium/third_party/libaom/source/libaom/build/cmake/aom_config_defaults.cmake
@@ -24,33 +24,27 @@ set_aom_detect_var(INLINE "" "Sets INLINE value for current target.")
# CPUs.
set_aom_detect_var(ARCH_ARM 0 "Enables ARM architecture.")
-set_aom_detect_var(ARCH_MIPS 0 "Enables MIPS architecture.")
set_aom_detect_var(ARCH_PPC 0 "Enables PPC architecture.")
set_aom_detect_var(ARCH_X86 0 "Enables X86 architecture.")
set_aom_detect_var(ARCH_X86_64 0 "Enables X86_64 architecture.")
# ARM feature flags.
set_aom_detect_var(HAVE_NEON 0 "Enables NEON intrinsics optimizations.")
-
-# MIPS feature flags.
-set_aom_detect_var(HAVE_DSPR2 0 "Enables DSPR2 optimizations.")
-set_aom_detect_var(HAVE_MIPS32 0 "Enables MIPS32 optimizations.")
-set_aom_detect_var(HAVE_MIPS64 0 "Enables MIPS64 optimizations. ")
-set_aom_detect_var(HAVE_MSA 0 "Enables MSA optimizations.")
+set_aom_detect_var(HAVE_ARM_CRC32 0 "Enables Arm CRC32 optimizations.")
# PPC feature flags.
set_aom_detect_var(HAVE_VSX 0 "Enables VSX optimizations.")
# x86/x86_64 feature flags.
-set_aom_detect_var(HAVE_AVX 0 "Enables AVX optimizations.")
-set_aom_detect_var(HAVE_AVX2 0 "Enables AVX2 optimizations.")
set_aom_detect_var(HAVE_MMX 0 "Enables MMX optimizations. ")
set_aom_detect_var(HAVE_SSE 0 "Enables SSE optimizations.")
set_aom_detect_var(HAVE_SSE2 0 "Enables SSE2 optimizations.")
set_aom_detect_var(HAVE_SSE3 0 "Enables SSE3 optimizations.")
+set_aom_detect_var(HAVE_SSSE3 0 "Enables SSSE3 optimizations.")
set_aom_detect_var(HAVE_SSE4_1 0 "Enables SSE 4.1 optimizations.")
set_aom_detect_var(HAVE_SSE4_2 0 "Enables SSE 4.2 optimizations.")
-set_aom_detect_var(HAVE_SSSE3 0 "Enables SSSE3 optimizations.")
+set_aom_detect_var(HAVE_AVX 0 "Enables AVX optimizations.")
+set_aom_detect_var(HAVE_AVX2 0 "Enables AVX2 optimizations.")
# Flags describing the build environment.
set_aom_detect_var(HAVE_FEXCEPT 0
@@ -71,35 +65,30 @@ set_aom_config_var(AOM_RTCD_FLAGS ""
set_aom_config_var(CONFIG_AV1_DECODER 1 "Enable AV1 decoder.")
set_aom_config_var(CONFIG_AV1_ENCODER 1 "Enable AV1 encoder.")
set_aom_config_var(CONFIG_BIG_ENDIAN 0 "Internal flag.")
-set_aom_config_var(CONFIG_FRAME_PARALLEL_ENCODE 0
- "Enable frame parallelism during encode.")
-set_aom_config_var(
- CONFIG_FRAME_PARALLEL_ENCODE_2 0
- "Enable frame parallelism during encode for frames in lower layer depths.")
set_aom_config_var(CONFIG_FPMT_TEST 0 "Enable FPMT testing.")
set_aom_config_var(CONFIG_GCC 0 "Building with GCC (detect).")
set_aom_config_var(CONFIG_GCOV 0 "Enable gcov support.")
set_aom_config_var(CONFIG_GPROF 0 "Enable gprof support.")
set_aom_config_var(CONFIG_LIBYUV 1 "Enables libyuv scaling/conversion support.")
+set_aom_config_var(CONFIG_AV1_HIGHBITDEPTH 1
+ "Build with high bitdepth support.")
+set_aom_config_var(CONFIG_AV1_TEMPORAL_DENOISING 0
+ "Build with temporal denoising support.")
set_aom_config_var(CONFIG_MULTITHREAD 1 "Multithread support.")
set_aom_config_var(CONFIG_OS_SUPPORT 0 "Internal flag.")
set_aom_config_var(CONFIG_PIC 0 "Build with PIC enabled.")
+set_aom_config_var(CONFIG_REALTIME_ONLY 0
+ "Build for RTC-only. See aomcx.h for all disabled features.")
set_aom_config_var(CONFIG_RUNTIME_CPU_DETECT 1 "Runtime CPU detection support.")
set_aom_config_var(CONFIG_SHARED 0 "Build shared libs.")
set_aom_config_var(CONFIG_WEBM_IO 1 "Enables WebM support.")
-set_aom_config_var(CONFIG_REALTIME_ONLY 0
- "Build for RTC-only. See aomcx.h for all disabled features.")
-set_aom_config_var(CONFIG_AV1_HIGHBITDEPTH 1
- "Build with high bitdepth support.")
-set_aom_config_var(CONFIG_AV1_TEMPORAL_DENOISING 0
- "Build with temporal denoising support.")
# Debugging flags.
set_aom_config_var(CONFIG_DEBUG 0 "Enable debug-only code.")
-set_aom_config_var(CONFIG_MISMATCH_DEBUG 0 "Mismatch debugging flag.")
set_aom_config_var(CONFIG_EXCLUDE_SIMD_MISMATCH 0
"Exclude mismatch in SIMD functions for testing/debugging.")
+set_aom_config_var(CONFIG_MISMATCH_DEBUG 0 "Mismatch debugging flag.")
# AV1 feature flags.
set_aom_config_var(CONFIG_ACCOUNTING 0 "Enables bit accounting.")
@@ -115,53 +104,55 @@ set_aom_config_var(FORCE_HIGHBITDEPTH_DECODING 0
mark_as_advanced(FORCE_HIGHBITDEPTH_DECODING)
set_aom_config_var(CONFIG_MAX_DECODE_PROFILE 2
"Max profile to support decoding.")
-set_aom_config_var(CONFIG_NORMAL_TILE_MODE 0 "Only enables normal tile mode.")
+set_aom_config_var(
+ CONFIG_NORMAL_TILE_MODE 0
+ "Only enables general decoding (disables large scale tile decoding).")
set_aom_config_var(CONFIG_SIZE_LIMIT 0 "Limit max decode width/height.")
set_aom_config_var(CONFIG_SPATIAL_RESAMPLING 1 "Spatial resampling.")
-set_aom_config_var(DECODE_HEIGHT_LIMIT 0 "Set limit for decode height.")
-set_aom_config_var(DECODE_WIDTH_LIMIT 0 "Set limit for decode width.")
-set_aom_config_var(CONFIG_TUNE_VMAF 0 "Enable encoding tuning for VMAF.")
set_aom_config_var(CONFIG_TUNE_BUTTERAUGLI 0
"Enable encoding tuning for Butteraugli.")
+set_aom_config_var(CONFIG_TUNE_VMAF 0 "Enable encoding tuning for VMAF.")
+set_aom_config_var(DECODE_HEIGHT_LIMIT 0 "Set limit for decode height.")
+set_aom_config_var(DECODE_WIDTH_LIMIT 0 "Set limit for decode width.")
set_aom_config_var(STATIC_LINK_JXL 0 "Statically link the JPEG-XL library.")
# AV1 experiment flags.
-set_aom_config_var(CONFIG_TFLITE 0
- "AV1 experiment: Enable tensorflow lite library.")
-set_aom_config_var(CONFIG_SPEED_STATS 0 "AV1 experiment.")
-set_aom_config_var(CONFIG_COLLECT_RD_STATS 0 "AV1 experiment.")
-set_aom_config_var(CONFIG_ENTROPY_STATS 0 "AV1 experiment.")
-set_aom_config_var(CONFIG_INTER_STATS_ONLY 0 "AV1 experiment.")
+set_aom_config_var(CONFIG_BITRATE_ACCURACY 0
+ "AV1 experiment: Improve bitrate accuracy.")
+set_aom_config_var(
+ CONFIG_BITRATE_ACCURACY_BL 0
+ "AV1 experiment: Baseline of improve bitrate accuracy experiment.")
set_aom_config_var(CONFIG_BITSTREAM_DEBUG 0
"AV1 experiment: Bitstream debugging.")
-set_aom_config_var(CONFIG_RD_DEBUG 0 "AV1 experiment.")
-set_aom_config_var(CONFIG_RD_COMMAND 0
- "AV1 experiment: Use external rdmult and q_index.")
set_aom_config_var(
- CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 1
- "AV1 experiment: Disable full_pixel_motion_search_based_split on BLOCK_8X8.")
+ CONFIG_COLLECT_COMPONENT_TIMING 0
+ "AV1 experiment: Collect encoding component timing information.")
set_aom_config_var(
CONFIG_COLLECT_PARTITION_STATS 0
"AV1 experiment: Collect partition timing stats. Can be 1 or 2.")
+set_aom_config_var(CONFIG_COLLECT_RD_STATS 0 "AV1 experiment.")
set_aom_config_var(
- CONFIG_COLLECT_COMPONENT_TIMING 0
- "AV1 experiment: Collect encoding component timing information.")
+ CONFIG_DISABLE_FULL_PIXEL_SPLIT_8X8 1
+ "AV1 experiment: Disable full_pixel_motion_search_based_split on BLOCK_8X8.")
+set_aom_config_var(CONFIG_ENTROPY_STATS 0 "AV1 experiment.")
+set_aom_config_var(CONFIG_INTER_STATS_ONLY 0 "AV1 experiment.")
set_aom_config_var(CONFIG_NN_V2 0
"AV1 experiment: Fully-connected neural nets ver.2.")
set_aom_config_var(CONFIG_OPTICAL_FLOW_API 0
"AV1 experiment: for optical flow API.")
-set_aom_config_var(
- CONFIG_RT_ML_PARTITIONING 0
- "AV1 experiment: Build with ML-based partitioning for Real Time.")
set_aom_config_var(CONFIG_PARTITION_SEARCH_ORDER 0
"AV1 experiment: Use alternative partition search order.")
-set_aom_config_var(
- CONFIG_BITRATE_ACCURACY_BL 0
- "AV1 experiment: Baseline of improve bitrate accuracy experiment.")
-set_aom_config_var(CONFIG_BITRATE_ACCURACY 0
- "AV1 experiment: Improve bitrate accuracy.")
set_aom_config_var(CONFIG_RATECTRL_LOG 0
"AV1 experiment: Log rate control decision.")
+set_aom_config_var(CONFIG_RD_COMMAND 0
+ "AV1 experiment: Use external rdmult and q_index.")
+set_aom_config_var(CONFIG_RD_DEBUG 0 "AV1 experiment.")
+set_aom_config_var(
+ CONFIG_RT_ML_PARTITIONING 0
+ "AV1 experiment: Build with ML-based partitioning for Real Time.")
+set_aom_config_var(CONFIG_SPEED_STATS 0 "AV1 experiment.")
+set_aom_config_var(CONFIG_TFLITE 0
+ "AV1 experiment: Enable tensorflow lite library.")
set_aom_config_var(CONFIG_THREE_PASS 0
"AV1 experiment: Enable three-pass encoding.")
@@ -193,11 +184,6 @@ set_aom_option_var(ENABLE_WERROR "Converts warnings to errors at compile time."
# ARM assembly/intrinsics flags.
set_aom_option_var(ENABLE_NEON "Enables NEON optimizations on ARM targets." ON)
-# MIPS assembly/intrinsics flags.
-set_aom_option_var(ENABLE_DSPR2 "Enables DSPR2 optimizations on MIPS targets."
- OFF)
-set_aom_option_var(ENABLE_MSA "Enables MSA optimizations on MIPS targets." OFF)
-
# VSX intrinsics flags.
set_aom_option_var(ENABLE_VSX "Enables VSX optimizations on PowerPC targets."
ON)
diff --git a/chromium/third_party/libaom/source/libaom/build/cmake/aom_configure.cmake b/chromium/third_party/libaom/source/libaom/build/cmake/aom_configure.cmake
index 2492f4ffe8f..ee566af6570 100644
--- a/chromium/third_party/libaom/source/libaom/build/cmake/aom_configure.cmake
+++ b/chromium/third_party/libaom/source/libaom/build/cmake/aom_configure.cmake
@@ -40,6 +40,10 @@ if(FORCE_HIGHBITDEPTH_DECODING AND NOT CONFIG_AV1_HIGHBITDEPTH)
"FORCE_HIGHBITDEPTH_DECODING")
endif()
+if(CONFIG_THREE_PASS AND NOT CONFIG_AV1_DECODER)
+ change_config_and_warn(CONFIG_THREE_PASS 0 "CONFIG_AV1_DECODER=0")
+endif()
+
# Generate the user config settings.
list(APPEND aom_build_vars ${AOM_CONFIG_VARS} ${AOM_OPTION_VARS})
foreach(cache_var ${aom_build_vars})
@@ -67,7 +71,7 @@ if(NOT AOM_TARGET_CPU)
endif()
elseif(cpu_lowercase STREQUAL "i386" OR cpu_lowercase STREQUAL "x86")
set(AOM_TARGET_CPU "x86")
- elseif(cpu_lowercase MATCHES "^arm" OR cpu_lowercase MATCHES "^mips")
+ elseif(cpu_lowercase MATCHES "^arm")
set(AOM_TARGET_CPU "${cpu_lowercase}")
elseif(cpu_lowercase MATCHES "aarch64")
set(AOM_TARGET_CPU "arm64")
@@ -110,6 +114,29 @@ endif()
if(BUILD_SHARED_LIBS)
set(CONFIG_PIC 1)
set(CONFIG_SHARED 1)
+elseif(NOT CONFIG_PIC)
+ # Update the variable only when it does not carry the CMake assigned help
+ # string for variables specified via the command line. This allows the user to
+ # force CONFIG_PIC=0.
+ unset(cache_helpstring)
+ get_property(cache_helpstring CACHE CONFIG_PIC PROPERTY HELPSTRING)
+ if(NOT "${cache_helpstring}" STREQUAL "${cmake_cmdline_helpstring}")
+ aom_check_c_compiles("pie_check" "
+ #if !(__pie__ || __PIE__)
+ #error Neither __pie__ or __PIE__ are set
+ #endif
+ extern void unused(void);
+ void unused(void) {}" HAVE_PIE)
+
+ if(HAVE_PIE)
+ # If -fpie or -fPIE are used ensure the assembly code has PIC enabled to
+ # avoid DT_TEXTRELs: /usr/bin/ld: warning: creating DT_TEXTREL in a PIE
+ set(CONFIG_PIC 1)
+ message(
+ "CONFIG_PIC enabled for position independent executable (PIE) build")
+ endif()
+ endif()
+ unset(cache_helpstring)
endif()
if(NOT MSVC)
@@ -277,7 +304,17 @@ else()
add_compiler_flag_if_supported("-Wall")
add_compiler_flag_if_supported("-Wdisabled-optimization")
add_compiler_flag_if_supported("-Wextra")
- add_compiler_flag_if_supported("-Wextra-semi")
+ # Prior to version 3.19.0 cmake would fail to parse the warning emitted by gcc
+ # with this flag. Note the order of this check and -Wextra-semi-stmt is
+ # important due to is_flag_present() matching substrings with string(FIND
+ # ...).
+ if(CMAKE_VERSION VERSION_LESS "3.19"
+ AND CMAKE_C_COMPILER_ID STREQUAL "GNU"
+ AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 10)
+ add_cxx_flag_if_supported("-Wextra-semi")
+ else()
+ add_compiler_flag_if_supported("-Wextra-semi")
+ endif()
add_compiler_flag_if_supported("-Wextra-semi-stmt")
add_compiler_flag_if_supported("-Wfloat-conversion")
add_compiler_flag_if_supported("-Wformat=2")
@@ -291,6 +328,9 @@ else()
add_compiler_flag_if_supported("-Wuninitialized")
add_compiler_flag_if_supported("-Wunused")
add_compiler_flag_if_supported("-Wvla")
+ add_cxx_flag_if_supported("-Wc++14-extensions")
+ add_cxx_flag_if_supported("-Wc++17-extensions")
+ add_cxx_flag_if_supported("-Wc++20-extensions")
if(CMAKE_C_COMPILER_ID MATCHES "GNU" AND SANITIZE MATCHES "address|undefined")
@@ -334,6 +374,26 @@ else()
add_compiler_flag_if_supported("-D_FILE_OFFSET_BITS=64")
endif()
+# Prior to r23, or with ANDROID_USE_LEGACY_TOOLCHAIN_FILE set,
+# android.toolchain.cmake would set normal (non-cache) versions of variables
+# like CMAKE_C_FLAGS_RELEASE which would mask the ones added to the cache
+# variable in add_compiler_flag_if_supported(), etc. As a workaround we add
+# everything accumulated in AOM_C/CXX_FLAGS to the normal versions. This could
+# also be addressed by reworking the flag tests and adding the results directly
+# to target_compile_options() as in e.g., libgav1, but that's a larger task.
+# https://github.com/android/ndk/wiki/Changelog-r23#changes
+if(ANDROID
+ AND ("${ANDROID_NDK_MAJOR}" LESS 23 OR ANDROID_USE_LEGACY_TOOLCHAIN_FILE))
+ foreach(lang C;CXX)
+ string(STRIP "${AOM_${lang}_FLAGS}" AOM_${lang}_FLAGS)
+ if(AOM_${lang}_FLAGS)
+ foreach(config ${AOM_${lang}_CONFIGS})
+ set(${config} "${${config}} ${AOM_${lang}_FLAGS}")
+ endforeach()
+ endif()
+ endforeach()
+endif()
+
set(AOM_LIB_LINK_TYPE PUBLIC)
if(EMSCRIPTEN)
diff --git a/chromium/third_party/libaom/source/libaom/build/cmake/compiler_flags.cmake b/chromium/third_party/libaom/source/libaom/build/cmake/compiler_flags.cmake
index 24484bcadef..f008b964f53 100644
--- a/chromium/third_party/libaom/source/libaom/build/cmake/compiler_flags.cmake
+++ b/chromium/third_party/libaom/source/libaom/build/cmake/compiler_flags.cmake
@@ -59,6 +59,12 @@ function(add_c_flag_if_supported c_flag)
return()
endif()
+ # Between 3.17.0 and 3.18.2 check_c_compiler_flag() sets a normal variable at
+ # parent scope while check_cxx_source_compiles() continues to set an internal
+ # cache variable, so we unset both to avoid the failure / success state
+ # persisting between checks. See
+ # https://gitlab.kitware.com/cmake/cmake/-/issues/21207.
+ unset(C_FLAG_SUPPORTED)
unset(C_FLAG_SUPPORTED CACHE)
message("Checking C compiler flag support for: " ${c_flag})
check_c_compiler_flag("${c_flag}" C_FLAG_SUPPORTED)
@@ -89,6 +95,12 @@ function(add_cxx_flag_if_supported cxx_flag)
return()
endif()
+ # Between 3.17.0 and 3.18.2 check_cxx_compiler_flag() sets a normal variable
+ # at parent scope while check_cxx_source_compiles() continues to set an
+ # internal cache variable, so we unset both to avoid the failure / success
+ # state persisting between checks. See
+ # https://gitlab.kitware.com/cmake/cmake/-/issues/21207.
+ unset(CXX_FLAG_SUPPORTED)
unset(CXX_FLAG_SUPPORTED CACHE)
message("Checking C++ compiler flag support for: " ${cxx_flag})
check_cxx_compiler_flag("${cxx_flag}" CXX_FLAG_SUPPORTED)
diff --git a/chromium/third_party/libaom/source/libaom/build/cmake/cpu.cmake b/chromium/third_party/libaom/source/libaom/build/cmake/cpu.cmake
index ef2d7552bb9..99ac38ab541 100644
--- a/chromium/third_party/libaom/source/libaom/build/cmake/cpu.cmake
+++ b/chromium/third_party/libaom/source/libaom/build/cmake/cpu.cmake
@@ -20,33 +20,19 @@ if("${AOM_TARGET_CPU}" MATCHES "^arm")
set(HAVE_NEON 0)
set(AOM_RTCD_FLAGS ${AOM_RTCD_FLAGS} --disable-neon)
endif()
-elseif("${AOM_TARGET_CPU}" MATCHES "^mips")
- set(ARCH_MIPS 1)
- set(RTCD_ARCH_MIPS "yes")
- if("${AOM_TARGET_CPU}" STREQUAL "mips32")
- set(HAVE_MIPS32 1)
- set(RTCD_HAVE_MIPS32 "yes")
- elseif("${AOM_TARGET_CPU}" STREQUAL "mips64")
- set(HAVE_MIPS64 1)
- set(RTCD_HAVE_MIPS64 "yes")
- endif()
-
- # HAVE_DSPR2 is set by mips toolchain files.
- if(ENABLE_DSPR2 AND HAVE_DSPR2)
- set(RTCD_HAVE_DSPR2 "yes")
+ check_c_source_compiles("
+ #if !defined(__ARM_FEATURE_CRC32) || __ARM_FEATURE_CRC32 != 1
+ #error \"CRC32 is unavailable.\"
+ #endif
+ int main(void) { return 0; }" HAVE_CRC32)
+ if(HAVE_CRC32)
+ set(HAVE_ARM_CRC32 1)
else()
- set(HAVE_DSPR2 0)
- set(AOM_RTCD_FLAGS ${AOM_RTCD_FLAGS} --disable-dspr2)
+ set(HAVE_ARM_CRC32 0)
+ set(AOM_RTCD_FLAGS ${AOM_RTCD_FLAGS} --disable-arm_crc32)
endif()
- # HAVE_MSA is set by mips toolchain files.
- if(ENABLE_MSA AND HAVE_MSA)
- set(RTCD_HAVE_MSA "yes")
- else()
- set(HAVE_MSA 0)
- set(AOM_RTCD_FLAGS ${AOM_RTCD_FLAGS} --disable-msa)
- endif()
elseif("${AOM_TARGET_CPU}" MATCHES "ppc")
set(ARCH_PPC 1)
set(RTCD_ARCH_PPC "yes")
diff --git a/chromium/third_party/libaom/source/libaom/build/cmake/rtcd.pl b/chromium/third_party/libaom/source/libaom/build/cmake/rtcd.pl
index e9f75dd44ba..bd3b9d534da 100755
--- a/chromium/third_party/libaom/source/libaom/build/cmake/rtcd.pl
+++ b/chromium/third_party/libaom/source/libaom/build/cmake/rtcd.pl
@@ -321,38 +321,6 @@ EOF
common_bottom;
}
-sub mips() {
- determine_indirection("c", @ALL_ARCHS);
-
- # Assign the helper variable for each enabled extension
- foreach my $opt (@ALL_ARCHS) {
- my $opt_uc = uc $opt;
- eval "\$have_${opt}=\"flags & HAS_${opt_uc}\"";
- }
-
- common_top;
-
- print <<EOF;
-#include "config/aom_config.h"
-
-#ifdef RTCD_C
-static void setup_rtcd_internal(void)
-{
-EOF
-
- set_function_pointers("c", @ALL_ARCHS);
-
- print <<EOF;
-#if HAVE_DSPR2
-void aom_dsputil_static_init();
-aom_dsputil_static_init();
-#endif
-}
-#endif
-EOF
- common_bottom;
-}
-
sub ppc() {
determine_indirection("c", @ALL_ARCHS);
@@ -420,20 +388,12 @@ if ($opts{arch} eq 'x86') {
@REQUIRES = filter(qw/mmx sse sse2/);
&require(@REQUIRES);
x86;
-} elsif ($opts{arch} eq 'mips32' || $opts{arch} eq 'mips64') {
- @ALL_ARCHS = filter("$opts{arch}");
- if (aom_config("HAVE_DSPR2") eq "yes") {
- @ALL_ARCHS = filter("$opts{arch}", qw/dspr2/);
- } elsif (aom_config("HAVE_MSA") eq "yes") {
- @ALL_ARCHS = filter("$opts{arch}", qw/msa/);
- }
- mips;
} elsif ($opts{arch} =~ /armv[78]\w?/) {
@ALL_ARCHS = filter(qw/neon/);
arm;
} elsif ($opts{arch} eq 'arm64' ) {
- @ALL_ARCHS = filter(qw/neon/);
- &require("neon");
+ @ALL_ARCHS = filter(qw/neon arm_crc32/);
+ &require(@ALL_ARCHS);
arm;
} elsif ($opts{arch} eq 'ppc') {
@ALL_ARCHS = filter(qw/vsx/);
diff --git a/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/arm64-linux-gcc.cmake b/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/arm64-linux-gcc.cmake
index fc4b277bb99..64e460b604b 100644
--- a/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/arm64-linux-gcc.cmake
+++ b/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/arm64-linux-gcc.cmake
@@ -21,9 +21,15 @@ if("${CROSS}" STREQUAL "")
set(CROSS aarch64-linux-gnu-)
endif()
-set(CMAKE_C_COMPILER ${CROSS}gcc)
-set(CMAKE_CXX_COMPILER ${CROSS}g++)
-set(AS_EXECUTABLE ${CROSS}as)
+if(NOT CMAKE_C_COMPILER)
+ set(CMAKE_C_COMPILER ${CROSS}gcc)
+endif()
+if(NOT CMAKE_CXX_COMPILER)
+ set(CMAKE_CXX_COMPILER ${CROSS}g++)
+endif()
+if(NOT AS_EXECUTABLE)
+ set(AS_EXECUTABLE ${CROSS}as)
+endif()
set(CMAKE_C_FLAGS_INIT "-march=armv8-a")
set(CMAKE_CXX_FLAGS_INIT "-march=armv8-a")
set(AOM_AS_FLAGS "-march=armv8-a")
diff --git a/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/arm64-mingw-gcc.cmake b/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/arm64-mingw-gcc.cmake
index a8e15cb3171..5472ed4d4e5 100644
--- a/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/arm64-mingw-gcc.cmake
+++ b/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/arm64-mingw-gcc.cmake
@@ -20,10 +20,18 @@ if("${CROSS}" STREQUAL "")
set(CROSS aarch64-w64-mingw32-)
endif()
-set(CMAKE_C_COMPILER ${CROSS}gcc)
-set(CMAKE_CXX_COMPILER ${CROSS}g++)
-set(CMAKE_AR ${CROSS}ar CACHE FILEPATH Archiver)
-set(CMAKE_RANLIB ${CROSS}ranlib CACHE FILEPATH Indexer)
+if(NOT CMAKE_C_COMPILER)
+ set(CMAKE_C_COMPILER ${CROSS}gcc)
+endif()
+if(NOT CMAKE_CXX_COMPILER)
+ set(CMAKE_CXX_COMPILER ${CROSS}g++)
+endif()
+if(NOT CMAKE_AR)
+ set(CMAKE_AR ${CROSS}ar CACHE FILEPATH Archiver)
+endif()
+if(NOT CMAKE_RANLIB)
+ set(CMAKE_RANLIB ${CROSS}ranlib CACHE FILEPATH Indexer)
+endif()
# No runtime cpu detect for arm64-mingw-gcc.
set(CONFIG_RUNTIME_CPU_DETECT 0 CACHE STRING "")
diff --git a/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/armv7-linux-gcc.cmake b/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/armv7-linux-gcc.cmake
index 26c028f11ff..1201538a259 100644
--- a/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/armv7-linux-gcc.cmake
+++ b/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/armv7-linux-gcc.cmake
@@ -25,9 +25,15 @@ if(NOT ${CROSS} MATCHES hf-$)
set(AOM_EXTRA_TOOLCHAIN_FLAGS "-mfloat-abi=softfp")
endif()
-set(CMAKE_C_COMPILER ${CROSS}gcc)
-set(CMAKE_CXX_COMPILER ${CROSS}g++)
-set(AS_EXECUTABLE ${CROSS}as)
+if(NOT CMAKE_C_COMPILER)
+ set(CMAKE_C_COMPILER ${CROSS}gcc)
+endif()
+if(NOT CMAKE_CXX_COMPILER)
+ set(CMAKE_CXX_COMPILER ${CROSS}g++)
+endif()
+if(NOT AS_EXECUTABLE)
+ set(AS_EXECUTABLE ${CROSS}as)
+endif()
set(CMAKE_C_FLAGS_INIT "-march=armv7-a -mfpu=vfpv3 \
${AOM_EXTRA_TOOLCHAIN_FLAGS}")
set(CMAKE_CXX_FLAGS_INIT "-march=armv7-a -mfpu=vfpv3 \
diff --git a/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/armv7-mingw-gcc.cmake b/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/armv7-mingw-gcc.cmake
index 2dc4b1882dc..8a928916d19 100644
--- a/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/armv7-mingw-gcc.cmake
+++ b/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/armv7-mingw-gcc.cmake
@@ -20,10 +20,18 @@ if("${CROSS}" STREQUAL "")
set(CROSS armv7-w64-mingw32-)
endif()
-set(CMAKE_C_COMPILER ${CROSS}gcc)
-set(CMAKE_CXX_COMPILER ${CROSS}g++)
-set(CMAKE_AR ${CROSS}ar CACHE FILEPATH Archiver)
-set(CMAKE_RANLIB ${CROSS}ranlib CACHE FILEPATH Indexer)
+if(NOT CMAKE_C_COMPILER)
+ set(CMAKE_C_COMPILER ${CROSS}gcc)
+endif()
+if(NOT CMAKE_CXX_COMPILER)
+ set(CMAKE_CXX_COMPILER ${CROSS}g++)
+endif()
+if(NOT CMAKE_AR)
+ set(CMAKE_AR ${CROSS}ar CACHE FILEPATH Archiver)
+endif()
+if(NOT CMAKE_RANLIB)
+ set(CMAKE_RANLIB ${CROSS}ranlib CACHE FILEPATH Indexer)
+endif()
# No runtime cpu detect for armv7-mingw-gcc.
set(CONFIG_RUNTIME_CPU_DETECT 0 CACHE STRING "")
diff --git a/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/mips32-linux-gcc.cmake b/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/mips32-linux-gcc.cmake
deleted file mode 100644
index ad5ebffdc6c..00000000000
--- a/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/mips32-linux-gcc.cmake
+++ /dev/null
@@ -1,78 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_TOOLCHAINS_MIPS32_LINUX_GCC_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_TOOLCHAINS_MIPS32_LINUX_GCC_CMAKE_
-set(AOM_BUILD_CMAKE_TOOLCHAINS_MIPS32_LINUX_GCC_CMAKE_ 1)
-
-set(CMAKE_SYSTEM_NAME "Linux")
-
-if(ENABLE_DSPR2 AND ENABLE_MSA)
- message(FATAL_ERROR "ENABLE_DSPR2 and ENABLE_MSA cannot be combined.")
-endif()
-
-if(ENABLE_DSPR2)
- set(HAVE_DSPR2 1 CACHE BOOL "" FORCE)
-
- if("${CROSS}" STREQUAL "")
-
- # Default the cross compiler prefix to something known to work.
- set(CROSS mips-linux-gnu-)
- endif()
-
- set(MIPS_CFLAGS "-mdspr2")
- set(MIPS_CXXFLAGS "-mdspr2")
-elseif(ENABLE_MSA)
- set(HAVE_MSA 1 CACHE BOOL "" FORCE)
-
- if("${CROSS}" STREQUAL "")
-
- # Default the cross compiler prefix to something known to work.
- set(CROSS mips-mti-linux-gnu-)
- endif()
-
- set(MIPS_CFLAGS "-mmsa")
- set(MIPS_CXXFLAGS "-mmsa")
-endif()
-
-if("${CROSS}" STREQUAL "")
-
- # TODO(tomfinegan): Make it possible to turn this off. The $CROSS prefix won't
- # be desired on a mips host. Default cross compiler prefix to something that
- # might work for an unoptimized build.
- set(CROSS mips-linux-gnu-)
-endif()
-
-if("${MIPS_CPU}" STREQUAL "")
- set(MIPS_CFLAGS "${MIPS_CFLAGS} -mips32r2")
- set(MIPS_CXXFLAGS "${MIPS_CXXFLAGS} -mips32r2")
-elseif("${MIPS_CPU}" STREQUAL "p5600")
- set(P56_FLAGS
- "-mips32r5 -mload-store-pairs -msched-weight -mhard-float -mfp64")
- set(MIPS_CFLAGS "${MIPS_CFLAGS} ${P56_FLAGS}")
- set(MIPS_CXXFLAGS "${MIPS_CXXFLAGS} ${P56_FLAGS}")
- set(CMAKE_EXE_LINKER_FLAGS "-mfp64 ${CMAKE_EXE_LINKER_FLAGS}")
-endif()
-
-set(CMAKE_C_COMPILER ${CROSS}gcc)
-set(CMAKE_CXX_COMPILER ${CROSS}g++)
-set(AS_EXECUTABLE ${CROSS}as)
-set(CMAKE_C_FLAGS_INIT "-EL ${MIPS_CFLAGS}")
-set(CMAKE_CXX_FLAGS_INIT "-EL ${MIPS_CXXFLAGS}")
-set(CMAKE_EXE_LINKER_FLAGS_INIT "-EL ${MIPS_CXXFLAGS}")
-set(CMAKE_SYSTEM_PROCESSOR "mips32")
-
-# No runtime cpu detect for mips32-linux-gcc.
-if(CONFIG_RUNTIME_CPU_DETECT)
- message("--- CONFIG_RUNTIME_CPU_DETECT not supported for mips32 targets.")
-endif()
-
-set(CONFIG_RUNTIME_CPU_DETECT 0 CACHE STRING "" FORCE)
diff --git a/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/mips64-linux-gcc.cmake b/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/mips64-linux-gcc.cmake
deleted file mode 100644
index 0af992451cd..00000000000
--- a/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/mips64-linux-gcc.cmake
+++ /dev/null
@@ -1,55 +0,0 @@
-#
-# Copyright (c) 2017, Alliance for Open Media. All rights reserved
-#
-# This source code is subject to the terms of the BSD 2 Clause License and the
-# Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License was
-# not distributed with this source code in the LICENSE file, you can obtain it
-# at www.aomedia.org/license/software. If the Alliance for Open Media Patent
-# License 1.0 was not distributed with this source code in the PATENTS file, you
-# can obtain it at www.aomedia.org/license/patent.
-#
-if(AOM_BUILD_CMAKE_TOOLCHAINS_MIPS64_LINUX_GCC_CMAKE_)
- return()
-endif() # AOM_BUILD_CMAKE_TOOLCHAINS_MIPS64_LINUX_GCC_CMAKE_
-set(AOM_BUILD_CMAKE_TOOLCHAINS_MIPS64_LINUX_GCC_CMAKE_ 1)
-
-set(CMAKE_SYSTEM_NAME "Linux")
-
-if("${CROSS}" STREQUAL "")
-
- # TODO(tomfinegan): Make it possible to turn this off. The $CROSS prefix won't
- # be desired on a mips host.
- #
- # Default the cross compiler prefix to something known to work.
- set(CROSS mips-img-linux-gnu-)
-endif()
-
-if(ENABLE_MSA)
- set(HAVE_MSA 1 CACHE BOOL "" FORCE)
- set(MIPS_CFLAGS "-mmsa")
- set(MIPS_CXXFLAGS "-mmsa")
-endif()
-
-if("${MIPS_CPU}" STREQUAL "i6400" OR "${MIPS_CPU}" STREQUAL "p6600")
- set(MIPS_CPU_FLAGS "-mips64r6 -mabi=64 -mload-store-pairs -msched-weight")
- set(MIPS_CPU_FLAGS "${MIPS_CPU_FLAGS} -mhard-float -mfp64")
- set(MIPS_CFLAGS "${MIPS_CFLAGS} ${MIPS_CPU_FLAGS}")
- set(MIPS_CXXFLAGS "${MIPS_CXXFLAGS} ${MIPS_CPU_FLAGS}")
- set(CMAKE_EXE_LINKER_FLAGS
- "-mips64r6 -mabi64 -mfp64 ${CMAKE_EXE_LINKER_FLAGS}")
-endif()
-
-set(CMAKE_C_COMPILER ${CROSS}gcc)
-set(CMAKE_CXX_COMPILER ${CROSS}g++)
-set(AS_EXECUTABLE ${CROSS}as)
-set(CMAKE_C_FLAGS_INIT "-EL ${MIPS_CFLAGS}")
-set(CMAKE_CXX_FLAGS_INIT "-EL ${MIPS_CXXFLAGS}")
-set(CMAKE_EXE_LINKER_FLAGS_INIT "-EL ${MIPS_CXXFLAGS}")
-set(CMAKE_SYSTEM_PROCESSOR "mips64")
-
-# No runtime cpu detect for mips64-linux-gcc.
-if(CONFIG_RUNTIME_CPU_DETECT)
- message("--- CONFIG_RUNTIME_CPU_DETECT not supported for mips64 targets.")
-endif()
-
-set(CONFIG_RUNTIME_CPU_DETECT 0 CACHE STRING "" FORCE)
diff --git a/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/ppc-linux-gcc.cmake b/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/ppc-linux-gcc.cmake
index 54db99bb488..ab0efeab06b 100644
--- a/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/ppc-linux-gcc.cmake
+++ b/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/ppc-linux-gcc.cmake
@@ -21,9 +21,15 @@ if("${CROSS}" STREQUAL "")
set(CROSS powerpc64le-unknown-linux-gnu-)
endif()
-set(CMAKE_C_COMPILER ${CROSS}gcc)
-set(CMAKE_CXX_COMPILER ${CROSS}g++)
-set(AS_EXECUTABLE ${CROSS}as)
+if(NOT CMAKE_C_COMPILER)
+ set(CMAKE_C_COMPILER ${CROSS}gcc)
+endif()
+if(NOT CMAKE_CXX_COMPILER)
+ set(CMAKE_CXX_COMPILER ${CROSS}g++)
+endif()
+if(NOT AS_EXECUTABLE)
+ set(AS_EXECUTABLE ${CROSS}as)
+endif()
set(CMAKE_SYSTEM_PROCESSOR "ppc")
set(CONFIG_RUNTIME_CPU_DETECT 0 CACHE STRING "")
diff --git a/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/x86-mingw-gcc.cmake b/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/x86-mingw-gcc.cmake
index 2e9a9a84b65..f75728f2c76 100644
--- a/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/x86-mingw-gcc.cmake
+++ b/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/x86-mingw-gcc.cmake
@@ -23,7 +23,15 @@ if("${CROSS}" STREQUAL "")
set(CROSS i686-w64-mingw32-)
endif()
-set(CMAKE_C_COMPILER ${CROSS}gcc)
-set(CMAKE_CXX_COMPILER ${CROSS}g++)
-set(CMAKE_AR ${CROSS}ar CACHE FILEPATH Archiver)
-set(CMAKE_RANLIB ${CROSS}ranlib CACHE FILEPATH Indexer)
+if(NOT CMAKE_C_COMPILER)
+ set(CMAKE_C_COMPILER ${CROSS}gcc)
+endif()
+if(NOT CMAKE_CXX_COMPILER)
+ set(CMAKE_CXX_COMPILER ${CROSS}g++)
+endif()
+if(NOT CMAKE_AR)
+ set(CMAKE_AR ${CROSS}ar CACHE FILEPATH Archiver)
+endif()
+if(NOT CMAKE_RANLIB)
+ set(CMAKE_RANLIB ${CROSS}ranlib CACHE FILEPATH Indexer)
+endif()
diff --git a/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/x86_64-mingw-gcc.cmake b/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/x86_64-mingw-gcc.cmake
index 4b2d28debc6..56e9b6ecb98 100644
--- a/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/x86_64-mingw-gcc.cmake
+++ b/chromium/third_party/libaom/source/libaom/build/cmake/toolchains/x86_64-mingw-gcc.cmake
@@ -20,7 +20,15 @@ if("${CROSS}" STREQUAL "")
set(CROSS x86_64-w64-mingw32-)
endif()
-set(CMAKE_C_COMPILER ${CROSS}gcc)
-set(CMAKE_CXX_COMPILER ${CROSS}g++)
-set(CMAKE_AR ${CROSS}ar CACHE FILEPATH Archiver)
-set(CMAKE_RANLIB ${CROSS}ranlib CACHE FILEPATH Indexer)
+if(NOT CMAKE_C_COMPILER)
+ set(CMAKE_C_COMPILER ${CROSS}gcc)
+endif()
+if(NOT CMAKE_CXX_COMPILER)
+ set(CMAKE_CXX_COMPILER ${CROSS}g++)
+endif()
+if(NOT CMAKE_AR)
+ set(CMAKE_AR ${CROSS}ar CACHE FILEPATH Archiver)
+endif()
+if(NOT CMAKE_RANLIB)
+ set(CMAKE_RANLIB ${CROSS}ranlib CACHE FILEPATH Indexer)
+endif()
diff --git a/chromium/third_party/libaom/source/libaom/common/args.c b/chromium/third_party/libaom/source/libaom/common/args.c
index 686fcd28775..b5ede193b57 100644
--- a/chromium/third_party/libaom/source/libaom/common/args.c
+++ b/chromium/third_party/libaom/source/libaom/common/args.c
@@ -146,6 +146,7 @@ const char *arg_next(struct arg *arg) {
char **argv_dup(int argc, const char **argv) {
char **new_argv = malloc((argc + 1) * sizeof(*argv));
+ if (!new_argv) return NULL;
memcpy(new_argv, argv, argc * sizeof(*argv));
new_argv[argc] = NULL;
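
The one-line guard above is the whole fix: argv_dup() previously passed a possibly-NULL pointer from a failed malloc() straight into memcpy(). A minimal standalone sketch of the patched shape (argv_dup_sketch is an illustrative name, not from the patch; callers must now handle NULL, as the svc_encoder_rtc.c hunk later in this patch does):

    #include <stdlib.h>
    #include <string.h>

    /* Sketch of the patched argv_dup(): fail cleanly instead of
     * handing a NULL pointer from a failed malloc() to memcpy(). */
    static char **argv_dup_sketch(int argc, const char **argv) {
      char **new_argv = malloc((argc + 1) * sizeof(*new_argv));
      if (!new_argv) return NULL; /* the added guard */
      memcpy(new_argv, argv, argc * sizeof(*new_argv));
      new_argv[argc] = NULL;
      return new_argv;
    }
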
diff --git a/chromium/third_party/libaom/source/libaom/common/md5_utils.c b/chromium/third_party/libaom/source/libaom/common/md5_utils.c
index b69e1cc72c6..c69aa57a3bf 100644
--- a/chromium/third_party/libaom/source/libaom/common/md5_utils.c
+++ b/chromium/third_party/libaom/source/libaom/common/md5_utils.c
@@ -150,19 +150,26 @@ void MD5Final(md5byte digest[16], struct MD5Context *ctx) {
#define AOM_NO_UNSIGNED_OVERFLOW_CHECK \
__attribute__((no_sanitize("unsigned-integer-overflow")))
#endif
-#endif
+#if __clang_major__ >= 12
+#define AOM_NO_UNSIGNED_SHIFT_CHECK \
+  __attribute__((no_sanitize("unsigned-shift-base")))
+#endif  // __clang_major__ >= 12
+#endif // __clang__
#ifndef AOM_NO_UNSIGNED_OVERFLOW_CHECK
#define AOM_NO_UNSIGNED_OVERFLOW_CHECK
#endif
+#ifndef AOM_NO_UNSIGNED_SHIFT_CHECK
+#define AOM_NO_UNSIGNED_SHIFT_CHECK
+#endif
/*
* The core of the MD5 algorithm, this alters an existing MD5 hash to
* reflect the addition of 16 longwords of new data. MD5Update blocks
* the data and converts bytes into longwords for this routine.
*/
-AOM_NO_UNSIGNED_OVERFLOW_CHECK void MD5Transform(UWORD32 buf[4],
- UWORD32 const in[16]) {
+AOM_NO_UNSIGNED_OVERFLOW_CHECK AOM_NO_UNSIGNED_SHIFT_CHECK void MD5Transform(
+ UWORD32 buf[4], UWORD32 const in[16]) {
register UWORD32 a, b, c, d;
a = buf[0];
@@ -245,5 +252,6 @@ AOM_NO_UNSIGNED_OVERFLOW_CHECK void MD5Transform(UWORD32 buf[4],
}
#undef AOM_NO_UNSIGNED_OVERFLOW_CHECK
+#undef AOM_NO_UNSIGNED_SHIFT_CHECK
#endif
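
The same guarded-attribute pattern, reduced to a self-contained sketch (the macro and function names here are illustrative, not from libaom; the clang-12 gate mirrors the hunk above, which only defines the attribute where that sanitizer check exists):

    /* Expands to clang's suppression attribute where supported, and to
     * nothing on other compilers, so annotated code stays portable. */
    #if defined(__clang__) && __clang_major__ >= 12
    #define NO_UNSIGNED_SHIFT_CHECK \
      __attribute__((no_sanitize("unsigned-shift-base")))
    #else
    #define NO_UNSIGNED_SHIFT_CHECK
    #endif

    /* MD5-style mixing relies on well-defined unsigned wraparound and
     * shifts, so the optional UBSan checks are silenced for just this
     * function rather than for the whole build. */
    NO_UNSIGNED_SHIFT_CHECK static unsigned mix(unsigned a, unsigned b) {
      return (a << 7) + (a >> 25) + b;
    }
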
diff --git a/chromium/third_party/libaom/source/libaom/common/obudec.c b/chromium/third_party/libaom/source/libaom/common/obudec.c
index 650f9973bd2..a71a6dde92e 100644
--- a/chromium/third_party/libaom/source/libaom/common/obudec.c
+++ b/chromium/third_party/libaom/source/libaom/common/obudec.c
@@ -288,6 +288,7 @@ int file_is_obu(struct ObuDecInputContext *obu_ctx) {
if (obudec_read_leb128(f, &detect_buf[0], &length_of_unit_size,
&unit_size) != 0) {
fprintf(stderr, "obudec: Failure reading temporal unit header\n");
+ rewind(f);
return 0;
}
@@ -295,6 +296,7 @@ int file_is_obu(struct ObuDecInputContext *obu_ctx) {
if (obudec_read_leb128(f, &detect_buf[length_of_unit_size],
&annexb_header_length, &unit_size) != 0) {
fprintf(stderr, "obudec: Failure reading frame unit header\n");
+ rewind(f);
return 0;
}
annexb_header_length += length_of_unit_size;
@@ -316,6 +318,7 @@ int file_is_obu(struct ObuDecInputContext *obu_ctx) {
if (obu_header.type != OBU_TEMPORAL_DELIMITER &&
obu_header.type != OBU_SEQUENCE_HEADER) {
+ rewind(f);
return 0;
}
@@ -350,6 +353,7 @@ int file_is_obu(struct ObuDecInputContext *obu_ctx) {
if (payload_length > (obu_ctx->buffer_capacity - bytes_read)) {
fprintf(stderr, "obudec: First OBU's payload is too large\n");
rewind(f);
+ obudec_free(obu_ctx);
return 0;
}
@@ -358,6 +362,7 @@ int file_is_obu(struct ObuDecInputContext *obu_ctx) {
f, payload_length, &obu_ctx->buffer[bytes_read], &payload_bytes);
if (status < 0) {
rewind(f);
+ obudec_free(obu_ctx);
return 0;
}
obu_ctx->bytes_buffered += payload_bytes;
@@ -483,4 +488,9 @@ int obudec_read_temporal_unit(struct ObuDecInputContext *obu_ctx,
return 0;
}
-void obudec_free(struct ObuDecInputContext *obu_ctx) { free(obu_ctx->buffer); }
+void obudec_free(struct ObuDecInputContext *obu_ctx) {
+ free(obu_ctx->buffer);
+ obu_ctx->buffer = NULL;
+ obu_ctx->buffer_capacity = 0;
+ obu_ctx->bytes_buffered = 0;
+}
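
obudec_free() is now safe to call more than once, which is what makes the new early-return paths above free to release the buffer eagerly. A reduced sketch of the free-and-reset idiom (struct and function names are stand-ins; the fields mirror those reset in the hunk):

    #include <stdlib.h>

    struct obu_buf { /* stand-in for ObuDecInputContext */
      unsigned char *buffer;
      size_t buffer_capacity;
      size_t bytes_buffered;
    };

    /* Free-and-reset: clearing the pointer and counters makes a second
     * call a harmless no-op (free(NULL) is defined), so error paths can
     * free early without risking a later double-free. */
    static void obu_buf_free(struct obu_buf *ctx) {
      free(ctx->buffer);
      ctx->buffer = NULL;
      ctx->buffer_capacity = 0;
      ctx->bytes_buffered = 0;
    }
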
diff --git a/chromium/third_party/libaom/source/libaom/common/tools_common.c b/chromium/third_party/libaom/source/libaom/common/tools_common.c
index 9f2debaa304..86f05b118e2 100644
--- a/chromium/third_party/libaom/source/libaom/common/tools_common.c
+++ b/chromium/third_party/libaom/source/libaom/common/tools_common.c
@@ -252,24 +252,24 @@ void aom_img_write(const aom_image_t *img, FILE *file) {
}
}
-int aom_img_read(aom_image_t *img, FILE *file) {
+bool aom_img_read(aom_image_t *img, FILE *file) {
int plane;
+ const int bytespp = (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1;
for (plane = 0; plane < 3; ++plane) {
unsigned char *buf = img->planes[plane];
const int stride = img->stride[plane];
- const int w = aom_img_plane_width(img, plane) *
- ((img->fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 2 : 1);
+ const int w = aom_img_plane_width(img, plane) * bytespp;
const int h = aom_img_plane_height(img, plane);
int y;
for (y = 0; y < h; ++y) {
- if (fread(buf, 1, w, file) != (size_t)w) return 0;
+ if (fread(buf, 1, w, file) != (size_t)w) return false;
buf += stride;
}
}
- return 1;
+ return true;
}
// TODO(dkovalev) change sse_to_psnr signature: double -> int64_t
diff --git a/chromium/third_party/libaom/source/libaom/common/tools_common.h b/chromium/third_party/libaom/source/libaom/common/tools_common.h
index 70e422357ca..77494dea376 100644
--- a/chromium/third_party/libaom/source/libaom/common/tools_common.h
+++ b/chromium/third_party/libaom/source/libaom/common/tools_common.h
@@ -173,7 +173,8 @@ uint32_t get_fourcc_by_aom_decoder(aom_codec_iface_t *iface);
int read_yuv_frame(struct AvxInputContext *input_ctx, aom_image_t *yuv_frame);
void aom_img_write(const aom_image_t *img, FILE *file);
-int aom_img_read(aom_image_t *img, FILE *file);
+// Returns true on success, false on failure.
+bool aom_img_read(aom_image_t *img, FILE *file);
double sse_to_psnr(double samples, double peak, double mse);
void aom_img_upshift(aom_image_t *dst, const aom_image_t *src, int input_shift);
diff --git a/chromium/third_party/libaom/source/libaom/common/webmdec.h b/chromium/third_party/libaom/source/libaom/common/webmdec.h
index 5ac75cb304e..fcbdeffe4d0 100644
--- a/chromium/third_party/libaom/source/libaom/common/webmdec.h
+++ b/chromium/third_party/libaom/source/libaom/common/webmdec.h
@@ -28,7 +28,7 @@ struct WebmInputContext {
const void *block;
int block_frame_index;
int video_track_index;
- uint64_t timestamp_ns;
+ int64_t timestamp_ns;
int is_key_frame;
int reached_eos;
};
diff --git a/chromium/third_party/libaom/source/libaom/common/y4menc.c b/chromium/third_party/libaom/source/libaom/common/y4menc.c
index eaeedba57d5..7d3246546a2 100644
--- a/chromium/third_party/libaom/source/libaom/common/y4menc.c
+++ b/chromium/third_party/libaom/source/libaom/common/y4menc.c
@@ -52,30 +52,25 @@ static const char *colorspace(unsigned int bit_depth,
switch (bit_depth) {
case 8: return colorspace8(csp, fmt);
case 9:
- return fmt == AOM_IMG_FMT_I44416
- ? "C444p9 XYSCSS=444P9"
- : fmt == AOM_IMG_FMT_I42216 ? "C422p9 XYSCSS=422P9"
- : "C420p9 XYSCSS=420P9";
+ return fmt == AOM_IMG_FMT_I44416 ? "C444p9 XYSCSS=444P9"
+ : fmt == AOM_IMG_FMT_I42216 ? "C422p9 XYSCSS=422P9"
+ : "C420p9 XYSCSS=420P9";
case 10:
- return fmt == AOM_IMG_FMT_I44416
- ? "C444p10 XYSCSS=444P10"
- : fmt == AOM_IMG_FMT_I42216 ? "C422p10 XYSCSS=422P10"
- : "C420p10 XYSCSS=420P10";
+ return fmt == AOM_IMG_FMT_I44416 ? "C444p10 XYSCSS=444P10"
+ : fmt == AOM_IMG_FMT_I42216 ? "C422p10 XYSCSS=422P10"
+ : "C420p10 XYSCSS=420P10";
case 12:
- return fmt == AOM_IMG_FMT_I44416
- ? "C444p12 XYSCSS=444P12"
- : fmt == AOM_IMG_FMT_I42216 ? "C422p12 XYSCSS=422P12"
- : "C420p12 XYSCSS=420P12";
+ return fmt == AOM_IMG_FMT_I44416 ? "C444p12 XYSCSS=444P12"
+ : fmt == AOM_IMG_FMT_I42216 ? "C422p12 XYSCSS=422P12"
+ : "C420p12 XYSCSS=420P12";
case 14:
- return fmt == AOM_IMG_FMT_I44416
- ? "C444p14 XYSCSS=444P14"
- : fmt == AOM_IMG_FMT_I42216 ? "C422p14 XYSCSS=422P14"
- : "C420p14 XYSCSS=420P14";
+ return fmt == AOM_IMG_FMT_I44416 ? "C444p14 XYSCSS=444P14"
+ : fmt == AOM_IMG_FMT_I42216 ? "C422p14 XYSCSS=422P14"
+ : "C420p14 XYSCSS=420P14";
case 16:
- return fmt == AOM_IMG_FMT_I44416
- ? "C444p16 XYSCSS=444P16"
- : fmt == AOM_IMG_FMT_I42216 ? "C422p16 XYSCSS=422P16"
- : "C420p16 XYSCSS=420P16";
+ return fmt == AOM_IMG_FMT_I44416 ? "C444p16 XYSCSS=444P16"
+ : fmt == AOM_IMG_FMT_I42216 ? "C422p16 XYSCSS=422P16"
+ : "C420p16 XYSCSS=420P16";
default: assert(0); return NULL;
}
}
diff --git a/chromium/third_party/libaom/source/libaom/common/y4minput.c b/chromium/third_party/libaom/source/libaom/common/y4minput.c
index 8e20b49610f..2fc83795c08 100644
--- a/chromium/third_party/libaom/source/libaom/common/y4minput.c
+++ b/chromium/third_party/libaom/source/libaom/common/y4minput.c
@@ -23,12 +23,13 @@
// Reads 'size' bytes from 'file' into 'buf' with some fault tolerance.
// Returns true on success.
static int file_read(void *buf, size_t size, FILE *file) {
- const int kMaxRetries = 5;
- int retry_count = 0;
- int file_error;
+ const int kMaxTries = 5;
+ int try_count = 0;
+ int file_error = 0;
size_t len = 0;
- do {
+ while (!feof(file) && len < size && try_count < kMaxTries) {
const size_t n = fread((uint8_t *)buf + len, 1, size - len, file);
+ ++try_count;
len += n;
file_error = ferror(file);
if (file_error) {
@@ -41,13 +42,13 @@ static int file_read(void *buf, size_t size, FILE *file) {
return 0;
}
}
- } while (!feof(file) && len < size && ++retry_count < kMaxRetries);
+ }
if (!feof(file) && len != size) {
fprintf(stderr,
"Error reading file: %u of %u bytes read,"
- " error: %d, retries: %d, %d: %s\n",
- (uint32_t)len, (uint32_t)size, file_error, retry_count, errno,
+ " error: %d, tries: %d, %d: %s\n",
+ (uint32_t)len, (uint32_t)size, file_error, try_count, errno,
strerror(errno));
}
return len == size;
@@ -1141,9 +1142,15 @@ int y4m_input_open(y4m_input *y4m_ctx, FILE *file, char *skip_buffer,
y4m_ctx->dst_buf = (unsigned char *)malloc(y4m_ctx->dst_buf_sz);
else
y4m_ctx->dst_buf = (unsigned char *)malloc(2 * y4m_ctx->dst_buf_sz);
+ if (!y4m_ctx->dst_buf) return -1;
- if (y4m_ctx->aux_buf_sz > 0)
+ if (y4m_ctx->aux_buf_sz > 0) {
y4m_ctx->aux_buf = (unsigned char *)malloc(y4m_ctx->aux_buf_sz);
+ if (!y4m_ctx->aux_buf) {
+ free(y4m_ctx->dst_buf);
+ return -1;
+ }
+ }
return 0;
}
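
The reworked loop counts every fread() call, not just the short ones, so a stream that keeps returning partial reads is now bounded at kMaxTries iterations instead of retrying forever. A standalone sketch of the pattern (file_read_sketch is an illustrative name; the error handling is condensed relative to the patched function):

    #include <stdio.h>

    /* Bounded-retry read: success means exactly 'size' bytes arrived
     * within kMaxTries fread() calls; EOF or a stream error fails. */
    static int file_read_sketch(void *buf, size_t size, FILE *file) {
      const int kMaxTries = 5;
      int try_count = 0;
      size_t len = 0;
      while (!feof(file) && len < size && try_count < kMaxTries) {
        len += fread((unsigned char *)buf + len, 1, size - len, file);
        ++try_count;
        if (ferror(file)) return 0; /* give up on a stream error */
      }
      return len == size;
    }
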
diff --git a/chromium/third_party/libaom/source/libaom/examples/inspect.c b/chromium/third_party/libaom/source/libaom/examples/inspect.c
index 0a2e962c7f1..8e7213ab43a 100644
--- a/chromium/third_party/libaom/source/libaom/examples/inspect.c
+++ b/chromium/third_party/libaom/source/libaom/examples/inspect.c
@@ -623,6 +623,10 @@ void inspect(void *pbi, void *data) {
// We allocate enough space and hope we don't write out of bounds. Totally
// unsafe but this speeds things up, especially when compiled to Javascript.
char *buffer = aom_malloc(MAX_BUFFER);
+ if (!buffer) {
+ fprintf(stderr, "Error allocating inspect info buffer\n");
+ abort();
+ }
char *buf = buffer;
buf += put_str(buf, "{\n");
if (layers & BLOCK_SIZE_LAYER) {
diff --git a/chromium/third_party/libaom/source/libaom/examples/lightfield_bitstream_parsing.c b/chromium/third_party/libaom/source/libaom/examples/lightfield_bitstream_parsing.c
index 30e4fca4e91..35b4ad093e0 100644
--- a/chromium/third_party/libaom/source/libaom/examples/lightfield_bitstream_parsing.c
+++ b/chromium/third_party/libaom/source/libaom/examples/lightfield_bitstream_parsing.c
@@ -267,6 +267,8 @@ int main(int argc, char **argv) {
unsigned char **frames =
(unsigned char **)malloc(num_frames * sizeof(unsigned char *));
size_t *frame_sizes = (size_t *)malloc(num_frames * sizeof(size_t));
+ if (!(frames && frame_sizes)) die("Failed to allocate frame data.");
+
// Seek to the first camera image.
fseeko(infile, camera_frame_pos, SEEK_SET);
for (int f = 0; f < num_frames; ++f) {
@@ -275,6 +277,7 @@ int main(int argc, char **argv) {
const unsigned char *frame =
aom_video_reader_get_frame(reader, &frame_size);
frames[f] = (unsigned char *)malloc(frame_size * sizeof(unsigned char));
+ if (!frames[f]) die("Failed to allocate frame data.");
memcpy(frames[f], frame, frame_size);
frame_sizes[f] = frame_size;
}
diff --git a/chromium/third_party/libaom/source/libaom/examples/lightfield_decoder.c b/chromium/third_party/libaom/source/libaom/examples/lightfield_decoder.c
index dae2748a6a7..65b13efa1ae 100644
--- a/chromium/third_party/libaom/source/libaom/examples/lightfield_decoder.c
+++ b/chromium/third_party/libaom/source/libaom/examples/lightfield_decoder.c
@@ -270,12 +270,14 @@ int main(int argc, char **argv) {
unsigned char **frames =
(unsigned char **)malloc(num_frames * sizeof(unsigned char *));
size_t *frame_sizes = (size_t *)malloc(num_frames * sizeof(size_t));
+ if (!(frames && frame_sizes)) die("Failed to allocate frame data.");
// Seek to the first camera image.
fseeko(infile, camera_frame_pos, SEEK_SET);
for (int f = 0; f < num_frames; ++f) {
aom_video_reader_read_frame(reader);
frame = aom_video_reader_get_frame(reader, &frame_size);
frames[f] = (unsigned char *)malloc(frame_size * sizeof(unsigned char));
+ if (!frames[f]) die("Failed to allocate frame data.");
memcpy(frames[f], frame, frame_size);
frame_sizes[f] = frame_size;
}
diff --git a/chromium/third_party/libaom/source/libaom/examples/lightfield_encoder.c b/chromium/third_party/libaom/source/libaom/examples/lightfield_encoder.c
index d24aabdf20e..9aef836ac2b 100644
--- a/chromium/third_party/libaom/source/libaom/examples/lightfield_encoder.c
+++ b/chromium/third_party/libaom/source/libaom/examples/lightfield_encoder.c
@@ -39,6 +39,7 @@
#include "aom/aomcx.h"
#include "aom_scale/yv12config.h"
#include "av1/common/enums.h"
+#include "av1/encoder/encoder_utils.h"
#include "common/tools_common.h"
#include "common/video_writer.h"
@@ -81,6 +82,7 @@ static int get_frame_stats(aom_codec_ctx_t *ctx, const aom_image_t *img,
const uint8_t *const pkt_buf = pkt->data.twopass_stats.buf;
const size_t pkt_size = pkt->data.twopass_stats.sz;
stats->buf = realloc(stats->buf, stats->sz + pkt_size);
+ if (!stats->buf) die("Failed to allocate frame stats buffer.");
memcpy((uint8_t *)stats->buf + stats->sz, pkt_buf, pkt_size);
stats->sz += pkt_size;
}
@@ -290,10 +292,12 @@ static void pass1(aom_image_t *raw, FILE *infile, const char *outfile_name,
aom_img_fmt_t ref_fmt = AOM_IMG_FMT_I420;
if (FORCE_HIGHBITDEPTH_DECODING) ref_fmt |= AOM_IMG_FMT_HIGHBITDEPTH;
// Allocate memory with the border so that it can be used as a reference.
+ const bool resize =
+ codec.config.enc->rc_resize_mode || codec.config.enc->rc_superres_mode;
+ const bool all_intra = reference_image_num - 1 == 0;
int border_in_pixels =
- (codec.config.enc->rc_resize_mode || codec.config.enc->rc_superres_mode)
- ? AOM_BORDER_IN_PIXELS
- : AOM_ENC_NO_SCALE_BORDER;
+ av1_get_enc_border_size(resize, all_intra, BLOCK_64X64);
+
for (i = 0; i < reference_image_num; i++) {
if (!aom_img_alloc_with_border(&reference_images[i], ref_fmt, cfg->g_w,
cfg->g_h, 32, 8, border_in_pixels)) {
diff --git a/chromium/third_party/libaom/source/libaom/examples/noise_model.c b/chromium/third_party/libaom/source/libaom/examples/noise_model.c
index d55c7342e16..1de13267fc6 100644
--- a/chromium/third_party/libaom/source/libaom/examples/noise_model.c
+++ b/chromium/third_party/libaom/source/libaom/examples/noise_model.c
@@ -330,6 +330,7 @@ int main(int argc, char *argv[]) {
const int num_blocks_w = (info.frame_width + block_size - 1) / block_size;
const int num_blocks_h = (info.frame_height + block_size - 1) / block_size;
uint8_t *flat_blocks = (uint8_t *)aom_malloc(num_blocks_w * num_blocks_h);
+ if (!flat_blocks) die("Failed to allocate block data.");
// Sets the random seed on the first entry in the output table
int16_t random_seed = 7391;
aom_noise_model_t noise_model;
diff --git a/chromium/third_party/libaom/source/libaom/examples/resize_util.c b/chromium/third_party/libaom/source/libaom/examples/resize_util.c
index 5692c2062cc..45a1db20280 100644
--- a/chromium/third_party/libaom/source/libaom/examples/resize_util.c
+++ b/chromium/third_party/libaom/source/libaom/examples/resize_util.c
@@ -53,6 +53,7 @@ int main(int argc, char *argv[]) {
uint8_t *inbuf_v, *outbuf_v;
int f, frames;
int width, height, target_width, target_height;
+ int failed = 0;
exec_name = argv[0];
@@ -102,6 +103,11 @@ int main(int argc, char *argv[]) {
inbuf = (uint8_t *)malloc(width * height * 3 / 2);
outbuf = (uint8_t *)malloc(target_width * target_height * 3 / 2);
+ if (!(inbuf && outbuf)) {
+ printf("Failed to allocate buffers.\n");
+ failed = 1;
+ goto Error;
+ }
inbuf_u = inbuf + width * height;
inbuf_v = inbuf_u + width * height / 4;
outbuf_u = outbuf + target_width * target_height;
@@ -116,10 +122,11 @@ int main(int argc, char *argv[]) {
f++;
}
printf("%d frames processed\n", f);
+Error:
fclose(fpin);
fclose(fpout);
free(inbuf);
free(outbuf);
- return 0;
+ return failed;
}
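
resize_util now reports allocation failure through its exit status instead of crashing in the frame loop. Its control flow reduces to the classic single-exit cleanup pattern sketched below (file handling and sizes are placeholders, not the tool's real values):

    #include <stdio.h>
    #include <stdlib.h>

    int main(void) {
      int failed = 0;
      unsigned char *inbuf = malloc(1024 * 1024);
      unsigned char *outbuf = malloc(1024 * 1024);
      if (!(inbuf && outbuf)) {
        printf("Failed to allocate buffers.\n");
        failed = 1;
        goto Error; /* skip the work, keep the cleanup */
      }
      /* ... read, resize, and write frames here ... */
    Error:
      free(inbuf); /* free(NULL) is defined, so this is safe either way */
      free(outbuf);
      return failed;
    }
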
diff --git a/chromium/third_party/libaom/source/libaom/examples/set_maps.c b/chromium/third_party/libaom/source/libaom/examples/set_maps.c
index 5a84faa5659..2593faba342 100644
--- a/chromium/third_party/libaom/source/libaom/examples/set_maps.c
+++ b/chromium/third_party/libaom/source/libaom/examples/set_maps.c
@@ -15,7 +15,7 @@
// This is an example demonstrating how to control the AOM encoder's
// ROI and Active maps.
//
-// ROI (Reigon of Interest) maps are a way for the application to assign
+// ROI (Region of Interest) maps are a way for the application to assign
// each macroblock in the image to a region, and then set quantizer and
// filtering parameters on that image.
//
@@ -27,12 +27,12 @@
// Configuration
// -------------
// An ROI map is set on frame 22. If the width of the image in macroblocks
-// is evenly divisble by 4, then the output will appear to have distinct
+// is evenly divisible by 4, then the output will appear to have distinct
// columns, where the quantizer, loopfilter, and static threshold differ
// from column to column.
//
// An active map is set on frame 33. If the width of the image in macroblocks
-// is evenly divisble by 4, then the output will appear to have distinct
+// is evenly divisible by 4, then the output will appear to have distinct
// columns, where one column will have motion and the next will not.
//
// The active map is cleared on frame 44.
@@ -69,6 +69,7 @@ static void set_active_map(const aom_codec_enc_cfg_t *cfg,
map.cols = (cfg->g_w + 15) / 16;
map.active_map = (uint8_t *)malloc(map.rows * map.cols);
+ if (!map.active_map) die("Failed to allocate active map");
for (i = 0; i < map.rows * map.cols; ++i) map.active_map[i] = i % 2;
if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map))
diff --git a/chromium/third_party/libaom/source/libaom/examples/svc_encoder_rtc.c b/chromium/third_party/libaom/source/libaom/examples/svc_encoder_rtc.c
index 2e76c96aa66..7bd79aa1536 100644
--- a/chromium/third_party/libaom/source/libaom/examples/svc_encoder_rtc.c
+++ b/chromium/third_party/libaom/source/libaom/examples/svc_encoder_rtc.c
@@ -37,6 +37,7 @@ typedef struct {
int aq_mode;
int layering_mode;
int output_obu;
+ int decode;
} AppInput;
typedef enum {
@@ -87,6 +88,9 @@ static const arg_def_t error_resilient_arg =
static const arg_def_t output_obu_arg =
ARG_DEF(NULL, "output-obu", 1,
"Write OBUs when set to 1. Otherwise write IVF files.");
+static const arg_def_t test_decode_arg =
+ ARG_DEF(NULL, "test-decode", 1,
+ "Attempt to test decoding the output when set to 1. Default is 1.");
#if CONFIG_AV1_HIGHBITDEPTH
static const struct arg_enum_list bitdepth_enum[] = {
@@ -97,18 +101,31 @@ static const arg_def_t bitdepth_arg = ARG_DEF_ENUM(
"d", "bit-depth", 1, "Bit depth for codec 8, 10 or 12. ", bitdepth_enum);
#endif // CONFIG_AV1_HIGHBITDEPTH
-static const arg_def_t *svc_args[] = {
- &frames_arg, &outputfile, &width_arg,
- &height_arg, &timebase_arg, &bitrate_arg,
- &spatial_layers_arg, &kf_dist_arg, &scale_factors_arg,
- &min_q_arg, &max_q_arg, &temporal_layers_arg,
- &layering_mode_arg, &threads_arg, &aqmode_arg,
+static const arg_def_t *svc_args[] = { &frames_arg,
+ &outputfile,
+ &width_arg,
+ &height_arg,
+ &timebase_arg,
+ &bitrate_arg,
+ &spatial_layers_arg,
+ &kf_dist_arg,
+ &scale_factors_arg,
+ &min_q_arg,
+ &max_q_arg,
+ &temporal_layers_arg,
+ &layering_mode_arg,
+ &threads_arg,
+ &aqmode_arg,
#if CONFIG_AV1_HIGHBITDEPTH
- &bitdepth_arg,
+ &bitdepth_arg,
#endif
- &speed_arg, &bitrates_arg, &dropframe_thresh_arg,
- &error_resilient_arg, &output_obu_arg, NULL
-};
+ &speed_arg,
+ &bitrates_arg,
+ &dropframe_thresh_arg,
+ &error_resilient_arg,
+ &output_obu_arg,
+ &test_decode_arg,
+ NULL };
#define zero(Dest) memset(&(Dest), 0, sizeof(Dest))
@@ -226,6 +243,7 @@ static aom_codec_err_t parse_layer_options_from_string(
return AOM_CODEC_INVALID_PARAM;
input_string = malloc(strlen(input));
+ if (!input_string) die("Failed to allocate input string.");
memcpy(input_string, input, strlen(input));
if (input_string == NULL) return AOM_CODEC_MEM_ERROR;
token = strtok(input_string, delim); // NOLINT
@@ -260,11 +278,16 @@ static void parse_command_line(int argc, const char **argv_,
svc_params->number_temporal_layers = 1;
app_input->layering_mode = 0;
app_input->output_obu = 0;
+ app_input->decode = 1;
enc_cfg->g_threads = 1;
enc_cfg->rc_end_usage = AOM_CBR;
// process command line options
argv = argv_dup(argc - 1, argv_ + 1);
+ if (!argv) {
+ fprintf(stderr, "Error allocating argument list\n");
+ exit(EXIT_FAILURE);
+ }
for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
arg.argv_step = 1;
@@ -337,6 +360,11 @@ static void parse_command_line(int argc, const char **argv_,
if (app_input->output_obu != 0 && app_input->output_obu != 1)
die("Invalid value for obu output flag (0, 1): %d.",
app_input->output_obu);
+ } else if (arg_match(&arg, &test_decode_arg, argi)) {
+ app_input->decode = arg_parse_uint(&arg);
+ if (app_input->decode != 0 && app_input->decode != 1)
+ die("Invalid value for test decode flag (0, 1): %d.",
+ app_input->decode);
} else {
++argj;
}
@@ -561,6 +589,9 @@ static void set_layer_pattern(
aom_svc_ref_frame_config_t *ref_frame_config,
aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int *use_svc_control,
int spatial_layer_id, int is_key_frame, int ksvc_mode, int speed) {
+  // Setting this flag to 1 enables a simple example of
+  // RPS (Reference Picture Selection) for 1 layer.
+ int use_rps_example = 1;
int i;
int enable_longterm_temporal_ref = 1;
int shift = (layering_mode == 8) ? 2 : 0;
@@ -585,10 +616,64 @@ static void set_layer_pattern(
}
switch (layering_mode) {
case 0:
- // 1-layer: update LAST on every frame, reference LAST.
- layer_id->temporal_layer_id = 0;
- ref_frame_config->refresh[0] = 1;
- ref_frame_config->reference[SVC_LAST_FRAME] = 1;
+ if (use_rps_example == 0) {
+ // 1-layer: update LAST on every frame, reference LAST.
+ layer_id->temporal_layer_id = 0;
+ layer_id->spatial_layer_id = 0;
+ ref_frame_config->refresh[0] = 1;
+ ref_frame_config->reference[SVC_LAST_FRAME] = 1;
+ } else {
+      // Pattern of 2 references (ALTREF and GOLDEN) trailing
+      // LAST by 4 and 8 frames, with some switching logic to
+      // sometimes only predict from the longer-term reference.
+      // This is a simple example to test RPS (reference picture
+      // selection) as a method to handle network packet loss.
+ int last_idx = 0;
+ int last_idx_refresh = 0;
+ int gld_idx = 0;
+ int alt_ref_idx = 0;
+ int lag_alt = 4;
+ int lag_gld = 8;
+ layer_id->temporal_layer_id = 0;
+ layer_id->spatial_layer_id = 0;
+ int sh = 8; // slots 0 - 7.
+ // Moving index slot for last: 0 - (sh - 1)
+ if (superframe_cnt > 1) last_idx = (superframe_cnt - 1) % sh;
+ // Moving index for refresh of last: one ahead for next frame.
+ last_idx_refresh = superframe_cnt % sh;
+      // Moving index for gld_ref, lags behind current by lag_gld.
+      if (superframe_cnt > lag_gld) gld_idx = (superframe_cnt - lag_gld) % sh;
+      // Moving index for alt_ref, lags behind LAST by lag_alt frames.
+ if (superframe_cnt > lag_alt)
+ alt_ref_idx = (superframe_cnt - lag_alt) % sh;
+ // Set the ref_idx.
+ // Default all references to slot for last.
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++)
+ ref_frame_config->ref_idx[i] = last_idx;
+ // Set the ref_idx for the relevant references.
+ ref_frame_config->ref_idx[SVC_LAST_FRAME] = last_idx;
+ ref_frame_config->ref_idx[SVC_LAST2_FRAME] = last_idx_refresh;
+ ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = gld_idx;
+ ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = alt_ref_idx;
+ // Refresh this slot, which will become LAST on next frame.
+ ref_frame_config->refresh[last_idx_refresh] = 1;
+ // Reference LAST, ALTREF, and GOLDEN
+ ref_frame_config->reference[SVC_LAST_FRAME] = 1;
+ ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
+ ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
+ // Switch to only ALTREF for frames 200 to 250.
+ if (superframe_cnt >= 200 && superframe_cnt < 250) {
+ ref_frame_config->reference[SVC_LAST_FRAME] = 0;
+ ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
+ ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
+ }
+ // Switch to only GOLDEN for frames 400 to 450.
+ if (superframe_cnt >= 400 && superframe_cnt < 450) {
+ ref_frame_config->reference[SVC_LAST_FRAME] = 0;
+ ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
+ ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
+ }
+ }
break;
case 1:
// 2-temporal layer.
@@ -1000,7 +1085,7 @@ static void set_layer_pattern(
if (enable_longterm_temporal_ref && layer_id->spatial_layer_id == 2 &&
layering_mode == 8) {
ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
- ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
+ if (!is_key_frame) ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0)
ref_frame_config->refresh[REF_FRAMES - 1] = 1;
}
@@ -1100,7 +1185,8 @@ int main(int argc, const char **argv) {
struct RateControlMetrics rc;
int64_t cx_time = 0;
- int64_t cx_time_sl[3]; // max number of spatial layers.
+ int64_t cx_time_layer[AOM_MAX_LAYERS]; // max number of layers.
+ int frame_cnt_layer[AOM_MAX_LAYERS];
double sum_bitrate = 0.0;
double sum_bitrate2 = 0.0;
double framerate = 30.0;
@@ -1244,16 +1330,18 @@ int main(int argc, const char **argv) {
die("Failed to initialize encoder");
#if CONFIG_AV1_DECODER
- if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0)) {
- die("Failed to initialize decoder");
+ if (app_input.decode) {
+ if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0)) {
+ die("Failed to initialize decoder");
+ }
}
#endif
aom_codec_control(&codec, AOME_SET_CPUUSED, app_input.speed);
aom_codec_control(&codec, AV1E_SET_AQ_MODE, app_input.aq_mode ? 3 : 0);
aom_codec_control(&codec, AV1E_SET_GF_CBR_BOOST_PCT, 0);
- aom_codec_control(&codec, AV1E_SET_ENABLE_CDEF, 2);
- aom_codec_control(&codec, AV1E_SET_LOOPFILTER_CONTROL, 2);
+ aom_codec_control(&codec, AV1E_SET_ENABLE_CDEF, 1);
+ aom_codec_control(&codec, AV1E_SET_LOOPFILTER_CONTROL, 1);
aom_codec_control(&codec, AV1E_SET_ENABLE_WARPED_MOTION, 0);
aom_codec_control(&codec, AV1E_SET_ENABLE_OBMC, 0);
aom_codec_control(&codec, AV1E_SET_ENABLE_GLOBAL_MOTION, 0);
@@ -1300,7 +1388,11 @@ int main(int argc, const char **argv) {
max_intra_size_pct);
}
- for (unsigned int slx = 0; slx < ss_number_layers; slx++) cx_time_sl[slx] = 0;
+ for (unsigned int lx = 0; lx < ts_number_layers * ss_number_layers; lx++) {
+ cx_time_layer[lx] = 0;
+ frame_cnt_layer[lx] = 0;
+ }
+
frame_avail = 1;
while (frame_avail || got_data) {
struct aom_usec_timer timer;
@@ -1374,7 +1466,8 @@ int main(int argc, const char **argv) {
die_codec(&codec, "Failed to encode frame");
aom_usec_timer_mark(&timer);
cx_time += aom_usec_timer_elapsed(&timer);
- cx_time_sl[slx] += aom_usec_timer_elapsed(&timer);
+ cx_time_layer[layer] += aom_usec_timer_elapsed(&timer);
+ frame_cnt_layer[layer] += 1;
got_data = 0;
while ((pkt = aom_codec_get_cx_data(&codec, &iter))) {
@@ -1449,9 +1542,11 @@ int main(int argc, const char **argv) {
}
#if CONFIG_AV1_DECODER
- if (aom_codec_decode(&decoder, pkt->data.frame.buf,
- (unsigned int)pkt->data.frame.sz, NULL))
- die_codec(&decoder, "Failed to decode frame.");
+ if (app_input.decode) {
+ if (aom_codec_decode(&decoder, pkt->data.frame.buf,
+ (unsigned int)pkt->data.frame.sz, NULL))
+ die_codec(&decoder, "Failed to decode frame.");
+ }
#endif
break;
@@ -1459,12 +1554,14 @@ int main(int argc, const char **argv) {
}
}
#if CONFIG_AV1_DECODER
- // Don't look for mismatch on top spatial and top temporal layers as they
- // are non reference frames.
- if ((ss_number_layers > 1 || ts_number_layers > 1) &&
- !(layer_id.temporal_layer_id > 0 &&
- layer_id.temporal_layer_id == (int)ts_number_layers - 1)) {
- test_decode(&codec, &decoder, frame_cnt, &mismatch_seen);
+ if (app_input.decode) {
+ // Don't look for mismatch on top spatial and top temporal layers as
+ // they are non reference frames.
+ if ((ss_number_layers > 1 || ts_number_layers > 1) &&
+ !(layer_id.temporal_layer_id > 0 &&
+ layer_id.temporal_layer_id == (int)ts_number_layers - 1)) {
+ test_decode(&codec, &decoder, frame_cnt, &mismatch_seen);
+ }
}
#endif
} // loop over spatial layers
@@ -1475,19 +1572,22 @@ int main(int argc, const char **argv) {
close_input_file(&(app_input.input_ctx));
printout_rate_control_summary(&rc, frame_cnt, ss_number_layers,
ts_number_layers);
+
+ printf("\n");
+ for (unsigned int slx = 0; slx < ss_number_layers; slx++)
+ for (unsigned int tlx = 0; tlx < ts_number_layers; tlx++) {
+ int lx = slx * ts_number_layers + tlx;
+ printf("Per layer encoding time/FPS stats for encoder: %d %d %d %f %f \n",
+ slx, tlx, frame_cnt_layer[lx],
+ (float)cx_time_layer[lx] / (double)(frame_cnt_layer[lx] * 1000),
+ 1000000 * (double)frame_cnt_layer[lx] / (double)cx_time_layer[lx]);
+ }
+
printf("\n");
printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
1000000 * (double)frame_cnt / (double)cx_time);
- if (ss_number_layers > 1) {
- printf("Per spatial layer: \n");
- for (unsigned int slx = 0; slx < ss_number_layers; slx++)
- printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
- frame_cnt, (float)cx_time_sl[slx] / (double)(frame_cnt * 1000),
- 1000000 * (double)frame_cnt / (double)cx_time_sl[slx]);
- }
-
if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
#if CONFIG_INTERNAL_STATS
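
The new RPS example in set_layer_pattern() keeps 8 rotating reference slots: the slot holding LAST is the one refreshed on the previous frame, the refresh target is one slot ahead, and GOLDEN/ALTREF point lag_gld and lag_alt frames back. The index arithmetic can be checked in isolation with a sketch like this (same constants as the hunk, standalone driver added for illustration):

    #include <stdio.h>

    /* Prints the rotating slot assignments used by the RPS example. */
    int main(void) {
      const int sh = 8, lag_alt = 4, lag_gld = 8;
      for (int superframe_cnt = 0; superframe_cnt < 12; ++superframe_cnt) {
        int last_idx = superframe_cnt > 1 ? (superframe_cnt - 1) % sh : 0;
        int last_idx_refresh = superframe_cnt % sh;
        int gld_idx =
            superframe_cnt > lag_gld ? (superframe_cnt - lag_gld) % sh : 0;
        int alt_ref_idx =
            superframe_cnt > lag_alt ? (superframe_cnt - lag_alt) % sh : 0;
        printf("frame %2d: last=%d refresh=%d gld=%d alt=%d\n",
               superframe_cnt, last_idx, last_idx_refresh, gld_idx,
               alt_ref_idx);
      }
      return 0;
    }
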
diff --git a/chromium/third_party/libaom/source/libaom/examples/twopass_encoder.c b/chromium/third_party/libaom/source/libaom/examples/twopass_encoder.c
index b62e7a71afd..388f68bd4dd 100644
--- a/chromium/third_party/libaom/source/libaom/examples/twopass_encoder.c
+++ b/chromium/third_party/libaom/source/libaom/examples/twopass_encoder.c
@@ -83,6 +83,7 @@ static int get_frame_stats(aom_codec_ctx_t *ctx, const aom_image_t *img,
const uint8_t *const pkt_buf = pkt->data.twopass_stats.buf;
const size_t pkt_size = pkt->data.twopass_stats.sz;
stats->buf = realloc(stats->buf, stats->sz + pkt_size);
+ if (!stats->buf) die("Failed to allocate frame stats buffer.");
memcpy((uint8_t *)stats->buf + stats->sz, pkt_buf, pkt_size);
stats->sz += pkt_size;
}
diff --git a/chromium/third_party/libaom/source/libaom/libs.doxy_template b/chromium/third_party/libaom/source/libaom/libs.doxy_template
index 6e042ac931e..ba77751a508 100644
--- a/chromium/third_party/libaom/source/libaom/libs.doxy_template
+++ b/chromium/third_party/libaom/source/libaom/libs.doxy_template
@@ -103,14 +103,6 @@ ALLOW_UNICODE_NAMES = NO
OUTPUT_LANGUAGE = English
-# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all
-# documentation generated by doxygen is written. Doxygen will use this
-# information to generate all generated output in the proper direction.
-# Possible values are: None, LTR, RTL and Context.
-# The default value is: None.
-
-OUTPUT_TEXT_DIRECTION = None
-
# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member
# descriptions after the members that are listed in the file and class
# documentation (similar to Javadoc). Set to NO to disable this.
@@ -1820,16 +1812,6 @@ LATEX_BATCHMODE = NO
LATEX_HIDE_INDICES = NO
-# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source
-# code with syntax highlighting in the LaTeX output.
-#
-# Note that which sources are shown also depends on other settings such as
-# SOURCE_BROWSER.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_LATEX is set to YES.
-
-LATEX_SOURCE_CODE = NO
-
# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
# bibliography, e.g. plainnat, or ieeetr. See
# https://en.wikipedia.org/wiki/BibTeX and \cite for more info.
@@ -1910,16 +1892,6 @@ RTF_STYLESHEET_FILE =
RTF_EXTENSIONS_FILE =
-# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code
-# with syntax highlighting in the RTF output.
-#
-# Note that which sources are shown also depends on other settings such as
-# SOURCE_BROWSER.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_RTF is set to YES.
-
-RTF_SOURCE_CODE = NO
-
#---------------------------------------------------------------------------
# Configuration options related to the man page output
#---------------------------------------------------------------------------
@@ -1982,15 +1954,6 @@ GENERATE_DOCBOOK = NO
DOCBOOK_OUTPUT = docbook
-# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the
-# program listings (including syntax highlighting and cross-referencing
-# information) to the DOCBOOK output. Note that enabling this will significantly
-# increase the size of the DOCBOOK output.
-# The default value is: NO.
-# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
-
-DOCBOOK_PROGRAMLISTING = NO
-
#---------------------------------------------------------------------------
# Configuration options for the AutoGen Definitions output
#---------------------------------------------------------------------------
@@ -2172,15 +2135,6 @@ EXTERNAL_PAGES = YES
# Configuration options related to the dot tool
#---------------------------------------------------------------------------
-# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram
-# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to
-# NO turns the diagrams off. Note that this option also works with HAVE_DOT
-# disabled, but it is recommended to install and use dot, since it yields more
-# powerful graphs.
-# The default value is: YES.
-
-CLASS_DIAGRAMS = YES
-
# You can include diagrams made with dia in doxygen documentation. Doxygen will
# then run dia to produce the diagram and insert it in the documentation. The
# DIA_PATH tag allows you to specify the directory where the dia binary resides.
@@ -2237,11 +2191,14 @@ DOT_FONTSIZE = 10
DOT_FONTPATH =
-# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for
-# each documented class showing the direct and indirect inheritance relations.
-# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO.
+# If the CLASS_GRAPH tag is set to YES (or GRAPH) then doxygen will generate a
+# graph for each documented class showing the direct and indirect inheritance
+# relations. In case HAVE_DOT is set as well dot will be used to draw the graph,
+# otherwise the built-in generator will be used. If the CLASS_GRAPH tag is set
+# to TEXT the direct and indirect inheritance relations will be shown as texts /
+# links.
+# Possible values are: NO, YES, TEXT and GRAPH.
# The default value is: YES.
-# This tag requires that the tag HAVE_DOT is set to YES.
CLASS_GRAPH = YES
diff --git a/chromium/third_party/libaom/source/libaom/stats/rate_hist.c b/chromium/third_party/libaom/source/libaom/stats/rate_hist.c
index d820d51a495..ae76fda1022 100644
--- a/chromium/third_party/libaom/source/libaom/stats/rate_hist.c
+++ b/chromium/third_party/libaom/source/libaom/stats/rate_hist.c
@@ -38,7 +38,13 @@ struct rate_hist {
struct rate_hist *init_rate_histogram(const aom_codec_enc_cfg_t *cfg,
const aom_rational_t *fps) {
int i;
- struct rate_hist *hist = malloc(sizeof(*hist));
+ struct rate_hist *hist = calloc(1, sizeof(*hist));
+
+ if (hist == NULL || cfg == NULL || fps == NULL || fps->num == 0 ||
+ fps->den == 0) {
+ destroy_rate_histogram(hist);
+ return NULL;
+ }
// Determine the number of samples in the buffer. Use the file's framerate
// to determine the number of frames in rc_buf_sz milliseconds, with an
@@ -81,7 +87,11 @@ void update_rate_histogram(struct rate_hist *hist,
(uint64_t)cfg->g_timebase.num /
(uint64_t)cfg->g_timebase.den;
- int idx = hist->frames++ % hist->samples;
+ int idx;
+
+ if (hist == NULL || cfg == NULL || pkt == NULL) return;
+
+ idx = hist->frames++ % hist->samples;
hist->pts[idx] = now;
hist->sz[idx] = (int)pkt->data.frame.sz;
@@ -117,9 +127,14 @@ void update_rate_histogram(struct rate_hist *hist,
static int merge_hist_buckets(struct hist_bucket *bucket, int max_buckets,
int *num_buckets) {
int small_bucket = 0, merge_bucket = INT_MAX, big_bucket = 0;
- int buckets = *num_buckets;
+ int buckets;
int i;
+ assert(bucket != NULL);
+ assert(num_buckets != NULL);
+
+ buckets = *num_buckets;
+
/* Find the extrema for this list of buckets */
big_bucket = small_bucket = 0;
for (i = 0; i < buckets; i++) {
@@ -182,6 +197,10 @@ static void show_histogram(const struct hist_bucket *bucket, int buckets,
int width1, width2;
int i;
+ if (!buckets) return;
+ assert(bucket != NULL);
+ assert(buckets > 0);
+
switch ((int)(log(bucket[buckets - 1].high) / log(10)) + 1) {
case 1:
case 2:
@@ -261,6 +280,8 @@ void show_rate_histogram(struct rate_hist *hist, const aom_codec_enc_cfg_t *cfg,
int i, scale;
int buckets = 0;
+ if (hist == NULL || cfg == NULL) return;
+
for (i = 0; i < RATE_BINS; i++) {
if (hist->bucket[i].low == INT_MAX) continue;
hist->bucket[buckets++] = hist->bucket[i];
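
init_rate_histogram() now allocates with calloc() and validates its inputs before touching them; because the struct starts zeroed, destroy_rate_histogram() can safely tear down a half-constructed (or NULL) histogram, which the patch relies on. A reduced sketch of that create/destroy contract (hist_create/hist_destroy and the fields are illustrative, assuming as the patch does that the destroy function tolerates NULL):

    #include <stdlib.h>

    struct hist { int *pts; };

    static void hist_destroy(struct hist *h) {
      if (h) { /* tolerate NULL, like destroy_rate_histogram() */
        free(h->pts);
        free(h);
      }
    }

    /* calloc + validate-then-destroy: zero-initialized state means the
     * destructor is safe on a partially constructed object. */
    static struct hist *hist_create(int samples) {
      struct hist *h = calloc(1, sizeof(*h));
      if (h == NULL || samples <= 0) {
        hist_destroy(h);
        return NULL;
      }
      h->pts = calloc((size_t)samples, sizeof(*h->pts));
      if (!h->pts) {
        hist_destroy(h);
        return NULL;
      }
      return h;
    }
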
diff --git a/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/EbMemory_AVX2.h b/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/EbMemory_AVX2.h
new file mode 100644
index 00000000000..0d0ea10abca
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/EbMemory_AVX2.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright(c) 2019 Intel Corporation
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at https://www.aomedia.org/license/software-license. If the
+ * Alliance for Open Media Patent License 1.0 was not distributed with this
+ * source code in the PATENTS file, you can obtain it at
+ * https://www.aomedia.org/license/patent-license.
+ */
+
+#ifndef AOM_THIRD_PARTY_SVT_AV1_EBMEMORY_AVX2_H_
+#define AOM_THIRD_PARTY_SVT_AV1_EBMEMORY_AVX2_H_
+
+#include <immintrin.h>
+
+#include "config/aom_config.h"
+
+#include "aom/aom_integer.h"
+
+#ifndef _mm256_set_m128i
+#define _mm256_set_m128i(/* __m128i */ hi, /* __m128i */ lo) \
+ _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 0x1)
+#endif
+
+#ifndef _mm256_setr_m128i
+#define _mm256_setr_m128i(/* __m128i */ lo, /* __m128i */ hi) \
+ _mm256_set_m128i((hi), (lo))
+#endif
+
+static INLINE __m256i load_u8_4x2_avx2(const uint8_t *const src,
+ const ptrdiff_t stride) {
+ __m128i src01;
+ src01 = _mm_cvtsi32_si128(*(int32_t *)(src + 0 * stride));
+ src01 = _mm_insert_epi32(src01, *(int32_t *)(src + 1 * stride), 1);
+ return _mm256_setr_m128i(src01, _mm_setzero_si128());
+}
+
+static INLINE __m256i load_u8_4x4_avx2(const uint8_t *const src,
+ const ptrdiff_t stride) {
+ __m128i src01, src23;
+ src01 = _mm_cvtsi32_si128(*(int32_t *)(src + 0 * stride));
+ src01 = _mm_insert_epi32(src01, *(int32_t *)(src + 1 * stride), 1);
+ src23 = _mm_cvtsi32_si128(*(int32_t *)(src + 2 * stride));
+ src23 = _mm_insert_epi32(src23, *(int32_t *)(src + 3 * stride), 1);
+ return _mm256_setr_m128i(src01, src23);
+}
+
+static INLINE __m256i load_u8_8x2_avx2(const uint8_t *const src,
+ const ptrdiff_t stride) {
+ const __m128i src0 = _mm_loadl_epi64((__m128i *)(src + 0 * stride));
+ const __m128i src1 = _mm_loadl_epi64((__m128i *)(src + 1 * stride));
+ return _mm256_setr_m128i(src0, src1);
+}
+
+static INLINE __m256i load_u8_8x4_avx2(const uint8_t *const src,
+ const ptrdiff_t stride) {
+ __m128i src01, src23;
+ src01 = _mm_loadl_epi64((__m128i *)(src + 0 * stride));
+ src01 = _mm_castpd_si128(_mm_loadh_pd(_mm_castsi128_pd(src01),
+ (double *)(void *)(src + 1 * stride)));
+ src23 = _mm_loadl_epi64((__m128i *)(src + 2 * stride));
+ src23 = _mm_castpd_si128(_mm_loadh_pd(_mm_castsi128_pd(src23),
+ (double *)(void *)(src + 3 * stride)));
+ return _mm256_setr_m128i(src01, src23);
+}
+
+static INLINE __m256i loadu_8bit_16x2_avx2(const void *const src,
+ const ptrdiff_t strideInByte) {
+ const __m128i src0 = _mm_loadu_si128((__m128i *)src);
+ const __m128i src1 =
+ _mm_loadu_si128((__m128i *)((uint8_t *)src + strideInByte));
+ return _mm256_setr_m128i(src0, src1);
+}
+
+static INLINE __m256i loadu_u8_16x2_avx2(const uint8_t *const src,
+ const ptrdiff_t stride) {
+ return loadu_8bit_16x2_avx2(src, sizeof(*src) * stride);
+}
+
+static INLINE __m256i loadu_u16_8x2_avx2(const uint16_t *const src,
+ const ptrdiff_t stride) {
+ return loadu_8bit_16x2_avx2(src, sizeof(*src) * stride);
+}
+
+static INLINE void storeu_8bit_16x2_avx2(const __m256i src, void *const dst,
+ const ptrdiff_t strideInByte) {
+ const __m128i d0 = _mm256_castsi256_si128(src);
+ const __m128i d1 = _mm256_extracti128_si256(src, 1);
+ _mm_storeu_si128((__m128i *)dst, d0);
+ _mm_storeu_si128((__m128i *)((uint8_t *)dst + strideInByte), d1);
+}
+
+static INLINE void storeu_u8_16x2_avx2(const __m256i src, uint8_t *const dst,
+ const ptrdiff_t stride) {
+ storeu_8bit_16x2_avx2(src, dst, sizeof(*dst) * stride);
+}
+
+static INLINE void storeu_s16_8x2_avx2(const __m256i src, int16_t *const dst,
+ const ptrdiff_t stride) {
+ storeu_8bit_16x2_avx2(src, dst, sizeof(*dst) * stride);
+}
+
+static INLINE void storeu_u16_8x2_avx2(const __m256i src, uint16_t *const dst,
+ const ptrdiff_t stride) {
+ storeu_8bit_16x2_avx2(src, dst, sizeof(*dst) * stride);
+}
+
+#endif // AOM_THIRD_PARTY_SVT_AV1_EBMEMORY_AVX2_H_
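
A hypothetical caller of these helpers, compiled with AVX2 enabled and this header on the include path, might process two rows per iteration; for example, averaging each 16-byte row with the row below it (avg_with_next_row_16x2 is illustrative and not part of the port):

    #include <immintrin.h>
    #include <stddef.h>
    #include <stdint.h>
    /* assumes EbMemory_AVX2.h (above) is included */

    void avg_with_next_row_16x2(const uint8_t *src, ptrdiff_t stride,
                                uint8_t *dst) {
      /* rows 0 and 1 land in the low/high 128-bit lanes */
      const __m256i rows = loadu_u8_16x2_avx2(src, stride);
      /* swap lanes so each row lines up with its neighbour */
      const __m256i swapped =
          _mm256_permute2x128_si256(rows, rows, 0x01);
      const __m256i avg = _mm256_avg_epu8(rows, swapped);
      storeu_u8_16x2_avx2(avg, dst, stride);
    }
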
diff --git a/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/EbMemory_SSE4_1.h b/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/EbMemory_SSE4_1.h
new file mode 100644
index 00000000000..d821d9a307e
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/EbMemory_SSE4_1.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright(c) 2019 Intel Corporation
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at https://www.aomedia.org/license/software-license. If the
+ * Alliance for Open Media Patent License 1.0 was not distributed with this
+ * source code in the PATENTS file, you can obtain it at
+ * https://www.aomedia.org/license/patent-license.
+ */
+
+#ifndef AOM_THIRD_PARTY_SVT_AV1_EBMEMORY_SSE4_1_H_
+#define AOM_THIRD_PARTY_SVT_AV1_EBMEMORY_SSE4_1_H_
+
+#include <smmintrin.h>
+
+#include "config/aom_config.h"
+
+#include "aom/aom_integer.h"
+
+static INLINE __m128i load8bit_4x2_sse4_1(const void *const src,
+ const ptrdiff_t strideInByte) {
+ const __m128i s = _mm_cvtsi32_si128(*(int32_t *)((uint8_t *)src));
+ return _mm_insert_epi32(s, *(int32_t *)((uint8_t *)src + strideInByte), 1);
+}
+
+static INLINE __m128i load_u8_4x2_sse4_1(const uint8_t *const src,
+ const ptrdiff_t stride) {
+ return load8bit_4x2_sse4_1(src, sizeof(*src) * stride);
+}
+
+static INLINE __m128i load_u16_2x2_sse4_1(const uint16_t *const src,
+ const ptrdiff_t stride) {
+ return load8bit_4x2_sse4_1(src, sizeof(*src) * stride);
+}
+
+#endif // AOM_THIRD_PARTY_SVT_AV1_EBMEMORY_SSE4_1_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/LICENSE.md b/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/LICENSE.md
new file mode 100644
index 00000000000..aff96d15ed2
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/LICENSE.md
@@ -0,0 +1,32 @@
+BSD 3-Clause Clear License
+The Clear BSD License
+
+Copyright (c) 2021, Alliance for Open Media
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted (subject to the limitations in the disclaimer below)
+provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the Alliance for Open Media nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY THIS LICENSE.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/PATENTS.md b/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/PATENTS.md
new file mode 100644
index 00000000000..1de4dd7531b
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/PATENTS.md
@@ -0,0 +1,107 @@
+**Alliance for Open Media Patent License 1.0**
+
+ 1. **License Terms.**
+
+ **Patent License.** Subject to the terms and conditions of this License, each
+ Licensor, on behalf of itself and successors in interest and assigns,
+ grants Licensee a non-sublicensable, perpetual, worldwide, non-exclusive,
+ no-charge, royalty-free, irrevocable (except as expressly stated in this
+ License) patent license to its Necessary Claims to make, use, sell, offer
+ for sale, import or distribute any Implementation.
+
+ **Conditions.**
+
+ *Availability.* As a condition to the grant of rights to Licensee to make,
+ sell, offer for sale, import or distribute an Implementation under
+ Section 1.1, Licensee must make its Necessary Claims available under
+ this License, and must reproduce this License with any Implementation
+ as follows:
+
+ a. For distribution in source code, by including this License in the
+ root directory of the source code with its Implementation.
+
+ b. For distribution in any other form (including binary, object form,
+ and/or hardware description code (e.g., HDL, RTL, Gate Level Netlist,
+ GDSII, etc.)), by including this License in the documentation, legal
+ notices, and/or other written materials provided with the
+ Implementation.
+
+ *Additional Conditions.* This license is directly from Licensor to
+ Licensee. Licensee acknowledges as a condition of benefiting from it
+ that no rights from Licensor are received from suppliers, distributors,
+ or otherwise in connection with this License.
+
+ **Defensive Termination**. If any Licensee, its Affiliates, or its agents
+ initiates patent litigation or files, maintains, or voluntarily
+ participates in a lawsuit against another entity or any person asserting
+ that any Implementation infringes Necessary Claims, any patent licenses
+ granted under this License directly to the Licensee are immediately
+ terminated as of the date of the initiation of action unless 1) that suit
+ was in response to a corresponding suit regarding an Implementation first
+ brought against an initiating entity, or 2) that suit was brought to
+ enforce the terms of this License (including intervention in a third-party
+ action by a Licensee).
+
+ **Disclaimers.** The Reference Implementation and Specification are provided
+ "AS IS" and without warranty. The entire risk as to implementing or
+ otherwise using the Reference Implementation or Specification is assumed
+ by the implementer and user. Licensor expressly disclaims any warranties
+ (express, implied, or otherwise), including implied warranties of
+ merchantability, non-infringement, fitness for a particular purpose, or
+ title, related to the material. IN NO EVENT WILL LICENSOR BE LIABLE TO
+ ANY OTHER PARTY FOR LOST PROFITS OR ANY FORM OF INDIRECT, SPECIAL,
+ INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER FROM ANY CAUSES OF
+ ACTION OF ANY KIND WITH RESPECT TO THIS LICENSE, WHETHER BASED ON BREACH
+ OF CONTRACT, TORT (INCLUDING NEGLIGENCE), OR OTHERWISE, AND WHETHER OR
+  NOT THE OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+2. **Definitions.**
+
+ **Affiliate.** "Affiliate" means an entity that directly or indirectly
+ Controls, is Controlled by, or is under common Control of that party.
+
+ **Control.** "Control" means direct or indirect control of more than 50% of
+ the voting power to elect directors of that corporation, or for any other
+ entity, the power to direct management of such entity.
+
+ **Decoder.** "Decoder" means any decoder that conforms fully with all
+ non-optional portions of the Specification.
+
+ **Encoder.** "Encoder" means any encoder that produces a bitstream that can
+ be decoded by a Decoder only to the extent it produces such a bitstream.
+
+ **Final Deliverable.** "Final Deliverable" means the final version of a
+ deliverable approved by the Alliance for Open Media as a Final
+ Deliverable.
+
+ **Implementation.** "Implementation" means any implementation, including the
+ Reference Implementation, that is an Encoder and/or a Decoder. An
+ Implementation also includes components of an Implementation only to the
+ extent they are used as part of an Implementation.
+
+ **License.** "License" means this license.
+
+ **Licensee.** "Licensee" means any person or entity who exercises patent
+ rights granted under this License.
+
+ **Licensor.** "Licensor" means (i) any Licensee that makes, sells, offers
+ for sale, imports or distributes any Implementation, or (ii) a person
+ or entity that has a licensing obligation to the Implementation as a
+ result of its membership and/or participation in the Alliance for Open
+ Media working group that developed the Specification.
+
+ **Necessary Claims.** "Necessary Claims" means all claims of patents or
+ patent applications, (a) that currently or at any time in the future,
+ are owned or controlled by the Licensor, and (b) (i) would be an
+ Essential Claim as defined by the W3C Policy as of February 5, 2004
+ (https://www.w3.org/Consortium/Patent-Policy-20040205/#def-essential)
+ as if the Specification was a W3C Recommendation; or (ii) are infringed
+ by the Reference Implementation.
+
+ **Reference Implementation.** "Reference Implementation" means an Encoder
+ and/or Decoder released by the Alliance for Open Media as a Final
+ Deliverable.
+
+ **Specification.** "Specification" means the specification designated by
+ the Alliance for Open Media as a Final Deliverable for which this
+ License was issued.
diff --git a/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/README.libaom b/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/README.libaom
new file mode 100644
index 00000000000..ff365057eb8
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/README.libaom
@@ -0,0 +1,14 @@
+URL: https://gitlab.com/AOMediaCodec/SVT-AV1
+
+Version: 8ff99c90359330d2e807757c9425560bbc452ff3
+License: BSD-3-clause clear
+License File: LICENSE.md
+
+Description:
+Port the x86 intrinsics used for single reference convolve reconstructions.
+
+Local Changes:
+Only ported the functions pertinent to single reference convolves.
+All functions are made static inline to avoid function call overheads.
+References to some arrays are changed to libaom version when applicable.
+Some extra intrinsic functions are added to support missing block sizes.
diff --git a/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/convolve_2d_avx2.h b/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/convolve_2d_avx2.h
new file mode 100644
index 00000000000..64cd810f772
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/convolve_2d_avx2.h
@@ -0,0 +1,1199 @@
+/*
+ * Copyright (c) 2017, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef THIRD_PARTY_SVT_AV1_CONVOLVE_2D_AVX2_H_
+#define THIRD_PARTY_SVT_AV1_CONVOLVE_2D_AVX2_H_
+
+#include "convolve_avx2.h"
+
+static void convolve_2d_sr_hor_2tap_avx2(
+ const uint8_t *const src, const int32_t src_stride, const int32_t w,
+ const int32_t h, const InterpFilterParams *const filter_params_x,
+ const int32_t subpel_x_q4, int16_t *const im_block) {
+ const uint8_t *src_ptr = src;
+ int32_t y = h;
+ int16_t *im = im_block;
+
+ if (w <= 8) {
+ __m128i coeffs_128;
+
+ prepare_half_coeffs_2tap_ssse3(filter_params_x, subpel_x_q4, &coeffs_128);
+
+ if (w == 2) {
+ do {
+ const __m128i r =
+ x_convolve_2tap_2x2_sse4_1(src_ptr, src_stride, &coeffs_128);
+ xy_x_round_store_2x2_sse2(r, im);
+ src_ptr += 2 * src_stride;
+ im += 2 * 2;
+ y -= 2;
+ } while (y);
+ } else if (w == 4) {
+ do {
+ const __m128i r =
+ x_convolve_2tap_4x2_ssse3(src_ptr, src_stride, &coeffs_128);
+ xy_x_round_store_4x2_sse2(r, im);
+ src_ptr += 2 * src_stride;
+ im += 2 * 4;
+ y -= 2;
+ } while (y);
+ } else {
+ assert(w == 8);
+
+ do {
+ __m128i r[2];
+
+ x_convolve_2tap_8x2_ssse3(src_ptr, src_stride, &coeffs_128, r);
+ xy_x_round_store_8x2_sse2(r, im);
+ src_ptr += 2 * src_stride;
+ im += 2 * 8;
+ y -= 2;
+ } while (y);
+ }
+ } else {
+ __m256i coeffs_256;
+
+ prepare_half_coeffs_2tap_avx2(filter_params_x, subpel_x_q4, &coeffs_256);
+
+ if (w == 16) {
+ do {
+ __m256i r[2];
+
+ x_convolve_2tap_16x2_avx2(src_ptr, src_stride, &coeffs_256, r);
+ xy_x_round_store_32_avx2(r, im);
+ src_ptr += 2 * src_stride;
+ im += 2 * 16;
+ y -= 2;
+ } while (y);
+ } else if (w == 32) {
+ do {
+ xy_x_2tap_32_avx2(src_ptr, &coeffs_256, im);
+ src_ptr += src_stride;
+ im += 32;
+ } while (--y);
+ } else if (w == 64) {
+ do {
+ xy_x_2tap_32_avx2(src_ptr + 0 * 32, &coeffs_256, im + 0 * 32);
+ xy_x_2tap_32_avx2(src_ptr + 1 * 32, &coeffs_256, im + 1 * 32);
+ src_ptr += src_stride;
+ im += 64;
+ } while (--y);
+ } else {
+ assert(w == 128);
+
+ do {
+ xy_x_2tap_32_avx2(src_ptr + 0 * 32, &coeffs_256, im + 0 * 32);
+ xy_x_2tap_32_avx2(src_ptr + 1 * 32, &coeffs_256, im + 1 * 32);
+ xy_x_2tap_32_avx2(src_ptr + 2 * 32, &coeffs_256, im + 2 * 32);
+ xy_x_2tap_32_avx2(src_ptr + 3 * 32, &coeffs_256, im + 3 * 32);
+ src_ptr += src_stride;
+ im += 128;
+ } while (--y);
+ }
+ }
+}
+
+static void convolve_2d_sr_hor_4tap_ssse3(
+ const uint8_t *const src, const int32_t src_stride, const int32_t w,
+ const int32_t h, const InterpFilterParams *const filter_params_x,
+ const int32_t subpel_x_q4, int16_t *const im_block) {
+ const uint8_t *src_ptr = src - 1;
+ int32_t y = h;
+ int16_t *im = im_block;
+
+ if (w <= 4) {
+ __m128i coeffs_128[2];
+
+ prepare_half_coeffs_4tap_ssse3(filter_params_x, subpel_x_q4, coeffs_128);
+ if (w == 2) {
+ do {
+ const __m128i r =
+ x_convolve_4tap_2x2_ssse3(src_ptr, src_stride, coeffs_128);
+ xy_x_round_store_2x2_sse2(r, im);
+ src_ptr += 2 * src_stride;
+ im += 2 * 2;
+ y -= 2;
+ } while (y);
+ } else if (w == 4) {
+ do {
+ const __m128i r =
+ x_convolve_4tap_4x2_ssse3(src_ptr, src_stride, coeffs_128);
+ xy_x_round_store_4x2_sse2(r, im);
+ src_ptr += 2 * src_stride;
+ im += 2 * 4;
+ y -= 2;
+ } while (y);
+ }
+ } else {
+ // TODO(chiyotsai@google.com): Add better optimization
+ __m256i coeffs_256[2], filt_256[2];
+
+ prepare_half_coeffs_4tap_avx2(filter_params_x, subpel_x_q4, coeffs_256);
+ filt_256[0] = _mm256_load_si256((__m256i const *)filt1_global_avx2);
+ filt_256[1] = _mm256_load_si256((__m256i const *)filt2_global_avx2);
+
+ if (w == 8) {
+ do {
+ __m256i res =
+ x_convolve_4tap_8x2_avx2(src_ptr, src_stride, coeffs_256, filt_256);
+ xy_x_round_store_8x2_avx2(res, im);
+
+ src_ptr += 2 * src_stride;
+ im += 2 * 8;
+ y -= 2;
+ } while (y);
+ } else if (w == 16) {
+ do {
+ __m256i r[2];
+
+ x_convolve_4tap_16x2_avx2(src_ptr, src_stride, coeffs_256, filt_256, r);
+ xy_x_round_store_32_avx2(r, im);
+ src_ptr += 2 * src_stride;
+ im += 2 * 16;
+ y -= 2;
+ } while (y);
+ } else if (w == 32) {
+ do {
+ xy_x_4tap_32_avx2(src_ptr, coeffs_256, filt_256, im);
+
+ src_ptr += src_stride;
+ im += 32;
+ } while (--y);
+ } else if (w == 64) {
+ do {
+ xy_x_4tap_32_avx2(src_ptr, coeffs_256, filt_256, im);
+ xy_x_4tap_32_avx2(src_ptr + 32, coeffs_256, filt_256, im + 32);
+ src_ptr += src_stride;
+ im += 64;
+ } while (--y);
+ } else {
+ assert(w == 128);
+
+ do {
+ xy_x_4tap_32_avx2(src_ptr, coeffs_256, filt_256, im);
+ xy_x_4tap_32_avx2(src_ptr + 32, coeffs_256, filt_256, im + 32);
+ xy_x_4tap_32_avx2(src_ptr + 64, coeffs_256, filt_256, im + 64);
+ xy_x_4tap_32_avx2(src_ptr + 96, coeffs_256, filt_256, im + 96);
+ src_ptr += src_stride;
+ im += 128;
+ } while (--y);
+ }
+ }
+}
+
+static void convolve_2d_sr_hor_6tap_avx2(
+ const uint8_t *const src, const int32_t src_stride, const int32_t w,
+ const int32_t h, const InterpFilterParams *const filter_params_x,
+ const int32_t subpel_x_q4, int16_t *const im_block) {
+ const uint8_t *src_ptr = src - 2;
+ int32_t y = h;
+ int16_t *im = im_block;
+
+ if (w <= 4) {
+ __m128i coeffs_128[3];
+
+ prepare_half_coeffs_6tap_ssse3(filter_params_x, subpel_x_q4, coeffs_128);
+ if (w == 2) {
+ do {
+ const __m128i r =
+ x_convolve_6tap_2x2_ssse3(src_ptr, src_stride, coeffs_128);
+ xy_x_round_store_2x2_sse2(r, im);
+ src_ptr += 2 * src_stride;
+ im += 2 * 2;
+ y -= 2;
+ } while (y);
+ } else if (w == 4) {
+ do {
+ const __m128i r =
+ x_convolve_6tap_4x2_ssse3(src_ptr, src_stride, coeffs_128);
+ xy_x_round_store_4x2_sse2(r, im);
+ src_ptr += 2 * src_stride;
+ im += 2 * 4;
+ y -= 2;
+ } while (y);
+ }
+ } else {
+ __m256i coeffs_256[3], filt_256[3];
+
+ filt_256[0] = _mm256_loadu_si256((__m256i const *)filt1_global_avx2);
+ filt_256[1] = _mm256_loadu_si256((__m256i const *)filt2_global_avx2);
+ filt_256[2] = _mm256_loadu_si256((__m256i const *)filt3_global_avx2);
+
+ prepare_half_coeffs_6tap_avx2(filter_params_x, subpel_x_q4, coeffs_256);
+
+ if (w == 8) {
+ do {
+ const __m256i res =
+ x_convolve_6tap_8x2_avx2(src_ptr, src_stride, coeffs_256, filt_256);
+ xy_x_round_store_8x2_avx2(res, im);
+
+ src_ptr += 2 * src_stride;
+ im += 2 * 8;
+ y -= 2;
+ } while (y);
+ } else if (w == 16) {
+ do {
+ __m256i r[2];
+
+ x_convolve_6tap_16x2_avx2(src_ptr, src_stride, coeffs_256, filt_256, r);
+ xy_x_round_store_32_avx2(r, im);
+ src_ptr += 2 * src_stride;
+ im += 2 * 16;
+ y -= 2;
+ } while (y);
+ } else if (w == 32) {
+ do {
+ xy_x_6tap_32_avx2(src_ptr, coeffs_256, filt_256, im);
+ src_ptr += src_stride;
+ im += 32;
+ } while (--y);
+ } else if (w == 64) {
+ do {
+ xy_x_6tap_32_avx2(src_ptr, coeffs_256, filt_256, im);
+ xy_x_6tap_32_avx2(src_ptr + 32, coeffs_256, filt_256, im + 32);
+ src_ptr += src_stride;
+ im += 64;
+ } while (--y);
+ } else {
+ assert(w == 128);
+
+ do {
+ xy_x_6tap_32_avx2(src_ptr, coeffs_256, filt_256, im);
+ xy_x_6tap_32_avx2(src_ptr + 32, coeffs_256, filt_256, im + 32);
+ xy_x_6tap_32_avx2(src_ptr + 64, coeffs_256, filt_256, im + 64);
+ xy_x_6tap_32_avx2(src_ptr + 96, coeffs_256, filt_256, im + 96);
+ src_ptr += src_stride;
+ im += 128;
+ } while (--y);
+ }
+ }
+}
+
+static void convolve_2d_sr_hor_8tap_avx2(
+ const uint8_t *const src, const int32_t src_stride, const int32_t w,
+ const int32_t h, const InterpFilterParams *const filter_params_x,
+ const int32_t subpel_x_q4, int16_t *const im_block) {
+ const uint8_t *src_ptr = src - 3;
+ int32_t y = h;
+ int16_t *im = im_block;
+ __m256i coeffs_256[4], filt_256[4];
+
+ filt_256[0] = _mm256_loadu_si256((__m256i const *)filt1_global_avx2);
+ filt_256[1] = _mm256_loadu_si256((__m256i const *)filt2_global_avx2);
+ filt_256[2] = _mm256_loadu_si256((__m256i const *)filt3_global_avx2);
+ filt_256[3] = _mm256_loadu_si256((__m256i const *)filt4_global_avx2);
+
+ prepare_half_coeffs_8tap_avx2(filter_params_x, subpel_x_q4, coeffs_256);
+
+ if (w == 8) {
+ do {
+ const __m256i res =
+ x_convolve_8tap_8x2_avx2(src_ptr, src_stride, coeffs_256, filt_256);
+ xy_x_round_store_8x2_avx2(res, im);
+ src_ptr += 2 * src_stride;
+ im += 2 * 8;
+ y -= 2;
+ } while (y);
+ } else if (w == 16) {
+ do {
+ __m256i r[2];
+
+ x_convolve_8tap_16x2_avx2(src_ptr, src_stride, coeffs_256, filt_256, r);
+ xy_x_round_store_32_avx2(r, im);
+ src_ptr += 2 * src_stride;
+ im += 2 * 16;
+ y -= 2;
+ } while (y);
+ } else if (w == 32) {
+ do {
+ xy_x_8tap_32_avx2(src_ptr, coeffs_256, filt_256, im);
+ src_ptr += src_stride;
+ im += 32;
+ } while (--y);
+ } else if (w == 64) {
+ do {
+ xy_x_8tap_32_avx2(src_ptr, coeffs_256, filt_256, im);
+ xy_x_8tap_32_avx2(src_ptr + 32, coeffs_256, filt_256, im + 32);
+ src_ptr += src_stride;
+ im += 64;
+ } while (--y);
+ } else {
+ assert(w == 128);
+
+ do {
+ xy_x_8tap_32_avx2(src_ptr, coeffs_256, filt_256, im);
+ xy_x_8tap_32_avx2(src_ptr + 32, coeffs_256, filt_256, im + 32);
+ xy_x_8tap_32_avx2(src_ptr + 64, coeffs_256, filt_256, im + 64);
+ xy_x_8tap_32_avx2(src_ptr + 96, coeffs_256, filt_256, im + 96);
+ src_ptr += src_stride;
+ im += 128;
+ } while (--y);
+ }
+}
+
+static void convolve_2d_sr_ver_2tap_avx2(
+ const int16_t *const im_block, const int32_t w, const int32_t h,
+ const InterpFilterParams *const filter_params_y, const int32_t subpel_y_q4,
+ uint8_t *dst, const int32_t dst_stride) {
+ const int16_t *im = im_block;
+ int32_t y = h;
+
+ if (w <= 4) {
+ __m128i coeffs_128;
+
+ prepare_coeffs_2tap_sse2(filter_params_y, subpel_y_q4, &coeffs_128);
+
+ if (w == 2) {
+ __m128i s_32[2];
+
+ s_32[0] = _mm_cvtsi32_si128(*(int32_t *)im);
+
+ do {
+ const __m128i res = xy_y_convolve_2tap_2x2_sse2(im, s_32, &coeffs_128);
+ xy_y_round_store_2x2_sse2(res, dst, dst_stride);
+ im += 2 * 2;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ __m128i s_64[2], r[2];
+
+ assert(w == 4);
+
+ s_64[0] = _mm_loadl_epi64((__m128i *)im);
+
+ do {
+ xy_y_convolve_2tap_4x2_sse2(im, s_64, &coeffs_128, r);
+ r[0] = xy_y_round_sse2(r[0]);
+ r[1] = xy_y_round_sse2(r[1]);
+ const __m128i rr = _mm_packs_epi32(r[0], r[1]);
+ pack_store_4x2_sse2(rr, dst, dst_stride);
+ im += 2 * 4;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ }
+ } else {
+ __m256i coeffs_256;
+
+ prepare_coeffs_2tap_avx2(filter_params_y, subpel_y_q4, &coeffs_256);
+
+ if (w == 8) {
+ __m128i s_128[2];
+ __m256i r[2];
+
+ s_128[0] = _mm_loadu_si128((__m128i *)im);
+
+ do {
+ xy_y_convolve_2tap_8x2_avx2(im, s_128, &coeffs_256, r);
+ xy_y_round_store_8x2_avx2(r, dst, dst_stride);
+ im += 2 * 8;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 16) {
+ __m256i s_256[2], r[4];
+
+ s_256[0] = _mm256_loadu_si256((__m256i *)im);
+
+ do {
+ xy_y_convolve_2tap_16x2_avx2(im, s_256, &coeffs_256, r);
+ xy_y_round_store_16x2_avx2(r, dst, dst_stride);
+ im += 2 * 16;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 32) {
+ __m256i s_256[2][2];
+
+ s_256[0][0] = _mm256_loadu_si256((__m256i *)(im + 0 * 16));
+ s_256[0][1] = _mm256_loadu_si256((__m256i *)(im + 1 * 16));
+
+ do {
+ xy_y_convolve_2tap_32_all_avx2(im + 32, s_256[0], s_256[1], &coeffs_256,
+ dst);
+ im += 2 * 32;
+ xy_y_convolve_2tap_32_all_avx2(im, s_256[1], s_256[0], &coeffs_256,
+ dst + dst_stride);
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 64) {
+ __m256i s_256[2][4];
+
+ s_256[0][0] = _mm256_loadu_si256((__m256i *)(im + 0 * 16));
+ s_256[0][1] = _mm256_loadu_si256((__m256i *)(im + 1 * 16));
+ s_256[0][2] = _mm256_loadu_si256((__m256i *)(im + 2 * 16));
+ s_256[0][3] = _mm256_loadu_si256((__m256i *)(im + 3 * 16));
+
+ do {
+ xy_y_convolve_2tap_32_all_avx2(im + 64, s_256[0] + 0, s_256[1] + 0,
+ &coeffs_256, dst);
+ xy_y_convolve_2tap_32_all_avx2(im + 96, s_256[0] + 2, s_256[1] + 2,
+ &coeffs_256, dst + 32);
+ im += 2 * 64;
+ xy_y_convolve_2tap_32_all_avx2(im, s_256[1] + 0, s_256[0] + 0,
+ &coeffs_256, dst + dst_stride);
+ xy_y_convolve_2tap_32_all_avx2(im + 32, s_256[1] + 2, s_256[0] + 2,
+ &coeffs_256, dst + dst_stride + 32);
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ __m256i s_256[2][8];
+
+ assert(w == 128);
+
+ load_16bit_8rows_avx2(im, 16, s_256[0]);
+
+ do {
+ xy_y_convolve_2tap_32_all_avx2(im + 128, s_256[0] + 0, s_256[1] + 0,
+ &coeffs_256, dst);
+ xy_y_convolve_2tap_32_all_avx2(im + 160, s_256[0] + 2, s_256[1] + 2,
+ &coeffs_256, dst + 1 * 32);
+ xy_y_convolve_2tap_32_all_avx2(im + 192, s_256[0] + 4, s_256[1] + 4,
+ &coeffs_256, dst + 2 * 32);
+ xy_y_convolve_2tap_32_all_avx2(im + 224, s_256[0] + 6, s_256[1] + 6,
+ &coeffs_256, dst + 3 * 32);
+ im += 2 * 128;
+ xy_y_convolve_2tap_32_all_avx2(im, s_256[1] + 0, s_256[0] + 0,
+ &coeffs_256, dst + dst_stride);
+ xy_y_convolve_2tap_32_all_avx2(im + 32, s_256[1] + 2, s_256[0] + 2,
+ &coeffs_256, dst + dst_stride + 1 * 32);
+ xy_y_convolve_2tap_32_all_avx2(im + 64, s_256[1] + 4, s_256[0] + 4,
+ &coeffs_256, dst + dst_stride + 2 * 32);
+ xy_y_convolve_2tap_32_all_avx2(im + 96, s_256[1] + 6, s_256[0] + 6,
+ &coeffs_256, dst + dst_stride + 3 * 32);
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ }
+ }
+}
+
+static void convolve_2d_sr_ver_2tap_half_avx2(
+ const int16_t *const im_block, const int32_t w, const int32_t h,
+ const InterpFilterParams *const filter_params_y, const int32_t subpel_y_q4,
+ uint8_t *dst, const int32_t dst_stride) {
+ const int16_t *im = im_block;
+ int32_t y = h;
+
+ (void)filter_params_y;
+ (void)subpel_y_q4;
+
+ if (w == 2) {
+ __m128i s_32[2];
+
+ s_32[0] = _mm_cvtsi32_si128(*(int32_t *)im);
+
+ do {
+ const __m128i res = xy_y_convolve_2tap_2x2_half_pel_sse2(im, s_32);
+ const __m128i r = xy_y_round_half_pel_sse2(res);
+ pack_store_2x2_sse2(r, dst, dst_stride);
+ im += 2 * 2;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 4) {
+ __m128i s_64[2];
+
+ s_64[0] = _mm_loadl_epi64((__m128i *)im);
+
+ do {
+ const __m128i res = xy_y_convolve_2tap_4x2_half_pel_sse2(im, s_64);
+ const __m128i r = xy_y_round_half_pel_sse2(res);
+ pack_store_4x2_sse2(r, dst, dst_stride);
+ im += 2 * 4;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 8) {
+ __m128i s_128[2];
+
+ s_128[0] = _mm_loadu_si128((__m128i *)im);
+
+ do {
+ const __m256i res = xy_y_convolve_2tap_8x2_half_pel_avx2(im, s_128);
+ const __m256i r = xy_y_round_half_pel_avx2(res);
+ pack_store_8x2_avx2(r, dst, dst_stride);
+ im += 2 * 8;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 16) {
+ __m256i s_256[2], r[2];
+
+ s_256[0] = _mm256_loadu_si256((__m256i *)im);
+
+ do {
+ xy_y_convolve_2tap_16x2_half_pel_avx2(im, s_256, r);
+ r[0] = xy_y_round_half_pel_avx2(r[0]);
+ r[1] = xy_y_round_half_pel_avx2(r[1]);
+ xy_y_pack_store_16x2_avx2(r[0], r[1], dst, dst_stride);
+ im += 2 * 16;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 32) {
+ __m256i s_256[2][2];
+
+ s_256[0][0] = _mm256_loadu_si256((__m256i *)(im + 0 * 16));
+ s_256[0][1] = _mm256_loadu_si256((__m256i *)(im + 1 * 16));
+
+ do {
+ xy_y_convolve_2tap_half_pel_32_all_avx2(im + 32, s_256[0], s_256[1], dst);
+ xy_y_convolve_2tap_half_pel_32_all_avx2(im + 2 * 32, s_256[1], s_256[0],
+ dst + dst_stride);
+ im += 2 * 32;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 64) {
+ __m256i s_256[2][4];
+
+ s_256[0][0] = _mm256_loadu_si256((__m256i *)(im + 0 * 16));
+ s_256[0][1] = _mm256_loadu_si256((__m256i *)(im + 1 * 16));
+ s_256[0][2] = _mm256_loadu_si256((__m256i *)(im + 2 * 16));
+ s_256[0][3] = _mm256_loadu_si256((__m256i *)(im + 3 * 16));
+
+ do {
+ xy_y_convolve_2tap_half_pel_32_all_avx2(im + 64, s_256[0] + 0,
+ s_256[1] + 0, dst);
+ xy_y_convolve_2tap_half_pel_32_all_avx2(im + 96, s_256[0] + 2,
+ s_256[1] + 2, dst + 32);
+ im += 2 * 64;
+ xy_y_convolve_2tap_half_pel_32_all_avx2(im, s_256[1] + 0, s_256[0] + 0,
+ dst + dst_stride);
+ xy_y_convolve_2tap_half_pel_32_all_avx2(
+ im + 32, s_256[1] + 2, s_256[0] + 2, dst + dst_stride + 32);
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ __m256i s_256[2][8];
+
+ assert(w == 128);
+
+ load_16bit_8rows_avx2(im, 16, s_256[0]);
+
+ do {
+ xy_y_convolve_2tap_half_pel_32_all_avx2(im + 128, s_256[0] + 0,
+ s_256[1] + 0, dst);
+ xy_y_convolve_2tap_half_pel_32_all_avx2(im + 160, s_256[0] + 2,
+ s_256[1] + 2, dst + 1 * 32);
+ xy_y_convolve_2tap_half_pel_32_all_avx2(im + 192, s_256[0] + 4,
+ s_256[1] + 4, dst + 2 * 32);
+ xy_y_convolve_2tap_half_pel_32_all_avx2(im + 224, s_256[0] + 6,
+ s_256[1] + 6, dst + 3 * 32);
+ im += 2 * 128;
+ xy_y_convolve_2tap_half_pel_32_all_avx2(im, s_256[1] + 0, s_256[0] + 0,
+ dst + dst_stride);
+ xy_y_convolve_2tap_half_pel_32_all_avx2(
+ im + 32, s_256[1] + 2, s_256[0] + 2, dst + dst_stride + 1 * 32);
+ xy_y_convolve_2tap_half_pel_32_all_avx2(
+ im + 64, s_256[1] + 4, s_256[0] + 4, dst + dst_stride + 2 * 32);
+ xy_y_convolve_2tap_half_pel_32_all_avx2(
+ im + 96, s_256[1] + 6, s_256[0] + 6, dst + dst_stride + 3 * 32);
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ }
+}
+
+static void convolve_2d_sr_ver_4tap_avx2(
+ const int16_t *const im_block, const int32_t w, const int32_t h,
+ const InterpFilterParams *const filter_params_y, const int32_t subpel_y_q4,
+ uint8_t *dst, const int32_t dst_stride) {
+ const int16_t *im = im_block;
+ int32_t y = h;
+
+ if (w == 2) {
+ __m128i coeffs_128[2], s_32[4], ss_128[2];
+
+ prepare_coeffs_4tap_sse2(filter_params_y, subpel_y_q4, coeffs_128);
+
+ s_32[0] = _mm_cvtsi32_si128(*(int32_t *)(im + 0 * 2));
+ s_32[1] = _mm_cvtsi32_si128(*(int32_t *)(im + 1 * 2));
+ s_32[2] = _mm_cvtsi32_si128(*(int32_t *)(im + 2 * 2));
+
+ const __m128i src01 = _mm_unpacklo_epi32(s_32[0], s_32[1]);
+ const __m128i src12 = _mm_unpacklo_epi32(s_32[1], s_32[2]);
+
+ ss_128[0] = _mm_unpacklo_epi16(src01, src12);
+
+ do {
+ const __m128i res =
+ xy_y_convolve_4tap_2x2_sse2(im, s_32, ss_128, coeffs_128);
+ xy_y_round_store_2x2_sse2(res, dst, dst_stride);
+ im += 2 * 2;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ __m256i coeffs_256[2];
+
+ prepare_coeffs_4tap_avx2(filter_params_y, subpel_y_q4, coeffs_256);
+
+ if (w == 4) {
+ __m128i s_64[4];
+ __m256i s_256[2], ss_256[2];
+
+ s_64[0] = _mm_loadl_epi64((__m128i *)(im + 0 * 4));
+ s_64[1] = _mm_loadl_epi64((__m128i *)(im + 1 * 4));
+ s_64[2] = _mm_loadl_epi64((__m128i *)(im + 2 * 4));
+
+ // Load lines a and b. Line a to lower 128, line b to upper 128
+ s_256[0] = _mm256_setr_m128i(s_64[0], s_64[1]);
+ s_256[1] = _mm256_setr_m128i(s_64[1], s_64[2]);
+
+ ss_256[0] = _mm256_unpacklo_epi16(s_256[0], s_256[1]);
+
+ do {
+ const __m256i res =
+ xy_y_convolve_4tap_4x2_avx2(im, s_64, ss_256, coeffs_256);
+ xy_y_round_store_4x2_avx2(res, dst, dst_stride);
+ im += 2 * 4;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 8) {
+ __m256i s_256[4], r[2];
+
+ s_256[0] = _mm256_loadu_si256((__m256i *)(im + 0 * 8));
+ s_256[1] = _mm256_loadu_si256((__m256i *)(im + 1 * 8));
+
+ if (subpel_y_q4 != 8) {
+ __m256i ss_256[4];
+
+ ss_256[0] = _mm256_unpacklo_epi16(s_256[0], s_256[1]);
+ ss_256[2] = _mm256_unpackhi_epi16(s_256[0], s_256[1]);
+
+ do {
+ xy_y_convolve_4tap_8x2_avx2(im, ss_256, coeffs_256, r);
+ xy_y_round_store_8x2_avx2(r, dst, dst_stride);
+ im += 2 * 8;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ do {
+ xy_y_convolve_4tap_8x2_half_pel_avx2(im, coeffs_256, s_256, r);
+ xy_y_round_store_8x2_avx2(r, dst, dst_stride);
+ im += 2 * 8;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ }
+ } else if (w == 16) {
+ __m256i s_256[5];
+
+ s_256[0] = _mm256_loadu_si256((__m256i *)(im + 0 * 16));
+ s_256[1] = _mm256_loadu_si256((__m256i *)(im + 1 * 16));
+ s_256[2] = _mm256_loadu_si256((__m256i *)(im + 2 * 16));
+
+ if (subpel_y_q4 != 8) {
+ __m256i ss_256[4], tt_256[4], r[4];
+
+ ss_256[0] = _mm256_unpacklo_epi16(s_256[0], s_256[1]);
+ ss_256[2] = _mm256_unpackhi_epi16(s_256[0], s_256[1]);
+
+ tt_256[0] = _mm256_unpacklo_epi16(s_256[1], s_256[2]);
+ tt_256[2] = _mm256_unpackhi_epi16(s_256[1], s_256[2]);
+
+ do {
+ xy_y_convolve_4tap_16x2_avx2(im, s_256, ss_256, tt_256, coeffs_256,
+ r);
+ xy_y_round_store_16x2_avx2(r, dst, dst_stride);
+ im += 2 * 16;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ __m256i r[4];
+
+ do {
+ xy_y_convolve_4tap_16x2_half_pelavx2(im, s_256, coeffs_256, r);
+ xy_y_round_store_16x2_avx2(r, dst, dst_stride);
+ im += 2 * 16;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ }
+ } else {
+      /* This is a special case for OBMC. According to the AV1 spec, 4-tap
+         filters are not supported for width (w) > 16, but for OBMC, when
+         predicting from the above block, the block height is halved to
+         w x (h/2); e.g. an above block of 32x8 is processed as 32x4. */
+ int32_t x = 0;
+
+ assert(!(w % 32));
+
+ __m256i s_256[2][4], ss_256[2][4], tt_256[2][4], r0[4], r1[4];
+ do {
+ const int16_t *s = im + x;
+ uint8_t *d = dst + x;
+
+ loadu_unpack_16bit_3rows_avx2(s, w, s_256[0], ss_256[0], tt_256[0]);
+ loadu_unpack_16bit_3rows_avx2(s + 16, w, s_256[1], ss_256[1],
+ tt_256[1]);
+
+ y = h;
+ do {
+ xy_y_convolve_4tap_32x2_avx2(s, w, s_256[0], ss_256[0], tt_256[0],
+ coeffs_256, r0);
+ xy_y_convolve_4tap_32x2_avx2(s + 16, w, s_256[1], ss_256[1],
+ tt_256[1], coeffs_256, r1);
+
+ xy_y_round_store_32_avx2(r0 + 0, r1 + 0, d);
+ xy_y_round_store_32_avx2(r0 + 2, r1 + 2, d + dst_stride);
+
+ s += 2 * w;
+ d += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+
+ x += 32;
+ } while (x < w);
+ }
+ }
+}
+
+static void convolve_2d_sr_ver_6tap_avx2(
+ const int16_t *const im_block, const int32_t w, const int32_t h,
+ const InterpFilterParams *const filter_params_y, const int32_t subpel_y_q4,
+ uint8_t *dst, const int32_t dst_stride) {
+ const int16_t *im = im_block;
+ int32_t y;
+
+ if (w == 2) {
+ __m128i coeffs_128[3], s_32[6], ss_128[3];
+
+ prepare_coeffs_6tap_ssse3(filter_params_y, subpel_y_q4, coeffs_128);
+
+ s_32[0] = _mm_cvtsi32_si128(*(int32_t *)(im + 0 * 2));
+ s_32[1] = _mm_cvtsi32_si128(*(int32_t *)(im + 1 * 2));
+ s_32[2] = _mm_cvtsi32_si128(*(int32_t *)(im + 2 * 2));
+ s_32[3] = _mm_cvtsi32_si128(*(int32_t *)(im + 3 * 2));
+ s_32[4] = _mm_cvtsi32_si128(*(int32_t *)(im + 4 * 2));
+
+ const __m128i src01 = _mm_unpacklo_epi32(s_32[0], s_32[1]);
+ const __m128i src12 = _mm_unpacklo_epi32(s_32[1], s_32[2]);
+ const __m128i src23 = _mm_unpacklo_epi32(s_32[2], s_32[3]);
+ const __m128i src34 = _mm_unpacklo_epi32(s_32[3], s_32[4]);
+
+ ss_128[0] = _mm_unpacklo_epi16(src01, src12);
+ ss_128[1] = _mm_unpacklo_epi16(src23, src34);
+
+ y = h;
+ do {
+ const __m128i res =
+ xy_y_convolve_6tap_2x2_sse2(im, s_32, ss_128, coeffs_128);
+ xy_y_round_store_2x2_sse2(res, dst, dst_stride);
+ im += 2 * 2;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ __m256i coeffs_256[3];
+
+ prepare_coeffs_6tap_avx2(filter_params_y, subpel_y_q4, coeffs_256);
+
+ if (w == 4) {
+ __m128i s_64[6];
+ __m256i s_256[6], ss_256[3];
+
+ s_64[0] = _mm_loadl_epi64((__m128i *)(im + 0 * 4));
+ s_64[1] = _mm_loadl_epi64((__m128i *)(im + 1 * 4));
+ s_64[2] = _mm_loadl_epi64((__m128i *)(im + 2 * 4));
+ s_64[3] = _mm_loadl_epi64((__m128i *)(im + 3 * 4));
+ s_64[4] = _mm_loadl_epi64((__m128i *)(im + 4 * 4));
+
+ // Load lines a and b. Line a to lower 128, line b to upper 128
+ s_256[0] = _mm256_setr_m128i(s_64[0], s_64[1]);
+ s_256[1] = _mm256_setr_m128i(s_64[1], s_64[2]);
+ s_256[2] = _mm256_setr_m128i(s_64[2], s_64[3]);
+ s_256[3] = _mm256_setr_m128i(s_64[3], s_64[4]);
+
+ ss_256[0] = _mm256_unpacklo_epi16(s_256[0], s_256[1]);
+ ss_256[1] = _mm256_unpacklo_epi16(s_256[2], s_256[3]);
+
+ y = h;
+ do {
+ const __m256i res =
+ xy_y_convolve_6tap_4x2_avx2(im, s_64, ss_256, coeffs_256);
+ xy_y_round_store_4x2_avx2(res, dst, dst_stride);
+ im += 2 * 4;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 8) {
+ __m256i s_256[6], r[2];
+
+ s_256[0] = _mm256_loadu_si256((__m256i *)(im + 0 * 8));
+ s_256[1] = _mm256_loadu_si256((__m256i *)(im + 1 * 8));
+ s_256[2] = _mm256_loadu_si256((__m256i *)(im + 2 * 8));
+ s_256[3] = _mm256_loadu_si256((__m256i *)(im + 3 * 8));
+ y = h;
+
+ if (subpel_y_q4 != 8) {
+ __m256i ss_256[6];
+
+ ss_256[0] = _mm256_unpacklo_epi16(s_256[0], s_256[1]);
+ ss_256[1] = _mm256_unpacklo_epi16(s_256[2], s_256[3]);
+
+ ss_256[3] = _mm256_unpackhi_epi16(s_256[0], s_256[1]);
+ ss_256[4] = _mm256_unpackhi_epi16(s_256[2], s_256[3]);
+
+ do {
+ xy_y_convolve_6tap_8x2_avx2(im, ss_256, coeffs_256, r);
+ xy_y_round_store_8x2_avx2(r, dst, dst_stride);
+ im += 2 * 8;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ do {
+ xy_y_convolve_6tap_8x2_half_pel_avx2(im, coeffs_256, s_256, r);
+ xy_y_round_store_8x2_avx2(r, dst, dst_stride);
+ im += 2 * 8;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ }
+ } else if (w == 16) {
+ __m256i s_256[6];
+
+ s_256[0] = _mm256_loadu_si256((__m256i *)(im + 0 * 16));
+ s_256[1] = _mm256_loadu_si256((__m256i *)(im + 1 * 16));
+ s_256[2] = _mm256_loadu_si256((__m256i *)(im + 2 * 16));
+ s_256[3] = _mm256_loadu_si256((__m256i *)(im + 3 * 16));
+ s_256[4] = _mm256_loadu_si256((__m256i *)(im + 4 * 16));
+ y = h;
+
+ if (subpel_y_q4 != 8) {
+ __m256i ss_256[6], tt_256[6], r[4];
+
+ ss_256[0] = _mm256_unpacklo_epi16(s_256[0], s_256[1]);
+ ss_256[1] = _mm256_unpacklo_epi16(s_256[2], s_256[3]);
+ ss_256[3] = _mm256_unpackhi_epi16(s_256[0], s_256[1]);
+ ss_256[4] = _mm256_unpackhi_epi16(s_256[2], s_256[3]);
+
+ tt_256[0] = _mm256_unpacklo_epi16(s_256[1], s_256[2]);
+ tt_256[1] = _mm256_unpacklo_epi16(s_256[3], s_256[4]);
+ tt_256[3] = _mm256_unpackhi_epi16(s_256[1], s_256[2]);
+ tt_256[4] = _mm256_unpackhi_epi16(s_256[3], s_256[4]);
+
+ do {
+ xy_y_convolve_6tap_16x2_avx2(im, 16, s_256, ss_256, tt_256,
+ coeffs_256, r);
+ xy_y_round_store_16x2_avx2(r, dst, dst_stride);
+ im += 2 * 16;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ __m256i ss_256[4], r[4];
+
+ do {
+ xy_y_convolve_6tap_16x2_half_pel_avx2(im, 16, s_256, ss_256,
+ coeffs_256, r);
+ xy_y_round_store_16x2_avx2(r, dst, dst_stride);
+
+ im += 2 * 16;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ }
+ } else {
+ int32_t x = 0;
+
+ assert(!(w % 32));
+
+ __m256i s_256[2][6], ss_256[2][6], tt_256[2][6], r0[4], r1[4];
+
+ do {
+ const int16_t *s = im + x;
+ uint8_t *d = dst + x;
+
+ loadu_unpack_16bit_5rows_avx2(s, w, s_256[0], ss_256[0], tt_256[0]);
+ loadu_unpack_16bit_5rows_avx2(s + 16, w, s_256[1], ss_256[1],
+ tt_256[1]);
+
+ y = h;
+ do {
+ xy_y_convolve_6tap_16x2_avx2(s, w, s_256[0], ss_256[0], tt_256[0],
+ coeffs_256, r0);
+ xy_y_convolve_6tap_16x2_avx2(s + 16, w, s_256[1], ss_256[1],
+ tt_256[1], coeffs_256, r1);
+
+ xy_y_round_store_32_avx2(r0 + 0, r1 + 0, d);
+ xy_y_round_store_32_avx2(r0 + 2, r1 + 2, d + dst_stride);
+
+ s += 2 * w;
+ d += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+
+ x += 32;
+ } while (x < w);
+ }
+ }
+}
+
+static void convolve_2d_sr_ver_8tap_avx2(
+ const int16_t *const im_block, const int32_t w, const int32_t h,
+ const InterpFilterParams *const filter_params_y, const int32_t subpel_y_q4,
+ uint8_t *dst, const int32_t dst_stride) {
+ const int16_t *im = im_block;
+ int32_t y;
+
+ if (w == 2) {
+ __m128i coeffs_128[4], s_32[8], ss_128[4];
+
+ prepare_coeffs_8tap_sse2(filter_params_y, subpel_y_q4, coeffs_128);
+
+ s_32[0] = _mm_cvtsi32_si128(*(int32_t *)(im + 0 * 2));
+ s_32[1] = _mm_cvtsi32_si128(*(int32_t *)(im + 1 * 2));
+ s_32[2] = _mm_cvtsi32_si128(*(int32_t *)(im + 2 * 2));
+ s_32[3] = _mm_cvtsi32_si128(*(int32_t *)(im + 3 * 2));
+ s_32[4] = _mm_cvtsi32_si128(*(int32_t *)(im + 4 * 2));
+ s_32[5] = _mm_cvtsi32_si128(*(int32_t *)(im + 5 * 2));
+ s_32[6] = _mm_cvtsi32_si128(*(int32_t *)(im + 6 * 2));
+
+ const __m128i src01 = _mm_unpacklo_epi32(s_32[0], s_32[1]);
+ const __m128i src12 = _mm_unpacklo_epi32(s_32[1], s_32[2]);
+ const __m128i src23 = _mm_unpacklo_epi32(s_32[2], s_32[3]);
+ const __m128i src34 = _mm_unpacklo_epi32(s_32[3], s_32[4]);
+ const __m128i src45 = _mm_unpacklo_epi32(s_32[4], s_32[5]);
+ const __m128i src56 = _mm_unpacklo_epi32(s_32[5], s_32[6]);
+
+ ss_128[0] = _mm_unpacklo_epi16(src01, src12);
+ ss_128[1] = _mm_unpacklo_epi16(src23, src34);
+ ss_128[2] = _mm_unpacklo_epi16(src45, src56);
+
+ y = h;
+ do {
+ const __m128i res =
+ xy_y_convolve_8tap_2x2_sse2(im, s_32, ss_128, coeffs_128);
+ xy_y_round_store_2x2_sse2(res, dst, dst_stride);
+ im += 2 * 2;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ __m256i coeffs_256[4];
+
+ prepare_coeffs_8tap_avx2(filter_params_y, subpel_y_q4, coeffs_256);
+
+ if (w == 4) {
+ __m128i s_64[8];
+ __m256i s_256[8], ss_256[4];
+
+ s_64[0] = _mm_loadl_epi64((__m128i *)(im + 0 * 4));
+ s_64[1] = _mm_loadl_epi64((__m128i *)(im + 1 * 4));
+ s_64[2] = _mm_loadl_epi64((__m128i *)(im + 2 * 4));
+ s_64[3] = _mm_loadl_epi64((__m128i *)(im + 3 * 4));
+ s_64[4] = _mm_loadl_epi64((__m128i *)(im + 4 * 4));
+ s_64[5] = _mm_loadl_epi64((__m128i *)(im + 5 * 4));
+ s_64[6] = _mm_loadl_epi64((__m128i *)(im + 6 * 4));
+
+ // Load lines a and b. Line a to lower 128, line b to upper 128
+ s_256[0] = _mm256_setr_m128i(s_64[0], s_64[1]);
+ s_256[1] = _mm256_setr_m128i(s_64[1], s_64[2]);
+ s_256[2] = _mm256_setr_m128i(s_64[2], s_64[3]);
+ s_256[3] = _mm256_setr_m128i(s_64[3], s_64[4]);
+ s_256[4] = _mm256_setr_m128i(s_64[4], s_64[5]);
+ s_256[5] = _mm256_setr_m128i(s_64[5], s_64[6]);
+
+ ss_256[0] = _mm256_unpacklo_epi16(s_256[0], s_256[1]);
+ ss_256[1] = _mm256_unpacklo_epi16(s_256[2], s_256[3]);
+ ss_256[2] = _mm256_unpacklo_epi16(s_256[4], s_256[5]);
+
+ y = h;
+ do {
+ const __m256i res =
+ xy_y_convolve_8tap_4x2_avx2(im, s_64, ss_256, coeffs_256);
+ xy_y_round_store_4x2_avx2(res, dst, dst_stride);
+ im += 2 * 4;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 8) {
+ __m256i s_256[8], r[2];
+
+ s_256[0] = _mm256_loadu_si256((__m256i *)(im + 0 * 8));
+ s_256[1] = _mm256_loadu_si256((__m256i *)(im + 1 * 8));
+ s_256[2] = _mm256_loadu_si256((__m256i *)(im + 2 * 8));
+ s_256[3] = _mm256_loadu_si256((__m256i *)(im + 3 * 8));
+ s_256[4] = _mm256_loadu_si256((__m256i *)(im + 4 * 8));
+ s_256[5] = _mm256_loadu_si256((__m256i *)(im + 5 * 8));
+ y = h;
+
+ if (subpel_y_q4 != 8) {
+ __m256i ss_256[8];
+
+ convolve_8tap_unpack_avx2(s_256, ss_256);
+
+ do {
+ xy_y_convolve_8tap_8x2_avx2(im, ss_256, coeffs_256, r);
+ xy_y_round_store_8x2_avx2(r, dst, dst_stride);
+ im += 2 * 8;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ do {
+ xy_y_convolve_8tap_8x2_half_pel_avx2(im, coeffs_256, s_256, r);
+ xy_y_round_store_8x2_avx2(r, dst, dst_stride);
+ im += 2 * 8;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ }
+ } else if (w == 16) {
+ __m256i s_256[8], r[4];
+
+ load_16bit_7rows_avx2(im, 16, s_256);
+ y = h;
+
+ if (subpel_y_q4 != 8) {
+ __m256i ss_256[8], tt_256[8];
+
+ convolve_8tap_unpack_avx2(s_256, ss_256);
+ convolve_8tap_unpack_avx2(s_256 + 1, tt_256);
+
+ do {
+ xy_y_convolve_8tap_16x2_avx2(im, 16, coeffs_256, s_256, ss_256,
+ tt_256, r);
+ xy_y_round_store_16x2_avx2(r, dst, dst_stride);
+
+ im += 2 * 16;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ do {
+ xy_y_convolve_8tap_16x2_half_pel_avx2(im, 16, coeffs_256, s_256, r);
+ xy_y_round_store_16x2_avx2(r, dst, dst_stride);
+
+ im += 2 * 16;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ }
+ } else {
+ int32_t x = 0;
+ __m256i s_256[2][8], r0[4], r1[4];
+
+ assert(!(w % 32));
+
+ __m256i ss_256[2][8], tt_256[2][8];
+
+ do {
+ const int16_t *s = im + x;
+ uint8_t *d = dst + x;
+
+ load_16bit_7rows_avx2(s, w, s_256[0]);
+ convolve_8tap_unpack_avx2(s_256[0], ss_256[0]);
+ convolve_8tap_unpack_avx2(s_256[0] + 1, tt_256[0]);
+
+ load_16bit_7rows_avx2(s + 16, w, s_256[1]);
+ convolve_8tap_unpack_avx2(s_256[1], ss_256[1]);
+ convolve_8tap_unpack_avx2(s_256[1] + 1, tt_256[1]);
+
+ y = h;
+ do {
+ xy_y_convolve_8tap_16x2_avx2(s, w, coeffs_256, s_256[0], ss_256[0],
+ tt_256[0], r0);
+ xy_y_convolve_8tap_16x2_avx2(s + 16, w, coeffs_256, s_256[1],
+ ss_256[1], tt_256[1], r1);
+ xy_y_round_store_32_avx2(r0 + 0, r1 + 0, d);
+ xy_y_round_store_32_avx2(r0 + 2, r1 + 2, d + dst_stride);
+
+ s += 2 * w;
+ d += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+
+ x += 32;
+ } while (x < w);
+ }
+ }
+}
+
+typedef void (*Convolve2dSrHorTapFunc)(
+ const uint8_t *const src, const int32_t src_stride, const int32_t w,
+ const int32_t h, const InterpFilterParams *const filter_params_x,
+ const int32_t subpel_x_q4, int16_t *const im_block);
+
+typedef void (*Convolve2dSrVerTapFunc)(
+ const int16_t *const im_block, const int32_t w, const int32_t h,
+ const InterpFilterParams *const filter_params_y, const int32_t subpel_y_q4,
+ uint8_t *dst, const int32_t dst_stride);
+
+static AOM_FORCE_INLINE void av1_convolve_2d_sr_specialized_avx2(
+ const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride,
+ int32_t w, int32_t h, const InterpFilterParams *filter_params_x,
+ const InterpFilterParams *filter_params_y, const int32_t subpel_x_q4,
+ const int32_t subpel_y_q4, ConvolveParams *conv_params) {
+ static const Convolve2dSrHorTapFunc
+ convolve_2d_sr_hor_tap_func_table[MAX_FILTER_TAP + 1] = {
+ NULL,
+ NULL,
+ convolve_2d_sr_hor_2tap_avx2,
+ NULL,
+ convolve_2d_sr_hor_4tap_ssse3,
+ NULL,
+ convolve_2d_sr_hor_6tap_avx2,
+ NULL,
+ convolve_2d_sr_hor_8tap_avx2
+ };
+ static const Convolve2dSrVerTapFunc
+ convolve_2d_sr_ver_tap_func_table[MAX_FILTER_TAP + 1] = {
+ NULL,
+ convolve_2d_sr_ver_2tap_half_avx2,
+ convolve_2d_sr_ver_2tap_avx2,
+ convolve_2d_sr_ver_4tap_avx2,
+ convolve_2d_sr_ver_4tap_avx2,
+ convolve_2d_sr_ver_6tap_avx2,
+ convolve_2d_sr_ver_6tap_avx2,
+ convolve_2d_sr_ver_8tap_avx2,
+ convolve_2d_sr_ver_8tap_avx2
+ };
+ const int32_t tap_x = get_filter_tap(filter_params_x, subpel_x_q4);
+ const int32_t tap_y = get_filter_tap(filter_params_y, subpel_y_q4);
+
+ assert(tap_x != 12 && tap_y != 12);
+
+ const uint8_t *src_ptr = src - ((tap_y >> 1) - 1) * src_stride;
+ // Note: im_block is 8-pixel interlaced for width 32 and up, to avoid data
+ // permutation.
+ DECLARE_ALIGNED(32, int16_t,
+ im_block[(MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE]);
+
+ (void)conv_params;
+
+ assert(conv_params->round_0 == 3);
+ assert(conv_params->round_1 == 11);
+
+ // horizontal filter
+ int32_t hh = h + tap_y;
+ assert(!(hh % 2));
+
+ convolve_2d_sr_hor_tap_func_table[tap_x](
+ src_ptr, src_stride, w, hh, filter_params_x, subpel_x_q4, im_block);
+
+ // vertical filter
+ convolve_2d_sr_ver_tap_func_table[tap_y - (subpel_y_q4 == 8)](
+ im_block, w, h, filter_params_y, subpel_y_q4, dst, dst_stride);
+}
+
+#endif // THIRD_PARTY_SVT_AV1_CONVOLVE_2D_AVX2_H_
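The dispatcher above indexes each kernel table by tap count, leaving NULL in the odd and unused slots, and subtracts one from the vertical index when subpel_y_q4 == 8 so the exact half-pel case lands on a dedicated half-pel kernel where one exists (the 2-tap case) or on the same kernel otherwise. A minimal standalone sketch of that table pattern, with hypothetical names:

#include <assert.h>
#include <stddef.h>

typedef void (*TapFn)(int w);

static void tap2(int w) { (void)w; /* 2-tap kernel stand-in */ }
static void tap8(int w) { (void)w; /* 8-tap kernel stand-in */ }

/* Kernels live at the index of their tap count; unused slots stay NULL. */
static const TapFn kByTap[8 + 1] = { NULL, NULL, tap2, NULL, NULL,
                                     NULL, NULL, NULL, tap8 };

static void dispatch(const int tap, const int w) {
  assert(kByTap[tap] != NULL);
  kByTap[tap](w);
}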
diff --git a/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/convolve_avx2.h b/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/convolve_avx2.h
new file mode 100644
index 00000000000..452a7134226
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/convolve_avx2.h
@@ -0,0 +1,3334 @@
+/*
+ * Copyright (c) 2018, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef THIRD_PARTY_SVT_AV1_CONVOLVE_AVX2_H_
+#define THIRD_PARTY_SVT_AV1_CONVOLVE_AVX2_H_
+
+#include "EbMemory_AVX2.h"
+#include "EbMemory_SSE4_1.h"
+#include "synonyms.h"
+
+#include "aom_dsp/aom_filter.h"
+#include "aom_dsp/x86/convolve_avx2.h"
+
+static INLINE void populate_coeffs_4tap_avx2(const __m128i coeffs_128,
+ __m256i coeffs[2]) {
+ const __m256i coeffs_256 = _mm256_broadcastsi128_si256(coeffs_128);
+
+ // coeffs 2 3 2 3 2 3 2 3
+ coeffs[0] = _mm256_shuffle_epi8(coeffs_256, _mm256_set1_epi16(0x0604u));
+ // coeffs 4 5 4 5 4 5 4 5
+ coeffs[1] = _mm256_shuffle_epi8(coeffs_256, _mm256_set1_epi16(0x0a08u));
+}
+
+static INLINE void populate_coeffs_6tap_avx2(const __m128i coeffs_128,
+ __m256i coeffs[3]) {
+ const __m256i coeffs_256 = _mm256_broadcastsi128_si256(coeffs_128);
+
+ // coeffs 1 2 1 2 1 2 1 2
+ coeffs[0] = _mm256_shuffle_epi8(coeffs_256, _mm256_set1_epi16(0x0402u));
+ // coeffs 3 4 3 4 3 4 3 4
+ coeffs[1] = _mm256_shuffle_epi8(coeffs_256, _mm256_set1_epi16(0x0806u));
+ // coeffs 5 6 5 6 5 6 5 6
+ coeffs[2] = _mm256_shuffle_epi8(coeffs_256, _mm256_set1_epi16(0x0C0Au));
+}
+
+static INLINE void populate_coeffs_8tap_avx2(const __m128i coeffs_128,
+ __m256i coeffs[4]) {
+ const __m256i coeffs_256 = _mm256_broadcastsi128_si256(coeffs_128);
+
+ // coeffs 0 1 0 1 0 1 0 1
+ coeffs[0] = _mm256_shuffle_epi8(coeffs_256, _mm256_set1_epi16(0x0200u));
+ // coeffs 2 3 2 3 2 3 2 3
+ coeffs[1] = _mm256_shuffle_epi8(coeffs_256, _mm256_set1_epi16(0x0604u));
+ // coeffs 4 5 4 5 4 5 4 5
+ coeffs[2] = _mm256_shuffle_epi8(coeffs_256, _mm256_set1_epi16(0x0a08u));
+ // coeffs 6 7 6 7 6 7 6 7
+ coeffs[3] = _mm256_shuffle_epi8(coeffs_256, _mm256_set1_epi16(0x0e0cu));
+}
+
+static INLINE void prepare_half_coeffs_2tap_ssse3(
+ const InterpFilterParams *const filter_params, const int32_t subpel_q4,
+ __m128i *const coeffs /* [1] */) {
+ const int16_t *const filter = av1_get_interp_filter_subpel_kernel(
+ filter_params, subpel_q4 & SUBPEL_MASK);
+ const __m128i coeffs_8 = _mm_cvtsi32_si128(*(const int32_t *)(filter + 3));
+
+  // Right shift all filter coefficients by 1 to reduce the bits required.
+  // This extra right shift will be taken care of at the end while rounding
+  // the result.
+  // Since all filter coefficients are even, this change will not affect the
+  // end result.
+ assert(_mm_test_all_zeros(_mm_and_si128(coeffs_8, _mm_set1_epi16(1)),
+ _mm_set1_epi16((short)0xffff)));
+
+ const __m128i coeffs_1 = _mm_srai_epi16(coeffs_8, 1);
+
+ // coeffs 3 4 3 4 3 4 3 4
+ *coeffs = _mm_shuffle_epi8(coeffs_1, _mm_set1_epi16(0x0200u));
+}
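+
+// A minimal illustrative sketch (hypothetical helper, not part of the
+// upstream port) of why halving the even filter taps is lossless: every
+// product is exactly halved, so dropping one bit from the final rounding
+// shift reproduces the full-precision result.
+static INLINE int32_t half_coeff_round_demo(const int16_t px) {
+  const int16_t tap = 52;                              // an even filter tap
+  const int32_t full = (px * tap + 32) >> 6;           // full tap, shift 6
+  const int32_t halved = (px * (tap >> 1) + 16) >> 5;  // half tap, shift 5
+  return full - halved;                                // always 0
+}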
+
+static INLINE void prepare_half_coeffs_4tap_ssse3(
+ const InterpFilterParams *const filter_params, const int32_t subpel_q4,
+ __m128i *const coeffs /* [2] */) {
+ const int16_t *const filter = av1_get_interp_filter_subpel_kernel(
+ filter_params, subpel_q4 & SUBPEL_MASK);
+ const __m128i coeffs_8 = _mm_loadu_si128((__m128i *)filter);
+
+  // Right shift all filter coefficients by 1 to reduce the bits required.
+  // This extra right shift will be taken care of at the end while rounding
+  // the result.
+  // Since all filter coefficients are even, this change will not affect the
+  // end result.
+ assert(_mm_test_all_zeros(_mm_and_si128(coeffs_8, _mm_set1_epi16(1)),
+ _mm_set1_epi16((short)0xffff)));
+
+ const __m128i coeffs_1 = _mm_srai_epi16(coeffs_8, 1);
+
+ // coeffs 2 3 2 3 2 3 2 3
+ coeffs[0] = _mm_shuffle_epi8(coeffs_1, _mm_set1_epi16(0x0604u));
+ // coeffs 4 5 4 5 4 5 4 5
+ coeffs[1] = _mm_shuffle_epi8(coeffs_1, _mm_set1_epi16(0x0a08u));
+}
+
+static INLINE void prepare_half_coeffs_6tap_ssse3(
+ const InterpFilterParams *const filter_params, const int32_t subpel_q4,
+ __m128i *const coeffs /* [3] */) {
+ const int16_t *const filter = av1_get_interp_filter_subpel_kernel(
+ filter_params, subpel_q4 & SUBPEL_MASK);
+ const __m128i coeffs_8 = _mm_loadu_si128((__m128i *)filter);
+
+  // Right shift all filter coefficients by 1 to reduce the bits required.
+  // This extra right shift will be taken care of at the end while rounding
+  // the result.
+  // Since all filter coefficients are even, this change will not affect the
+  // end result.
+ assert(_mm_test_all_zeros(_mm_and_si128(coeffs_8, _mm_set1_epi16(1)),
+ _mm_set1_epi16((short)0xffff)));
+
+ const __m128i coeffs_1 = _mm_srai_epi16(coeffs_8, 1);
+
+ // coeffs 1 2 1 2 1 2 1 2
+ coeffs[0] = _mm_shuffle_epi8(coeffs_1, _mm_set1_epi16(0x0402u));
+ // coeffs 3 4 3 4 3 4 3 4
+ coeffs[1] = _mm_shuffle_epi8(coeffs_1, _mm_set1_epi16(0x0806u));
+ // coeffs 5 6 5 6 5 6 5 6
+ coeffs[2] = _mm_shuffle_epi8(coeffs_1, _mm_set1_epi16(0x0C0Au));
+}
+
+static INLINE void prepare_half_coeffs_8tap_ssse3(
+ const InterpFilterParams *const filter_params, const int32_t subpel_q4,
+ __m128i *const coeffs /* [4] */) {
+ const int16_t *const filter = av1_get_interp_filter_subpel_kernel(
+ filter_params, subpel_q4 & SUBPEL_MASK);
+ const __m128i coeffs_8 = _mm_loadu_si128((__m128i *)filter);
+
+  // Right shift all filter coefficients by 1 to reduce the bits required.
+  // This extra right shift will be taken care of at the end while rounding
+  // the result.
+  // Since all filter coefficients are even, this change will not affect the
+  // end result.
+ assert(_mm_test_all_zeros(_mm_and_si128(coeffs_8, _mm_set1_epi16(1)),
+ _mm_set1_epi16((short)0xffff)));
+
+ const __m128i coeffs_1 = _mm_srai_epi16(coeffs_8, 1);
+
+ // coeffs 0 1 0 1 0 1 0 1
+ coeffs[0] = _mm_shuffle_epi8(coeffs_1, _mm_set1_epi16(0x0200u));
+ // coeffs 2 3 2 3 2 3 2 3
+ coeffs[1] = _mm_shuffle_epi8(coeffs_1, _mm_set1_epi16(0x0604u));
+ // coeffs 4 5 4 5 4 5 4 5
+ coeffs[2] = _mm_shuffle_epi8(coeffs_1, _mm_set1_epi16(0x0a08u));
+ // coeffs 6 7 6 7 6 7 6 7
+ coeffs[3] = _mm_shuffle_epi8(coeffs_1, _mm_set1_epi16(0x0e0cu));
+}
+
+static INLINE void prepare_half_coeffs_2tap_avx2(
+ const InterpFilterParams *const filter_params, const int32_t subpel_q4,
+ __m256i *const coeffs /* [1] */) {
+ const int16_t *const filter = av1_get_interp_filter_subpel_kernel(
+ filter_params, subpel_q4 & SUBPEL_MASK);
+ const __m128i coeffs_8 = _mm_cvtsi32_si128(*(const int32_t *)(filter + 3));
+ const __m256i filter_coeffs = _mm256_broadcastsi128_si256(coeffs_8);
+
+  // Right shift all filter coefficients by 1 to reduce the bits required.
+  // This extra right shift will be taken care of at the end while rounding
+  // the result.
+  // Since all filter coefficients are even, this change will not affect the
+  // end result.
+ assert(_mm_test_all_zeros(_mm_and_si128(coeffs_8, _mm_set1_epi16(1)),
+ _mm_set1_epi16((short)0xffff)));
+
+ const __m256i coeffs_1 = _mm256_srai_epi16(filter_coeffs, 1);
+
+ // coeffs 3 4 3 4 3 4 3 4
+ *coeffs = _mm256_shuffle_epi8(coeffs_1, _mm256_set1_epi16(0x0200u));
+}
+
+static INLINE void prepare_half_coeffs_4tap_avx2(
+ const InterpFilterParams *const filter_params, const int32_t subpel_q4,
+ __m256i *const coeffs /* [2] */) {
+ const int16_t *const filter = av1_get_interp_filter_subpel_kernel(
+ filter_params, subpel_q4 & SUBPEL_MASK);
+ const __m128i coeffs_8 = _mm_loadu_si128((__m128i *)filter);
+
+  // Right shift all filter coefficients by 1 to reduce the bits required.
+  // This extra right shift will be taken care of at the end while rounding
+  // the result.
+  // Since all filter coefficients are even, this change will not affect the
+  // end result.
+ assert(_mm_test_all_zeros(_mm_and_si128(coeffs_8, _mm_set1_epi16(1)),
+ _mm_set1_epi16((short)0xffff)));
+ const __m128i coeffs_1 = _mm_srai_epi16(coeffs_8, 1);
+ populate_coeffs_4tap_avx2(coeffs_1, coeffs);
+}
+
+static INLINE void prepare_half_coeffs_6tap_avx2(
+ const InterpFilterParams *const filter_params, const int32_t subpel_q4,
+ __m256i *const coeffs /* [3] */) {
+ const int16_t *const filter = av1_get_interp_filter_subpel_kernel(
+ filter_params, subpel_q4 & SUBPEL_MASK);
+ const __m128i coeffs_8 = _mm_loadu_si128((__m128i *)filter);
+
+  // Right shift all filter coefficients by 1 to reduce the bits required.
+  // This extra right shift will be taken care of at the end while rounding
+  // the result.
+  // Since all filter coefficients are even, this change will not affect the
+  // end result.
+ assert(_mm_test_all_zeros(_mm_and_si128(coeffs_8, _mm_set1_epi16(1)),
+ _mm_set1_epi16((short)0xffff)));
+ const __m128i coeffs_1 = _mm_srai_epi16(coeffs_8, 1);
+ populate_coeffs_6tap_avx2(coeffs_1, coeffs);
+}
+
+static INLINE void prepare_half_coeffs_8tap_avx2(
+ const InterpFilterParams *const filter_params, const int32_t subpel_q4,
+ __m256i *const coeffs /* [4] */) {
+ const int16_t *const filter = av1_get_interp_filter_subpel_kernel(
+ filter_params, subpel_q4 & SUBPEL_MASK);
+ const __m128i coeffs_8 = _mm_loadu_si128((__m128i *)filter);
+
+  // Right shift all filter coefficients by 1 to reduce the bits required.
+  // This extra right shift will be taken care of at the end while rounding
+  // the result.
+  // Since all filter coefficients are even, this change will not affect the
+  // end result.
+ assert(_mm_test_all_zeros(_mm_and_si128(coeffs_8, _mm_set1_epi16(1)),
+ _mm_set1_epi16((short)0xffff)));
+ const __m128i coeffs_1 = _mm_srai_epi16(coeffs_8, 1);
+ populate_coeffs_8tap_avx2(coeffs_1, coeffs);
+}
+
+static INLINE void prepare_coeffs_2tap_sse2(
+ const InterpFilterParams *const filter_params, const int32_t subpel_q4,
+ __m128i *const coeffs /* [1] */) {
+ const int16_t *filter = av1_get_interp_filter_subpel_kernel(
+ filter_params, subpel_q4 & SUBPEL_MASK);
+
+ const __m128i coeff = _mm_cvtsi32_si128(*(const int32_t *)(filter + 3));
+
+ // coeffs 3 4 3 4 3 4 3 4
+ coeffs[0] = _mm_shuffle_epi32(coeff, 0x00);
+}
+
+static INLINE void prepare_coeffs_4tap_sse2(
+ const InterpFilterParams *const filter_params, const int32_t subpel_q4,
+ __m128i *const coeffs /* [2] */) {
+ const int16_t *filter = av1_get_interp_filter_subpel_kernel(
+ filter_params, subpel_q4 & SUBPEL_MASK);
+
+ const __m128i coeff = _mm_loadu_si128((__m128i *)filter);
+
+ // coeffs 2 3 2 3 2 3 2 3
+ coeffs[0] = _mm_shuffle_epi32(coeff, 0x55);
+ // coeffs 4 5 4 5 4 5 4 5
+ coeffs[1] = _mm_shuffle_epi32(coeff, 0xaa);
+}
+
+static INLINE void prepare_coeffs_6tap_ssse3(
+ const InterpFilterParams *const filter_params, const int32_t subpel_q4,
+ __m128i *const coeffs /* [3] */) {
+ const int16_t *const filter = av1_get_interp_filter_subpel_kernel(
+ filter_params, subpel_q4 & SUBPEL_MASK);
+ const __m128i coeff = _mm_loadu_si128((__m128i *)filter);
+
+ // coeffs 1 2 1 2 1 2 1 2
+ coeffs[0] = _mm_shuffle_epi8(coeff, _mm_set1_epi32(0x05040302u));
+ // coeffs 3 4 3 4 3 4 3 4
+ coeffs[1] = _mm_shuffle_epi8(coeff, _mm_set1_epi32(0x09080706u));
+ // coeffs 5 6 5 6 5 6 5 6
+ coeffs[2] = _mm_shuffle_epi8(coeff, _mm_set1_epi32(0x0D0C0B0Au));
+}
+
+static INLINE void prepare_coeffs_8tap_sse2(
+ const InterpFilterParams *const filter_params, const int32_t subpel_q4,
+ __m128i *const coeffs /* [4] */) {
+ const int16_t *filter = av1_get_interp_filter_subpel_kernel(
+ filter_params, subpel_q4 & SUBPEL_MASK);
+
+ const __m128i coeff = _mm_loadu_si128((__m128i *)filter);
+
+ // coeffs 0 1 0 1 0 1 0 1
+ coeffs[0] = _mm_shuffle_epi32(coeff, 0x00);
+ // coeffs 2 3 2 3 2 3 2 3
+ coeffs[1] = _mm_shuffle_epi32(coeff, 0x55);
+ // coeffs 4 5 4 5 4 5 4 5
+ coeffs[2] = _mm_shuffle_epi32(coeff, 0xaa);
+ // coeffs 6 7 6 7 6 7 6 7
+ coeffs[3] = _mm_shuffle_epi32(coeff, 0xff);
+}
+
+static INLINE void prepare_coeffs_2tap_avx2(
+ const InterpFilterParams *const filter_params, const int32_t subpel_q4,
+ __m256i *const coeffs /* [1] */) {
+ const int16_t *filter = av1_get_interp_filter_subpel_kernel(
+ filter_params, subpel_q4 & SUBPEL_MASK);
+
+ const __m128i coeff_8 = _mm_cvtsi32_si128(*(const int32_t *)(filter + 3));
+ const __m256i coeff = _mm256_broadcastsi128_si256(coeff_8);
+
+ // coeffs 3 4 3 4 3 4 3 4
+ coeffs[0] = _mm256_shuffle_epi32(coeff, 0x00);
+}
+
+static INLINE void prepare_coeffs_4tap_avx2(
+ const InterpFilterParams *const filter_params, const int32_t subpel_q4,
+ __m256i *const coeffs /* [2] */) {
+ const int16_t *filter = av1_get_interp_filter_subpel_kernel(
+ filter_params, subpel_q4 & SUBPEL_MASK);
+
+ const __m128i coeff_8 = _mm_loadu_si128((__m128i *)filter);
+ const __m256i coeff = _mm256_broadcastsi128_si256(coeff_8);
+
+ // coeffs 2 3 2 3 2 3 2 3
+ coeffs[0] = _mm256_shuffle_epi32(coeff, 0x55);
+ // coeffs 4 5 4 5 4 5 4 5
+ coeffs[1] = _mm256_shuffle_epi32(coeff, 0xaa);
+}
+
+static INLINE void prepare_coeffs_6tap_avx2(
+ const InterpFilterParams *const filter_params, const int32_t subpel_q4,
+    __m256i *const coeffs /* [3] */) {
+ const int16_t *const filter = av1_get_interp_filter_subpel_kernel(
+ filter_params, subpel_q4 & SUBPEL_MASK);
+ const __m128i coeffs_8 = _mm_loadu_si128((__m128i *)filter);
+ const __m256i coeff = _mm256_broadcastsi128_si256(coeffs_8);
+
+ // coeffs 1 2 1 2 1 2 1 2
+ coeffs[0] = _mm256_shuffle_epi8(coeff, _mm256_set1_epi32(0x05040302u));
+ // coeffs 3 4 3 4 3 4 3 4
+ coeffs[1] = _mm256_shuffle_epi8(coeff, _mm256_set1_epi32(0x09080706u));
+ // coeffs 5 6 5 6 5 6 5 6
+ coeffs[2] = _mm256_shuffle_epi8(coeff, _mm256_set1_epi32(0x0D0C0B0Au));
+}
+
+static INLINE void prepare_coeffs_8tap_avx2(
+ const InterpFilterParams *const filter_params, const int32_t subpel_q4,
+ __m256i *const coeffs /* [4] */) {
+ const int16_t *filter = av1_get_interp_filter_subpel_kernel(
+ filter_params, subpel_q4 & SUBPEL_MASK);
+
+ const __m128i coeff_8 = _mm_loadu_si128((__m128i *)filter);
+ const __m256i coeff = _mm256_broadcastsi128_si256(coeff_8);
+
+ // coeffs 0 1 0 1 0 1 0 1
+ coeffs[0] = _mm256_shuffle_epi32(coeff, 0x00);
+ // coeffs 2 3 2 3 2 3 2 3
+ coeffs[1] = _mm256_shuffle_epi32(coeff, 0x55);
+ // coeffs 4 5 4 5 4 5 4 5
+ coeffs[2] = _mm256_shuffle_epi32(coeff, 0xaa);
+ // coeffs 6 7 6 7 6 7 6 7
+ coeffs[3] = _mm256_shuffle_epi32(coeff, 0xff);
+}
+
+static INLINE void load_16bit_5rows_avx2(const int16_t *const src,
+ const ptrdiff_t stride,
+ __m256i dst[5]) {
+ dst[0] = _mm256_loadu_si256((__m256i *)(src + 0 * stride));
+ dst[1] = _mm256_loadu_si256((__m256i *)(src + 1 * stride));
+ dst[2] = _mm256_loadu_si256((__m256i *)(src + 2 * stride));
+ dst[3] = _mm256_loadu_si256((__m256i *)(src + 3 * stride));
+ dst[4] = _mm256_loadu_si256((__m256i *)(src + 4 * stride));
+}
+
+static INLINE void load_16bit_7rows_avx2(const int16_t *const src,
+ const ptrdiff_t stride,
+ __m256i dst[7]) {
+ dst[0] = _mm256_loadu_si256((__m256i *)(src + 0 * stride));
+ dst[1] = _mm256_loadu_si256((__m256i *)(src + 1 * stride));
+ dst[2] = _mm256_loadu_si256((__m256i *)(src + 2 * stride));
+ dst[3] = _mm256_loadu_si256((__m256i *)(src + 3 * stride));
+ dst[4] = _mm256_loadu_si256((__m256i *)(src + 4 * stride));
+ dst[5] = _mm256_loadu_si256((__m256i *)(src + 5 * stride));
+ dst[6] = _mm256_loadu_si256((__m256i *)(src + 6 * stride));
+}
+
+static AOM_FORCE_INLINE void load_16bit_8rows_avx2(const int16_t *const src,
+ const ptrdiff_t stride,
+ __m256i dst[8]) {
+ dst[0] = _mm256_loadu_si256((__m256i *)(src + 0 * stride));
+ dst[1] = _mm256_loadu_si256((__m256i *)(src + 1 * stride));
+ dst[2] = _mm256_loadu_si256((__m256i *)(src + 2 * stride));
+ dst[3] = _mm256_loadu_si256((__m256i *)(src + 3 * stride));
+ dst[4] = _mm256_loadu_si256((__m256i *)(src + 4 * stride));
+ dst[5] = _mm256_loadu_si256((__m256i *)(src + 5 * stride));
+ dst[6] = _mm256_loadu_si256((__m256i *)(src + 6 * stride));
+ dst[7] = _mm256_loadu_si256((__m256i *)(src + 7 * stride));
+}
+
+static AOM_FORCE_INLINE void loadu_unpack_16bit_5rows_avx2(
+ const int16_t *const src, const ptrdiff_t stride, __m256i s_256[5],
+ __m256i ss_256[5], __m256i tt_256[5]) {
+ s_256[0] = _mm256_loadu_si256((__m256i *)(src + 0 * stride));
+ s_256[1] = _mm256_loadu_si256((__m256i *)(src + 1 * stride));
+ s_256[2] = _mm256_loadu_si256((__m256i *)(src + 2 * stride));
+ s_256[3] = _mm256_loadu_si256((__m256i *)(src + 3 * stride));
+ s_256[4] = _mm256_loadu_si256((__m256i *)(src + 4 * stride));
+
+ ss_256[0] = _mm256_unpacklo_epi16(s_256[0], s_256[1]);
+ ss_256[1] = _mm256_unpacklo_epi16(s_256[2], s_256[3]);
+ ss_256[3] = _mm256_unpackhi_epi16(s_256[0], s_256[1]);
+ ss_256[4] = _mm256_unpackhi_epi16(s_256[2], s_256[3]);
+
+ tt_256[0] = _mm256_unpacklo_epi16(s_256[1], s_256[2]);
+ tt_256[1] = _mm256_unpacklo_epi16(s_256[3], s_256[4]);
+ tt_256[3] = _mm256_unpackhi_epi16(s_256[1], s_256[2]);
+ tt_256[4] = _mm256_unpackhi_epi16(s_256[3], s_256[4]);
+}
+
+static AOM_FORCE_INLINE void loadu_unpack_16bit_3rows_avx2(
+ const int16_t *const src, const ptrdiff_t stride, __m256i s_256[3],
+ __m256i ss_256[3], __m256i tt_256[3]) {
+ s_256[0] = _mm256_loadu_si256((__m256i *)(src + 0 * stride));
+ s_256[1] = _mm256_loadu_si256((__m256i *)(src + 1 * stride));
+ s_256[2] = _mm256_loadu_si256((__m256i *)(src + 2 * stride));
+
+ ss_256[0] = _mm256_unpacklo_epi16(s_256[0], s_256[1]);
+ ss_256[2] = _mm256_unpackhi_epi16(s_256[0], s_256[1]);
+
+ tt_256[0] = _mm256_unpacklo_epi16(s_256[1], s_256[2]);
+ tt_256[2] = _mm256_unpackhi_epi16(s_256[1], s_256[2]);
+}
+
+static INLINE void convolve_8tap_unpack_avx2(const __m256i s[6],
+ __m256i ss[7]) {
+ ss[0] = _mm256_unpacklo_epi16(s[0], s[1]);
+ ss[1] = _mm256_unpacklo_epi16(s[2], s[3]);
+ ss[2] = _mm256_unpacklo_epi16(s[4], s[5]);
+ ss[4] = _mm256_unpackhi_epi16(s[0], s[1]);
+ ss[5] = _mm256_unpackhi_epi16(s[2], s[3]);
+ ss[6] = _mm256_unpackhi_epi16(s[4], s[5]);
+}
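+
+// A minimal illustrative sketch (hypothetical helper, not part of the
+// upstream port) of the interleave idiom used by the vertical kernels:
+// unpacking two rows pairs their 16-bit samples, so a single madd computes
+// row0 * c0 + row1 * c1 in each 32-bit output lane.
+static INLINE __m256i pair_madd_demo(const __m256i row0, const __m256i row1,
+                                     const __m256i coeff_pair) {
+  const __m256i lo = _mm256_unpacklo_epi16(row0, row1);
+  return _mm256_madd_epi16(lo, coeff_pair);
+}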
+
+static INLINE __m128i convolve_2tap_ssse3(const __m128i ss[1],
+ const __m128i coeffs[1]) {
+ return _mm_maddubs_epi16(ss[0], coeffs[0]);
+}
+
+static INLINE __m128i convolve_4tap_ssse3(const __m128i ss[2],
+ const __m128i coeffs[2]) {
+ const __m128i res_23 = _mm_maddubs_epi16(ss[0], coeffs[0]);
+ const __m128i res_45 = _mm_maddubs_epi16(ss[1], coeffs[1]);
+ return _mm_add_epi16(res_23, res_45);
+}
+
+static INLINE __m128i convolve_6tap_ssse3(const __m128i ss[3],
+ const __m128i coeffs[3]) {
+ const __m128i res_12 = _mm_maddubs_epi16(ss[0], coeffs[0]);
+ const __m128i res_34 = _mm_maddubs_epi16(ss[1], coeffs[1]);
+ const __m128i res_56 = _mm_maddubs_epi16(ss[2], coeffs[2]);
+ const __m128i res_1256 = _mm_add_epi16(res_12, res_56);
+ return _mm_add_epi16(res_1256, res_34);
+}
+
+static INLINE __m128i convolve_8tap_ssse3(const __m128i ss[4],
+ const __m128i coeffs[4]) {
+ const __m128i res_01 = _mm_maddubs_epi16(ss[0], coeffs[0]);
+ const __m128i res_23 = _mm_maddubs_epi16(ss[1], coeffs[1]);
+ const __m128i res_45 = _mm_maddubs_epi16(ss[2], coeffs[2]);
+ const __m128i res_67 = _mm_maddubs_epi16(ss[3], coeffs[3]);
+ const __m128i res_0145 = _mm_add_epi16(res_01, res_45);
+ const __m128i res_2367 = _mm_add_epi16(res_23, res_67);
+ return _mm_add_epi16(res_0145, res_2367);
+}
+
+static INLINE __m256i convolve_2tap_avx2(const __m256i ss[1],
+ const __m256i coeffs[1]) {
+ return _mm256_maddubs_epi16(ss[0], coeffs[0]);
+}
+
+static INLINE __m256i convolve_4tap_avx2(const __m256i ss[2],
+ const __m256i coeffs[2]) {
+ const __m256i res_23 = _mm256_maddubs_epi16(ss[0], coeffs[0]);
+ const __m256i res_45 = _mm256_maddubs_epi16(ss[1], coeffs[1]);
+ return _mm256_add_epi16(res_23, res_45);
+}
+
+static INLINE __m256i convolve_6tap_avx2(const __m256i ss[3],
+ const __m256i coeffs[3]) {
+ const __m256i res_01 = _mm256_maddubs_epi16(ss[0], coeffs[0]);
+ const __m256i res_23 = _mm256_maddubs_epi16(ss[1], coeffs[1]);
+ const __m256i res_45 = _mm256_maddubs_epi16(ss[2], coeffs[2]);
+ const __m256i res_0145 = _mm256_add_epi16(res_01, res_45);
+ return _mm256_add_epi16(res_0145, res_23);
+}
+
+static INLINE __m256i convolve_8tap_avx2(const __m256i ss[4],
+ const __m256i coeffs[4]) {
+ const __m256i res_01 = _mm256_maddubs_epi16(ss[0], coeffs[0]);
+ const __m256i res_23 = _mm256_maddubs_epi16(ss[1], coeffs[1]);
+ const __m256i res_45 = _mm256_maddubs_epi16(ss[2], coeffs[2]);
+ const __m256i res_67 = _mm256_maddubs_epi16(ss[3], coeffs[3]);
+ const __m256i res_0145 = _mm256_add_epi16(res_01, res_45);
+ const __m256i res_2367 = _mm256_add_epi16(res_23, res_67);
+ return _mm256_add_epi16(res_0145, res_2367);
+}
+
+static INLINE __m128i convolve16_2tap_sse2(const __m128i ss[1],
+ const __m128i coeffs[1]) {
+ return _mm_madd_epi16(ss[0], coeffs[0]);
+}
+
+static INLINE __m128i convolve16_4tap_sse2(const __m128i ss[2],
+ const __m128i coeffs[2]) {
+ const __m128i res_01 = _mm_madd_epi16(ss[0], coeffs[0]);
+ const __m128i res_23 = _mm_madd_epi16(ss[1], coeffs[1]);
+ return _mm_add_epi32(res_01, res_23);
+}
+
+static INLINE __m128i convolve16_6tap_sse2(const __m128i ss[3],
+ const __m128i coeffs[3]) {
+ const __m128i res_01 = _mm_madd_epi16(ss[0], coeffs[0]);
+ const __m128i res_23 = _mm_madd_epi16(ss[1], coeffs[1]);
+ const __m128i res_45 = _mm_madd_epi16(ss[2], coeffs[2]);
+ const __m128i res_0123 = _mm_add_epi32(res_01, res_23);
+ return _mm_add_epi32(res_0123, res_45);
+}
+
+static INLINE __m128i convolve16_8tap_sse2(const __m128i ss[4],
+ const __m128i coeffs[4]) {
+ const __m128i res_01 = _mm_madd_epi16(ss[0], coeffs[0]);
+ const __m128i res_23 = _mm_madd_epi16(ss[1], coeffs[1]);
+ const __m128i res_45 = _mm_madd_epi16(ss[2], coeffs[2]);
+ const __m128i res_67 = _mm_madd_epi16(ss[3], coeffs[3]);
+ const __m128i res_0123 = _mm_add_epi32(res_01, res_23);
+ const __m128i res_4567 = _mm_add_epi32(res_45, res_67);
+ return _mm_add_epi32(res_0123, res_4567);
+}
+
+static INLINE __m256i convolve16_2tap_avx2(const __m256i ss[1],
+ const __m256i coeffs[1]) {
+ return _mm256_madd_epi16(ss[0], coeffs[0]);
+}
+
+static INLINE __m256i convolve16_4tap_avx2(const __m256i ss[2],
+ const __m256i coeffs[2]) {
+ const __m256i res_1 = _mm256_madd_epi16(ss[0], coeffs[0]);
+ const __m256i res_2 = _mm256_madd_epi16(ss[1], coeffs[1]);
+ return _mm256_add_epi32(res_1, res_2);
+}
+
+static INLINE __m256i convolve16_6tap_avx2(const __m256i ss[3],
+ const __m256i coeffs[3]) {
+ const __m256i res_01 = _mm256_madd_epi16(ss[0], coeffs[0]);
+ const __m256i res_23 = _mm256_madd_epi16(ss[1], coeffs[1]);
+ const __m256i res_45 = _mm256_madd_epi16(ss[2], coeffs[2]);
+ const __m256i res_0123 = _mm256_add_epi32(res_01, res_23);
+ return _mm256_add_epi32(res_0123, res_45);
+}
+
+static INLINE __m256i convolve16_8tap_avx2(const __m256i ss[4],
+ const __m256i coeffs[4]) {
+ const __m256i res_01 = _mm256_madd_epi16(ss[0], coeffs[0]);
+ const __m256i res_23 = _mm256_madd_epi16(ss[1], coeffs[1]);
+ const __m256i res_45 = _mm256_madd_epi16(ss[2], coeffs[2]);
+ const __m256i res_67 = _mm256_madd_epi16(ss[3], coeffs[3]);
+ const __m256i res_0123 = _mm256_add_epi32(res_01, res_23);
+ const __m256i res_4567 = _mm256_add_epi32(res_45, res_67);
+ return _mm256_add_epi32(res_0123, res_4567);
+}
+
+static INLINE __m256i x_convolve_4tap_avx2(const __m256i data,
+ const __m256i coeffs[2],
+ const __m256i filt[2]) {
+ __m256i ss[2];
+
+ ss[0] = _mm256_shuffle_epi8(data, filt[0]);
+ ss[1] = _mm256_shuffle_epi8(data, filt[1]);
+
+ return convolve_4tap_avx2(ss, coeffs);
+}
+
+static INLINE __m256i x_convolve_6tap_avx2(const __m256i data,
+ const __m256i coeffs[3],
+ const __m256i filt[3]) {
+ __m256i ss[3];
+
+ ss[0] = _mm256_shuffle_epi8(data, filt[0]);
+ ss[1] = _mm256_shuffle_epi8(data, filt[1]);
+ ss[2] = _mm256_shuffle_epi8(data, filt[2]);
+
+ return convolve_6tap_avx2(ss, coeffs);
+}
+
+static INLINE __m256i x_convolve_8tap_avx2(const __m256i data,
+ const __m256i coeffs[4],
+ const __m256i filt[4]) {
+ __m256i ss[4];
+
+ ss[0] = _mm256_shuffle_epi8(data, filt[0]);
+ ss[1] = _mm256_shuffle_epi8(data, filt[1]);
+ ss[2] = _mm256_shuffle_epi8(data, filt[2]);
+ ss[3] = _mm256_shuffle_epi8(data, filt[3]);
+
+ return convolve_8tap_avx2(ss, coeffs);
+}
+
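+// Rounding helpers: each implements a round-to-nearest right shift,
+// adding 1 << (shift - 1) before shifting arithmetically by `shift`. With
+// AV1's FILTER_BITS == 7, the sr_y variants add 32 and shift by 6.
+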
+static INLINE __m256i sr_y_round_avx2(const __m256i src) {
+ const __m256i round = _mm256_set1_epi16(32);
+ const __m256i dst = _mm256_add_epi16(src, round);
+ return _mm256_srai_epi16(dst, FILTER_BITS - 1);
+}
+
+static INLINE __m128i xy_x_round_sse2(const __m128i src) {
+ const __m128i round = _mm_set1_epi16(2);
+ const __m128i dst = _mm_add_epi16(src, round);
+ return _mm_srai_epi16(dst, 2);
+}
+
+static INLINE __m256i xy_x_round_avx2(const __m256i src) {
+ const __m256i round = _mm256_set1_epi16(2);
+ const __m256i dst = _mm256_add_epi16(src, round);
+ return _mm256_srai_epi16(dst, 2);
+}
+
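+// The xy_x_round_store_* helpers round the horizontal (first pass) results
+// of the 2-D path and store them as int16_t intermediates for the vertical
+// pass: only 2 bits are dropped here; the remaining precision is removed
+// by xy_y_round's shift of 11 after the second pass.
+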
+static INLINE void xy_x_round_store_2x2_sse2(const __m128i res,
+ int16_t *const dst) {
+ const __m128i d = xy_x_round_sse2(res);
+ _mm_storel_epi64((__m128i *)dst, d);
+}
+
+static INLINE void xy_x_round_store_4x2_sse2(const __m128i res,
+ int16_t *const dst) {
+ const __m128i d = xy_x_round_sse2(res);
+ _mm_storeu_si128((__m128i *)dst, d);
+}
+
+static INLINE void xy_x_round_store_8x2_sse2(const __m128i res[2],
+ int16_t *const dst) {
+ __m128i r[2];
+
+ r[0] = xy_x_round_sse2(res[0]);
+ r[1] = xy_x_round_sse2(res[1]);
+ _mm_storeu_si128((__m128i *)dst, r[0]);
+ _mm_storeu_si128((__m128i *)(dst + 8), r[1]);
+}
+
+static INLINE void xy_x_round_store_8x2_avx2(const __m256i res,
+ int16_t *const dst) {
+ const __m256i d = xy_x_round_avx2(res);
+ _mm256_storeu_si256((__m256i *)dst, d);
+}
+
+static INLINE void xy_x_round_store_32_avx2(const __m256i res[2],
+ int16_t *const dst) {
+ __m256i r[2];
+
+ r[0] = xy_x_round_avx2(res[0]);
+ r[1] = xy_x_round_avx2(res[1]);
+ const __m256i d0 =
+ _mm256_inserti128_si256(r[0], _mm256_castsi256_si128(r[1]), 1);
+ const __m256i d1 =
+ _mm256_inserti128_si256(r[1], _mm256_extracti128_si256(r[0], 1), 0);
+ _mm256_storeu_si256((__m256i *)dst, d0);
+ _mm256_storeu_si256((__m256i *)(dst + 16), d1);
+}
+
+static INLINE __m128i xy_y_round_sse2(const __m128i src) {
+ const __m128i round = _mm_set1_epi32(1024);
+ const __m128i dst = _mm_add_epi32(src, round);
+ return _mm_srai_epi32(dst, 11);
+}
+
+static INLINE __m128i xy_y_round_half_pel_sse2(const __m128i src) {
+ const __m128i round = _mm_set1_epi16(16);
+ const __m128i dst = _mm_add_epi16(src, round);
+ return _mm_srai_epi16(dst, 5);
+}
+
+static INLINE __m256i xy_y_round_avx2(const __m256i src) {
+ const __m256i round = _mm256_set1_epi32(1024);
+ const __m256i dst = _mm256_add_epi32(src, round);
+ return _mm256_srai_epi32(dst, 11);
+}
+
+static INLINE __m256i xy_y_round_16_avx2(const __m256i r[2]) {
+ const __m256i r0 = xy_y_round_avx2(r[0]);
+ const __m256i r1 = xy_y_round_avx2(r[1]);
+ return _mm256_packs_epi32(r0, r1);
+}
+
+static INLINE __m256i xy_y_round_half_pel_avx2(const __m256i src) {
+ const __m256i round = _mm256_set1_epi16(16);
+ const __m256i dst = _mm256_add_epi16(src, round);
+ return _mm256_srai_epi16(dst, 5);
+}
+
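+// pack_store_* helpers: saturate the filtered i16 results to u8 with
+// packus and write the rows out across the destination stride; the 2x2,
+// 4x2 and 8x2 variants extract two narrow rows from a single register.
+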
+static INLINE void pack_store_2x2_sse2(const __m128i res, uint8_t *const dst,
+ const ptrdiff_t stride) {
+ const __m128i d = _mm_packus_epi16(res, res);
+ *(int16_t *)dst = (int16_t)_mm_cvtsi128_si32(d);
+ *(int16_t *)(dst + stride) = (int16_t)_mm_extract_epi16(d, 1);
+}
+
+static INLINE void pack_store_4x2_sse2(const __m128i res, uint8_t *const dst,
+ const ptrdiff_t stride) {
+ const __m128i d = _mm_packus_epi16(res, res);
+ store_u8_4x2_sse2(d, dst, stride);
+}
+
+static INLINE void pack_store_4x2_avx2(const __m256i res, uint8_t *const dst,
+ const ptrdiff_t stride) {
+ const __m256i d = _mm256_packus_epi16(res, res);
+ const __m128i d0 = _mm256_castsi256_si128(d);
+ const __m128i d1 = _mm256_extracti128_si256(d, 1);
+
+ xx_storel_32(dst, d0);
+ xx_storel_32(dst + stride, d1);
+}
+
+static INLINE void pack_store_8x2_avx2(const __m256i res, uint8_t *const dst,
+ const ptrdiff_t stride) {
+ const __m256i d = _mm256_packus_epi16(res, res);
+ const __m128i d0 = _mm256_castsi256_si128(d);
+ const __m128i d1 = _mm256_extracti128_si256(d, 1);
+ _mm_storel_epi64((__m128i *)dst, d0);
+ _mm_storel_epi64((__m128i *)(dst + stride), d1);
+}
+
+static INLINE void pack_store_16x2_avx2(const __m256i res0, const __m256i res1,
+ uint8_t *const dst,
+ const ptrdiff_t stride) {
+ const __m256i d = _mm256_packus_epi16(res0, res1);
+ storeu_u8_16x2_avx2(d, dst, stride);
+}
+
+static INLINE void xy_y_pack_store_16x2_avx2(const __m256i res0,
+ const __m256i res1,
+ uint8_t *const dst,
+ const ptrdiff_t stride) {
+ const __m256i t = _mm256_packus_epi16(res0, res1);
+ const __m256i d = _mm256_permute4x64_epi64(t, 0xD8);
+ storeu_u8_16x2_avx2(d, dst, stride);
+}
+
+static INLINE void pack_store_32_avx2(const __m256i res0, const __m256i res1,
+ uint8_t *const dst) {
+ const __m256i t = _mm256_packus_epi16(res0, res1);
+ const __m256i d = _mm256_permute4x64_epi64(t, 0xD8);
+ _mm256_storeu_si256((__m256i *)dst, d);
+}
+
+static INLINE void xy_y_round_store_2x2_sse2(const __m128i res,
+ uint8_t *const dst,
+ const ptrdiff_t stride) {
+ const __m128i r = xy_y_round_sse2(res);
+ const __m128i rr = _mm_packs_epi32(r, r);
+ pack_store_2x2_sse2(rr, dst, stride);
+}
+
+static INLINE void xy_y_round_store_4x2_avx2(const __m256i res,
+ uint8_t *const dst,
+ const ptrdiff_t stride) {
+ const __m256i r = xy_y_round_avx2(res);
+ const __m256i rr = _mm256_packs_epi32(r, r);
+ pack_store_4x2_avx2(rr, dst, stride);
+}
+
+static INLINE void xy_y_pack_store_32_avx2(const __m256i res0,
+ const __m256i res1,
+ uint8_t *const dst) {
+ const __m256i d = _mm256_packus_epi16(res0, res1);
+ // d = _mm256_permute4x64_epi64(d, 0xD8);
+ _mm256_storeu_si256((__m256i *)dst, d);
+}
+
+static INLINE void xy_y_round_store_32_avx2(const __m256i r0[2],
+ const __m256i r1[2],
+ uint8_t *const dst) {
+ const __m256i ra = xy_y_round_16_avx2(r0);
+ const __m256i rb = xy_y_round_16_avx2(r1);
+ xy_y_pack_store_32_avx2(ra, rb, dst);
+}
+
+static INLINE void convolve_store_32_avx2(const __m256i res0,
+ const __m256i res1,
+ uint8_t *const dst) {
+ const __m256i d = _mm256_packus_epi16(res0, res1);
+ _mm256_storeu_si256((__m256i *)dst, d);
+}
+
+static INLINE __m128i sr_x_round_sse2(const __m128i src) {
+ const __m128i round = _mm_set1_epi16(34);
+ const __m128i dst = _mm_add_epi16(src, round);
+ return _mm_srai_epi16(dst, 6);
+}
+
+static INLINE __m256i sr_x_round_avx2(const __m256i src) {
+ const __m256i round = _mm256_set1_epi16(34);
+ const __m256i dst = _mm256_add_epi16(src, round);
+ return _mm256_srai_epi16(dst, 6);
+}
+
+static INLINE __m128i sr_y_round_sse2(const __m128i src) {
+ const __m128i round = _mm_set1_epi16(32);
+ const __m128i dst = _mm_add_epi16(src, round);
+ return _mm_srai_epi16(dst, FILTER_BITS - 1);
+}
+
+static INLINE void sr_x_round_store_8x2_avx2(const __m256i res,
+ uint8_t *const dst,
+ const ptrdiff_t dst_stride) {
+ const __m256i r = sr_x_round_avx2(res);
+ pack_store_8x2_avx2(r, dst, dst_stride);
+}
+
+static INLINE void sr_x_round_store_16x2_avx2(const __m256i res[2],
+ uint8_t *const dst,
+ const ptrdiff_t dst_stride) {
+ __m256i r[2];
+
+ r[0] = sr_x_round_avx2(res[0]);
+ r[1] = sr_x_round_avx2(res[1]);
+ pack_store_16x2_avx2(r[0], r[1], dst, dst_stride);
+}
+
+static INLINE void sr_x_round_store_32_avx2(const __m256i res[2],
+ uint8_t *const dst) {
+ __m256i r[2];
+
+ r[0] = sr_x_round_avx2(res[0]);
+ r[1] = sr_x_round_avx2(res[1]);
+ convolve_store_32_avx2(r[0], r[1], dst);
+}
+
+static INLINE void sr_y_round_store_8x2_avx2(const __m256i res,
+ uint8_t *const dst,
+ const ptrdiff_t dst_stride) {
+ const __m256i r = sr_y_round_avx2(res);
+ pack_store_8x2_avx2(r, dst, dst_stride);
+}
+
+static INLINE void sr_y_round_store_16x2_avx2(const __m256i res[2],
+ uint8_t *const dst,
+ const ptrdiff_t dst_stride) {
+ __m256i r[2];
+
+ r[0] = sr_y_round_avx2(res[0]);
+ r[1] = sr_y_round_avx2(res[1]);
+ pack_store_16x2_avx2(r[0], r[1], dst, dst_stride);
+}
+
+static INLINE void sr_y_2tap_32_avg_avx2(const uint8_t *const src,
+ const __m256i s0, __m256i *const s1,
+ uint8_t *const dst) {
+ *s1 = _mm256_loadu_si256((__m256i *)src);
+ const __m256i d = _mm256_avg_epu8(s0, *s1);
+ _mm256_storeu_si256((__m256i *)dst, d);
+}
+
+static INLINE void sr_x_2tap_32_avg_avx2(const uint8_t *const src,
+ uint8_t *const dst) {
+ const __m256i s0 = _mm256_loadu_si256((__m256i *)src);
+ const __m256i s1 = _mm256_loadu_si256((__m256i *)(src + 1));
+ const __m256i d = _mm256_avg_epu8(s0, s1);
+ _mm256_storeu_si256((__m256i *)dst, d);
+}
+
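+// Horizontal 2-tap: the two source pixels for every output are adjacent
+// bytes, so the helpers below either shuffle one load into pairs (2- and
+// 4-wide) or interleave a load at src with a load at src + 1 (8-wide and
+// up).
+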
+static INLINE __m128i x_convolve_2tap_2x2_sse4_1(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m128i coeffs[1]) {
+ const __m128i sfl =
+ _mm_setr_epi8(0, 1, 1, 2, 4, 5, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0);
+ const __m128i s_128 = load_u8_4x2_sse4_1(src, stride);
+ const __m128i ss = _mm_shuffle_epi8(s_128, sfl);
+ return convolve_2tap_ssse3(&ss, coeffs);
+}
+
+static INLINE __m128i x_convolve_2tap_4x2_ssse3(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m128i coeffs[1]) {
+ const __m128i sfl =
+ _mm_setr_epi8(0, 1, 1, 2, 2, 3, 3, 4, 8, 9, 9, 10, 10, 11, 11, 12);
+ const __m128i s_128 = load_u8_8x2_sse2(src, stride);
+ const __m128i ss = _mm_shuffle_epi8(s_128, sfl);
+ return convolve_2tap_ssse3(&ss, coeffs);
+}
+
+static INLINE void x_convolve_2tap_8x2_ssse3(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m128i coeffs[1],
+ __m128i r[2]) {
+ __m128i ss[2];
+ const __m128i s00 = _mm_loadu_si128((__m128i *)src);
+ const __m128i s10 = _mm_loadu_si128((__m128i *)(src + stride));
+ const __m128i s01 = _mm_srli_si128(s00, 1);
+ const __m128i s11 = _mm_srli_si128(s10, 1);
+ ss[0] = _mm_unpacklo_epi8(s00, s01);
+ ss[1] = _mm_unpacklo_epi8(s10, s11);
+
+ r[0] = convolve_2tap_ssse3(&ss[0], coeffs);
+ r[1] = convolve_2tap_ssse3(&ss[1], coeffs);
+}
+
+static INLINE __m256i x_convolve_2tap_8x2_avx2(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m256i coeffs[1]) {
+ __m128i s_128[2][2];
+ __m256i s_256[2];
+
+ s_128[0][0] = _mm_loadu_si128((__m128i *)src);
+ s_128[1][0] = _mm_loadu_si128((__m128i *)(src + stride));
+ s_128[0][1] = _mm_srli_si128(s_128[0][0], 1);
+ s_128[1][1] = _mm_srli_si128(s_128[1][0], 1);
+ s_256[0] = _mm256_setr_m128i(s_128[0][0], s_128[1][0]);
+ s_256[1] = _mm256_setr_m128i(s_128[0][1], s_128[1][1]);
+ const __m256i ss = _mm256_unpacklo_epi8(s_256[0], s_256[1]);
+ return convolve_2tap_avx2(&ss, coeffs);
+}
+
+static INLINE void x_convolve_2tap_16x2_avx2(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m256i coeffs[1],
+ __m256i r[2]) {
+ const __m256i s0_256 = loadu_8bit_16x2_avx2(src, stride);
+ const __m256i s1_256 = loadu_8bit_16x2_avx2(src + 1, stride);
+ const __m256i s0 = _mm256_unpacklo_epi8(s0_256, s1_256);
+ const __m256i s1 = _mm256_unpackhi_epi8(s0_256, s1_256);
+ r[0] = convolve_2tap_avx2(&s0, coeffs);
+ r[1] = convolve_2tap_avx2(&s1, coeffs);
+}
+
+static INLINE void x_convolve_2tap_32_avx2(const uint8_t *const src,
+ const __m256i coeffs[1],
+ __m256i r[2]) {
+ const __m256i s0 = _mm256_loadu_si256((__m256i *)src);
+ const __m256i s1 = _mm256_loadu_si256((__m256i *)(src + 1));
+ const __m256i ss0 = _mm256_unpacklo_epi8(s0, s1);
+ const __m256i ss1 = _mm256_unpackhi_epi8(s0, s1);
+
+ r[0] = convolve_2tap_avx2(&ss0, coeffs);
+ r[1] = convolve_2tap_avx2(&ss1, coeffs);
+}
+
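+// For the narrow 4- and 6-tap cases, two rows are loaded into one register
+// (load_u8_8x2) and _mm_setr_epi8 masks gather each coefficient pair's
+// source bytes from both rows at once, so both rows are filtered together.
+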
+static INLINE __m128i x_convolve_4tap_2x2_ssse3(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m128i coeffs[2]) {
+ const __m128i sfl0 =
+ _mm_setr_epi8(0, 1, 1, 2, 8, 9, 9, 10, 0, 0, 0, 0, 0, 0, 0, 0);
+ const __m128i sfl1 =
+ _mm_setr_epi8(2, 3, 3, 4, 10, 11, 11, 12, 0, 0, 0, 0, 0, 0, 0, 0);
+ const __m128i s = load_u8_8x2_sse2(src, stride);
+ __m128i ss[2];
+
+ ss[0] = _mm_shuffle_epi8(s, sfl0);
+ ss[1] = _mm_shuffle_epi8(s, sfl1);
+ return convolve_4tap_ssse3(ss, coeffs);
+}
+
+static INLINE __m128i x_convolve_4tap_4x2_ssse3(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m128i coeffs[2]) {
+ const __m128i s = load_u8_8x2_sse2(src, stride);
+ const __m128i sfl0 =
+ _mm_setr_epi8(0, 1, 1, 2, 2, 3, 3, 4, 8, 9, 9, 10, 10, 11, 11, 12);
+ const __m128i sfl1 =
+ _mm_setr_epi8(2, 3, 3, 4, 4, 5, 5, 6, 10, 11, 11, 12, 12, 13, 13, 14);
+ __m128i ss[2];
+
+ ss[0] = _mm_shuffle_epi8(s, sfl0);
+ ss[1] = _mm_shuffle_epi8(s, sfl1);
+ return convolve_4tap_ssse3(ss, coeffs);
+}
+
+static INLINE __m256i x_convolve_4tap_8x2_avx2(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m256i coeffs[2],
+ const __m256i filt[2]) {
+ const __m256i s_256 = loadu_8bit_16x2_avx2(src, stride);
+ return x_convolve_4tap_avx2(s_256, coeffs, filt);
+}
+
+static INLINE void x_convolve_4tap_16x2_avx2(const uint8_t *const src,
+ const int32_t src_stride,
+ const __m256i coeffs[2],
+ const __m256i filt[2],
+ __m256i r[2]) {
+ r[0] = x_convolve_4tap_8x2_avx2(src + 0, src_stride, coeffs, filt);
+ r[1] = x_convolve_4tap_8x2_avx2(src + 8, src_stride, coeffs, filt);
+}
+
+static INLINE void x_convolve_4tap_32_avx2(const uint8_t *const src,
+ const __m256i coeffs[2],
+ const __m256i filt[2],
+ __m256i r[2]) {
+ const __m256i s0_256 = _mm256_loadu_si256((__m256i *)src);
+ const __m256i s1_256 = _mm256_loadu_si256((__m256i *)(src + 8));
+
+ r[0] = x_convolve_4tap_avx2(s0_256, coeffs, filt);
+ r[1] = x_convolve_4tap_avx2(s1_256, coeffs, filt);
+}
+
+static INLINE __m128i x_convolve_6tap_2x2_ssse3(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m128i coeffs[3]) {
+ const __m128i sfl0 =
+ _mm_setr_epi8(0, 1, 1, 2, 8, 9, 9, 10, 0, 0, 0, 0, 0, 0, 0, 0);
+ const __m128i sfl1 =
+ _mm_setr_epi8(2, 3, 3, 4, 10, 11, 11, 12, 0, 0, 0, 0, 0, 0, 0, 0);
+ const __m128i sfl2 =
+ _mm_setr_epi8(4, 5, 5, 6, 12, 13, 13, 14, 0, 0, 0, 0, 0, 0, 0, 0);
+
+ const __m128i s = load_u8_8x2_sse2(src, stride);
+ __m128i ss[3];
+
+ ss[0] = _mm_shuffle_epi8(s, sfl0);
+ ss[1] = _mm_shuffle_epi8(s, sfl1);
+ ss[2] = _mm_shuffle_epi8(s, sfl2);
+ return convolve_6tap_ssse3(ss, coeffs);
+}
+
+static INLINE __m128i x_convolve_6tap_4x2_ssse3(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m128i coeffs[3]) {
+ const __m128i s = load_u8_8x2_sse2(src, stride);
+ const __m128i sfl0 =
+ _mm_setr_epi8(0, 1, 1, 2, 8, 9, 9, 10, 0, 0, 0, 0, 0, 0, 0, 0);
+ const __m128i sfl1 =
+ _mm_setr_epi8(2, 3, 3, 4, 10, 11, 11, 12, 0, 0, 0, 0, 0, 0, 0, 0);
+ const __m128i sfl2 =
+ _mm_setr_epi8(4, 5, 5, 6, 12, 13, 13, 14, 0, 0, 0, 0, 0, 0, 0, 0);
+ __m128i ss[3];
+
+ ss[0] = _mm_shuffle_epi8(s, sfl0);
+ ss[1] = _mm_shuffle_epi8(s, sfl1);
+ ss[2] = _mm_shuffle_epi8(s, sfl2);
+ return convolve_6tap_ssse3(ss, coeffs);
+}
+
+static INLINE __m256i x_convolve_6tap_8x2_avx2(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m256i coeffs[3],
+ const __m256i filt[3]) {
+ const __m256i s_256 = loadu_8bit_16x2_avx2(src, stride);
+ return x_convolve_6tap_avx2(s_256, coeffs, filt);
+}
+
+static INLINE void x_convolve_6tap_16x2_avx2(const uint8_t *const src,
+ const int32_t src_stride,
+ const __m256i coeffs[3],
+ const __m256i filt[3],
+ __m256i r[2]) {
+ r[0] = x_convolve_6tap_8x2_avx2(src + 0, src_stride, coeffs, filt);
+ r[1] = x_convolve_6tap_8x2_avx2(src + 8, src_stride, coeffs, filt);
+}
+
+static INLINE void x_convolve_6tap_32_avx2(const uint8_t *const src,
+ const __m256i coeffs[3],
+ const __m256i filt[3],
+ __m256i r[2]) {
+ const __m256i s0_256 = _mm256_loadu_si256((__m256i *)src);
+ const __m256i s1_256 = _mm256_loadu_si256((__m256i *)(src + 8));
+
+ r[0] = x_convolve_6tap_avx2(s0_256, coeffs, filt);
+ r[1] = x_convolve_6tap_avx2(s1_256, coeffs, filt);
+}
+
+static INLINE __m256i x_convolve_8tap_8x2_avx2(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m256i coeffs[4],
+ const __m256i filt[4]) {
+ const __m256i s_256 = loadu_8bit_16x2_avx2(src, stride);
+ return x_convolve_8tap_avx2(s_256, coeffs, filt);
+}
+
+static AOM_FORCE_INLINE void x_convolve_8tap_16x2_avx2(const uint8_t *const src,
+ const int32_t src_stride,
+ const __m256i coeffs[4],
+ const __m256i filt[4],
+ __m256i r[2]) {
+ r[0] = x_convolve_8tap_8x2_avx2(src + 0, src_stride, coeffs, filt);
+ r[1] = x_convolve_8tap_8x2_avx2(src + 8, src_stride, coeffs, filt);
+}
+
+static AOM_FORCE_INLINE void x_convolve_8tap_32_avx2(const uint8_t *const src,
+ const __m256i coeffs[4],
+ const __m256i filt[4],
+ __m256i r[2]) {
+ const __m256i s0_256 = _mm256_loadu_si256((__m256i *)src);
+ const __m256i s1_256 = _mm256_loadu_si256((__m256i *)(src + 8));
+
+ r[0] = x_convolve_8tap_avx2(s0_256, coeffs, filt);
+ r[1] = x_convolve_8tap_avx2(s1_256, coeffs, filt);
+}
+
+static INLINE __m128i y_convolve_2tap_2x2_ssse3(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m128i coeffs[1],
+ __m128i s_16[2]) {
+ __m128i s_128[2];
+
+ s_16[1] = _mm_cvtsi32_si128(*(int16_t *)(src + stride));
+ s_128[0] = _mm_unpacklo_epi16(s_16[0], s_16[1]);
+ s_16[0] = _mm_cvtsi32_si128(*(int16_t *)(src + 2 * stride));
+ s_128[1] = _mm_unpacklo_epi16(s_16[1], s_16[0]);
+ const __m128i ss = _mm_unpacklo_epi8(s_128[0], s_128[1]);
+ return convolve_2tap_ssse3(&ss, coeffs);
+}
+
+static INLINE __m128i y_convolve_2tap_4x2_ssse3(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m128i coeffs[1],
+ __m128i s_32[2]) {
+ __m128i s_128[2];
+
+ s_32[1] = _mm_cvtsi32_si128(*(int32_t *)(src + stride));
+ s_128[0] = _mm_unpacklo_epi32(s_32[0], s_32[1]);
+ s_32[0] = _mm_cvtsi32_si128(*(int32_t *)(src + 2 * stride));
+ s_128[1] = _mm_unpacklo_epi32(s_32[1], s_32[0]);
+ const __m128i ss = _mm_unpacklo_epi8(s_128[0], s_128[1]);
+ return convolve_2tap_ssse3(&ss, coeffs);
+}
+
+static INLINE __m256i y_convolve_2tap_8x2_avx2(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m256i coeffs[1],
+ __m128i s_64[2]) {
+ __m256i s_256[2];
+
+ s_64[1] = _mm_loadl_epi64((__m128i *)(src + stride));
+ s_256[0] = _mm256_setr_m128i(s_64[0], s_64[1]);
+ s_64[0] = _mm_loadl_epi64((__m128i *)(src + 2 * stride));
+ s_256[1] = _mm256_setr_m128i(s_64[1], s_64[0]);
+ const __m256i ss = _mm256_unpacklo_epi8(s_256[0], s_256[1]);
+ return convolve_2tap_avx2(&ss, coeffs);
+}
+
+static INLINE void y_convolve_2tap_16x2_avx2(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m256i coeffs[1],
+ __m128i s_128[2], __m256i r[2]) {
+ __m256i s_256[2];
+
+ s_128[1] = _mm_loadu_si128((__m128i *)(src + stride));
+ s_256[0] = _mm256_setr_m128i(s_128[0], s_128[1]);
+ s_128[0] = _mm_loadu_si128((__m128i *)(src + 2 * stride));
+ s_256[1] = _mm256_setr_m128i(s_128[1], s_128[0]);
+ const __m256i ss0 = _mm256_unpacklo_epi8(s_256[0], s_256[1]);
+ const __m256i ss1 = _mm256_unpackhi_epi8(s_256[0], s_256[1]);
+ r[0] = convolve_2tap_avx2(&ss0, coeffs);
+ r[1] = convolve_2tap_avx2(&ss1, coeffs);
+}
+
+static INLINE void y_convolve_2tap_32_avx2(const uint8_t *const src,
+ const __m256i coeffs[1],
+ const __m256i s0, __m256i *const s1,
+ __m256i r[2]) {
+ *s1 = _mm256_loadu_si256((__m256i *)src);
+ const __m256i ss0 = _mm256_unpacklo_epi8(s0, *s1);
+ const __m256i ss1 = _mm256_unpackhi_epi8(s0, *s1);
+ r[0] = convolve_2tap_avx2(&ss0, coeffs);
+ r[1] = convolve_2tap_avx2(&ss1, coeffs);
+}
+
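+// Vertical helpers keep a sliding window of previously loaded rows in the
+// caller-owned s_* / ss_* arrays: each call loads only the next two rows
+// and updates the window in place, so every source row is read from memory
+// exactly once across iterations.
+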
+static INLINE __m128i y_convolve_4tap_2x2_ssse3(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m128i coeffs[2],
+ __m128i s_16[4],
+ __m128i ss_128[2]) {
+ s_16[3] = _mm_cvtsi32_si128(*(int16_t *)(src + stride));
+ const __m128i src23 = _mm_unpacklo_epi16(s_16[2], s_16[3]);
+ s_16[2] = _mm_cvtsi32_si128(*(int16_t *)(src + 2 * stride));
+ const __m128i src34 = _mm_unpacklo_epi16(s_16[3], s_16[2]);
+ ss_128[1] = _mm_unpacklo_epi8(src23, src34);
+ return convolve_4tap_ssse3(ss_128, coeffs);
+}
+
+static INLINE __m128i y_convolve_4tap_4x2_ssse3(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m128i coeffs[2],
+ __m128i s_32[4],
+ __m128i ss_128[2]) {
+ s_32[3] = _mm_cvtsi32_si128(*(int32_t *)(src + stride));
+ const __m128i src23 = _mm_unpacklo_epi32(s_32[2], s_32[3]);
+ s_32[2] = _mm_cvtsi32_si128(*(int32_t *)(src + 2 * stride));
+ const __m128i src34 = _mm_unpacklo_epi32(s_32[3], s_32[2]);
+ ss_128[1] = _mm_unpacklo_epi8(src23, src34);
+ return convolve_4tap_ssse3(ss_128, coeffs);
+}
+
+static INLINE __m256i y_convolve_4tap_8x2_avx2(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m256i coeffs[2],
+ __m128i s_64[4],
+ __m256i ss_256[2]) {
+ s_64[3] = _mm_loadl_epi64((__m128i *)(src + stride));
+ const __m256i src23 = _mm256_setr_m128i(s_64[2], s_64[3]);
+ s_64[2] = _mm_loadl_epi64((__m128i *)(src + 2 * stride));
+ const __m256i src34 = _mm256_setr_m128i(s_64[3], s_64[2]);
+ ss_256[1] = _mm256_unpacklo_epi8(src23, src34);
+ return convolve_4tap_avx2(ss_256, coeffs);
+}
+
+static INLINE void y_convolve_4tap_16x2_avx2(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m256i coeffs[2],
+ __m128i s_128[4],
+ __m256i ss_256[4], __m256i r[2]) {
+ s_128[3] = _mm_loadu_si128((__m128i *)(src + stride));
+ const __m256i src23 = _mm256_setr_m128i(s_128[2], s_128[3]);
+ s_128[2] = _mm_loadu_si128((__m128i *)(src + 2 * stride));
+ const __m256i src34 = _mm256_setr_m128i(s_128[3], s_128[2]);
+ ss_256[1] = _mm256_unpacklo_epi8(src23, src34);
+ ss_256[3] = _mm256_unpackhi_epi8(src23, src34);
+ r[0] = convolve_4tap_avx2(ss_256, coeffs);
+ r[1] = convolve_4tap_avx2(ss_256 + 2, coeffs);
+}
+
+static INLINE __m128i y_convolve_6tap_2x2_ssse3(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m128i coeffs[3],
+ __m128i s_16[6],
+ __m128i ss_128[3]) {
+ s_16[5] = _mm_cvtsi32_si128(*(int16_t *)(src + 3 * stride));
+ const __m128i src45 = _mm_unpacklo_epi16(s_16[4], s_16[5]);
+ s_16[4] = _mm_cvtsi32_si128(*(int16_t *)(src + 4 * stride));
+ const __m128i src56 = _mm_unpacklo_epi16(s_16[5], s_16[4]);
+ ss_128[2] = _mm_unpacklo_epi8(src45, src56);
+ return convolve_6tap_ssse3(ss_128, coeffs);
+}
+
+static INLINE void y_convolve_4tap_32x2_avx2(
+ const uint8_t *const src, const ptrdiff_t stride, const __m256i coeffs[2],
+ __m256i s_256[4], __m256i ss_256[4], __m256i tt_256[4], __m256i r[4]) {
+ s_256[3] = _mm256_loadu_si256((__m256i *)(src + 1 * stride));
+ ss_256[1] = _mm256_unpacklo_epi8(s_256[2], s_256[3]);
+ ss_256[3] = _mm256_unpackhi_epi8(s_256[2], s_256[3]);
+ s_256[2] = _mm256_loadu_si256((__m256i *)(src + 2 * stride));
+ tt_256[1] = _mm256_unpacklo_epi8(s_256[3], s_256[2]);
+ tt_256[3] = _mm256_unpackhi_epi8(s_256[3], s_256[2]);
+ r[0] = convolve_4tap_avx2(ss_256 + 0, coeffs);
+ r[1] = convolve_4tap_avx2(ss_256 + 2, coeffs);
+ r[2] = convolve_4tap_avx2(tt_256 + 0, coeffs);
+ r[3] = convolve_4tap_avx2(tt_256 + 2, coeffs);
+}
+
+static INLINE __m128i y_convolve_6tap_4x2_ssse3(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m128i coeffs[3],
+ __m128i s_32[6],
+ __m128i ss_128[3]) {
+ s_32[5] = _mm_cvtsi32_si128(*(int32_t *)(src + 3 * stride));
+ const __m128i src45 = _mm_unpacklo_epi32(s_32[4], s_32[5]);
+ s_32[4] = _mm_cvtsi32_si128(*(int32_t *)(src + 4 * stride));
+ const __m128i src56 = _mm_unpacklo_epi32(s_32[5], s_32[4]);
+ ss_128[2] = _mm_unpacklo_epi8(src45, src56);
+ return convolve_6tap_ssse3(ss_128, coeffs);
+}
+
+static INLINE __m256i y_convolve_6tap_8x2_avx2(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m256i coeffs[3],
+ __m128i s_64[6],
+ __m256i ss_256[3]) {
+ s_64[5] = _mm_loadl_epi64((__m128i *)(src + 3 * stride));
+ const __m256i src45 = _mm256_setr_m128i(s_64[4], s_64[5]);
+ s_64[4] = _mm_loadl_epi64((__m128i *)(src + 4 * stride));
+ const __m256i src56 = _mm256_setr_m128i(s_64[5], s_64[4]);
+ ss_256[2] = _mm256_unpacklo_epi8(src45, src56);
+ return convolve_6tap_avx2(ss_256, coeffs);
+}
+
+static INLINE void y_convolve_6tap_16x2_avx2(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m256i coeffs[3],
+ __m128i s_128[6],
+ __m256i ss_256[6], __m256i r[2]) {
+ s_128[5] = _mm_loadu_si128((__m128i *)(src + 3 * stride));
+ const __m256i src45 = _mm256_setr_m128i(s_128[4], s_128[5]);
+ s_128[4] = _mm_loadu_si128((__m128i *)(src + 4 * stride));
+ const __m256i src56 = _mm256_setr_m128i(s_128[5], s_128[4]);
+ ss_256[2] = _mm256_unpacklo_epi8(src45, src56);
+ ss_256[5] = _mm256_unpackhi_epi8(src45, src56);
+ r[0] = convolve_6tap_avx2(ss_256, coeffs);
+ r[1] = convolve_6tap_avx2(ss_256 + 3, coeffs);
+}
+
+static INLINE void y_convolve_6tap_32x2_avx2(
+ const uint8_t *const src, const ptrdiff_t stride, const __m256i coeffs[3],
+ __m256i s_256[6], __m256i ss_256[6], __m256i tt_256[6], __m256i r[4]) {
+ s_256[5] = _mm256_loadu_si256((__m256i *)(src + 3 * stride));
+ ss_256[2] = _mm256_unpacklo_epi8(s_256[4], s_256[5]);
+ ss_256[5] = _mm256_unpackhi_epi8(s_256[4], s_256[5]);
+ s_256[4] = _mm256_loadu_si256((__m256i *)(src + 4 * stride));
+ tt_256[2] = _mm256_unpacklo_epi8(s_256[5], s_256[4]);
+ tt_256[5] = _mm256_unpackhi_epi8(s_256[5], s_256[4]);
+ r[0] = convolve_6tap_avx2(ss_256 + 0, coeffs);
+ r[1] = convolve_6tap_avx2(ss_256 + 3, coeffs);
+ r[2] = convolve_6tap_avx2(tt_256 + 0, coeffs);
+ r[3] = convolve_6tap_avx2(tt_256 + 3, coeffs);
+}
+
+static INLINE __m128i y_convolve_8tap_2x2_ssse3(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m128i coeffs[4],
+ __m128i s_16[8],
+ __m128i ss_128[4]) {
+ s_16[7] = _mm_cvtsi32_si128(*(int16_t *)(src + 7 * stride));
+ const __m128i src67 = _mm_unpacklo_epi16(s_16[6], s_16[7]);
+ s_16[6] = _mm_cvtsi32_si128(*(int16_t *)(src + 8 * stride));
+ const __m128i src78 = _mm_unpacklo_epi16(s_16[7], s_16[6]);
+ ss_128[3] = _mm_unpacklo_epi8(src67, src78);
+ return convolve_8tap_ssse3(ss_128, coeffs);
+}
+
+static INLINE __m128i y_convolve_8tap_4x2_ssse3(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m128i coeffs[4],
+ __m128i s_32[8],
+ __m128i ss_128[4]) {
+ s_32[7] = _mm_cvtsi32_si128(*(int32_t *)(src + 7 * stride));
+ const __m128i src67 = _mm_unpacklo_epi32(s_32[6], s_32[7]);
+ s_32[6] = _mm_cvtsi32_si128(*(int32_t *)(src + 8 * stride));
+ const __m128i src78 = _mm_unpacklo_epi32(s_32[7], s_32[6]);
+ ss_128[3] = _mm_unpacklo_epi8(src67, src78);
+ return convolve_8tap_ssse3(ss_128, coeffs);
+}
+
+static INLINE __m256i y_convolve_8tap_8x2_avx2(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m256i coeffs[4],
+ __m128i s_64[8],
+ __m256i ss_256[4]) {
+ s_64[7] = _mm_loadl_epi64((__m128i *)(src + 7 * stride));
+ const __m256i src67 = _mm256_setr_m128i(s_64[6], s_64[7]);
+ s_64[6] = _mm_loadl_epi64((__m128i *)(src + 8 * stride));
+ const __m256i src78 = _mm256_setr_m128i(s_64[7], s_64[6]);
+ ss_256[3] = _mm256_unpacklo_epi8(src67, src78);
+ return convolve_8tap_avx2(ss_256, coeffs);
+}
+
+static INLINE void y_convolve_8tap_16x2_avx2(const uint8_t *const src,
+ const ptrdiff_t stride,
+ const __m256i coeffs[4],
+ __m128i s_128[8],
+ __m256i ss_256[8], __m256i r[2]) {
+ s_128[7] = _mm_loadu_si128((__m128i *)(src + 7 * stride));
+ const __m256i src67 = _mm256_setr_m128i(s_128[6], s_128[7]);
+ s_128[6] = _mm_loadu_si128((__m128i *)(src + 8 * stride));
+ const __m256i src78 = _mm256_setr_m128i(s_128[7], s_128[6]);
+ ss_256[3] = _mm256_unpacklo_epi8(src67, src78);
+ ss_256[7] = _mm256_unpackhi_epi8(src67, src78);
+ r[0] = convolve_8tap_avx2(ss_256, coeffs);
+ r[1] = convolve_8tap_avx2(ss_256 + 4, coeffs);
+}
+
+static INLINE void y_convolve_8tap_32x2_avx2(
+ const uint8_t *const src, const ptrdiff_t stride, const __m256i coeffs[4],
+ __m256i s_256[8], __m256i ss_256[8], __m256i tt_256[8], __m256i r[4]) {
+ s_256[7] = _mm256_loadu_si256((__m256i *)(src + 7 * stride));
+ ss_256[3] = _mm256_unpacklo_epi8(s_256[6], s_256[7]);
+ ss_256[7] = _mm256_unpackhi_epi8(s_256[6], s_256[7]);
+ s_256[6] = _mm256_loadu_si256((__m256i *)(src + 8 * stride));
+ tt_256[3] = _mm256_unpacklo_epi8(s_256[7], s_256[6]);
+ tt_256[7] = _mm256_unpackhi_epi8(s_256[7], s_256[6]);
+ r[0] = convolve_8tap_avx2(ss_256 + 0, coeffs);
+ r[1] = convolve_8tap_avx2(ss_256 + 4, coeffs);
+ r[2] = convolve_8tap_avx2(tt_256 + 0, coeffs);
+ r[3] = convolve_8tap_avx2(tt_256 + 4, coeffs);
+}
+
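+// xy_x_* helpers: first (horizontal) pass of the 2-D path for 32-wide
+// rows; they filter, round by 2 bits and write the int16_t intermediates
+// that the xy_y_* second pass consumes.
+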
+static INLINE void xy_x_convolve_2tap_32_avx2(const uint8_t *const src,
+ const __m256i coeffs[1],
+ __m256i r[2]) {
+ const __m256i s0 = _mm256_loadu_si256((__m256i *)src);
+ const __m256i s1 = _mm256_loadu_si256((__m256i *)(src + 1));
+ const __m256i ss0 = _mm256_unpacklo_epi8(s0, s1);
+ const __m256i ss1 = _mm256_unpackhi_epi8(s0, s1);
+
+ r[0] = convolve_2tap_avx2(&ss0, coeffs);
+ r[1] = convolve_2tap_avx2(&ss1, coeffs);
+}
+
+static INLINE void xy_x_2tap_32_avx2(const uint8_t *const src,
+ const __m256i coeffs[1],
+ int16_t *const dst) {
+ __m256i r[2];
+
+ xy_x_convolve_2tap_32_avx2(src, coeffs, r);
+ const __m256i d0 = xy_x_round_avx2(r[0]);
+ const __m256i d1 = xy_x_round_avx2(r[1]);
+ _mm256_storeu_si256((__m256i *)dst, d0);
+ _mm256_storeu_si256((__m256i *)(dst + 16), d1);
+}
+
+static INLINE void xy_x_4tap_32_avx2(const uint8_t *const src,
+ const __m256i coeffs[2],
+ const __m256i filt[2],
+ int16_t *const dst) {
+ __m256i r[2];
+
+ x_convolve_4tap_32_avx2(src, coeffs, filt, r);
+ const __m256i d0 = xy_x_round_avx2(r[0]);
+ const __m256i d1 = xy_x_round_avx2(r[1]);
+ _mm256_storeu_si256((__m256i *)dst, d0);
+ _mm256_storeu_si256((__m256i *)(dst + 16), d1);
+}
+
+static INLINE void xy_x_6tap_32_avx2(const uint8_t *const src,
+ const __m256i coeffs[3],
+ const __m256i filt[3],
+ int16_t *const dst) {
+ __m256i r[2];
+
+ x_convolve_6tap_32_avx2(src, coeffs, filt, r);
+ const __m256i d0 = xy_x_round_avx2(r[0]);
+ const __m256i d1 = xy_x_round_avx2(r[1]);
+ _mm256_storeu_si256((__m256i *)dst, d0);
+ _mm256_storeu_si256((__m256i *)(dst + 16), d1);
+}
+
+static INLINE void xy_x_8tap_32_avx2(const uint8_t *const src,
+ const __m256i coeffs[4],
+ const __m256i filt[4],
+ int16_t *const dst) {
+ __m256i r[2];
+
+ x_convolve_8tap_32_avx2(src, coeffs, filt, r);
+ const __m256i d0 = xy_x_round_avx2(r[0]);
+ const __m256i d1 = xy_x_round_avx2(r[1]);
+ _mm256_storeu_si256((__m256i *)dst, d0);
+ _mm256_storeu_si256((__m256i *)(dst + 16), d1);
+}
+
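+// xy_y_* helpers: second (vertical) pass of the 2-D path, reading the
+// int16_t intermediate buffer. That buffer is stored densely, so its row
+// stride equals the block width, hence the fixed src + 2, src + 4,
+// src + 8, ... row offsets in the narrow variants.
+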
+static INLINE __m128i xy_y_convolve_2tap_2x2_sse2(const int16_t *const src,
+ __m128i s_32[2],
+ const __m128i coeffs[1]) {
+ __m128i s_128[2];
+
+ s_32[1] = _mm_cvtsi32_si128(*(int32_t *)(src + 2));
+ s_128[0] = _mm_unpacklo_epi32(s_32[0], s_32[1]);
+ s_32[0] = _mm_cvtsi32_si128(*(int32_t *)(src + 2 * 2));
+ s_128[1] = _mm_unpacklo_epi32(s_32[1], s_32[0]);
+ const __m128i ss = _mm_unpacklo_epi16(s_128[0], s_128[1]);
+ return convolve16_2tap_sse2(&ss, coeffs);
+}
+
+static INLINE __m128i xy_y_convolve_2tap_2x2_half_pel_sse2(
+ const int16_t *const src, __m128i s_32[2]) {
+ __m128i s_128[2];
+
+ s_32[1] = _mm_cvtsi32_si128(*(int32_t *)(src + 2));
+ s_128[0] = _mm_unpacklo_epi32(s_32[0], s_32[1]);
+ s_32[0] = _mm_cvtsi32_si128(*(int32_t *)(src + 2 * 2));
+ s_128[1] = _mm_unpacklo_epi32(s_32[1], s_32[0]);
+ return _mm_add_epi16(s_128[0], s_128[1]);
+}
+
+static INLINE void xy_y_convolve_2tap_4x2_sse2(const int16_t *const src,
+ __m128i s_64[2],
+ const __m128i coeffs[1],
+ __m128i r[2]) {
+ __m128i s_128[2];
+
+ s_64[1] = _mm_loadl_epi64((__m128i *)(src + 4));
+ s_128[0] = _mm_unpacklo_epi64(s_64[0], s_64[1]);
+ s_64[0] = _mm_loadl_epi64((__m128i *)(src + 2 * 4));
+ s_128[1] = _mm_unpacklo_epi64(s_64[1], s_64[0]);
+ const __m128i ss0 = _mm_unpacklo_epi16(s_128[0], s_128[1]);
+ const __m128i ss1 = _mm_unpackhi_epi16(s_128[0], s_128[1]);
+ r[0] = convolve16_2tap_sse2(&ss0, coeffs);
+ r[1] = convolve16_2tap_sse2(&ss1, coeffs);
+}
+
+static INLINE __m128i xy_y_convolve_2tap_4x2_half_pel_sse2(
+ const int16_t *const src, __m128i s_64[2]) {
+ __m128i s_128[2];
+
+ s_64[1] = _mm_loadl_epi64((__m128i *)(src + 4));
+ s_128[0] = _mm_unpacklo_epi64(s_64[0], s_64[1]);
+ s_64[0] = _mm_loadl_epi64((__m128i *)(src + 2 * 4));
+ s_128[1] = _mm_unpacklo_epi64(s_64[1], s_64[0]);
+ return _mm_add_epi16(s_128[0], s_128[1]);
+}
+
+static INLINE void xy_y_convolve_2tap_16_avx2(const __m256i s0,
+ const __m256i s1,
+ const __m256i coeffs[1],
+ __m256i r[2]) {
+ const __m256i ss0 = _mm256_unpacklo_epi16(s0, s1);
+ const __m256i ss1 = _mm256_unpackhi_epi16(s0, s1);
+ r[0] = convolve16_2tap_avx2(&ss0, coeffs);
+ r[1] = convolve16_2tap_avx2(&ss1, coeffs);
+}
+
+static INLINE void xy_y_convolve_2tap_8x2_avx2(const int16_t *const src,
+ __m128i s_128[2],
+ const __m256i coeffs[1],
+ __m256i r[2]) {
+ __m256i s_256[2];
+ s_128[1] = _mm_loadu_si128((__m128i *)(src + 8));
+ s_256[0] = _mm256_setr_m128i(s_128[0], s_128[1]);
+ s_128[0] = _mm_loadu_si128((__m128i *)(src + 2 * 8));
+ s_256[1] = _mm256_setr_m128i(s_128[1], s_128[0]);
+ xy_y_convolve_2tap_16_avx2(s_256[0], s_256[1], coeffs, r);
+}
+
+static INLINE __m256i xy_y_convolve_2tap_8x2_half_pel_avx2(
+ const int16_t *const src, __m128i s_128[2]) {
+ __m256i s_256[2];
+ s_128[1] = _mm_loadu_si128((__m128i *)(src + 8));
+ s_256[0] = _mm256_setr_m128i(s_128[0], s_128[1]);
+ s_128[0] = _mm_loadu_si128((__m128i *)(src + 2 * 8));
+ s_256[1] = _mm256_setr_m128i(s_128[1], s_128[0]);
+ return _mm256_add_epi16(s_256[0], s_256[1]);
+}
+
+static INLINE void xy_y_convolve_2tap_16x2_half_pel_avx2(
+ const int16_t *const src, __m256i s_256[2], __m256i r[2]) {
+ s_256[1] = _mm256_loadu_si256((__m256i *)(src + 16));
+ r[0] = _mm256_add_epi16(s_256[0], s_256[1]);
+ s_256[0] = _mm256_loadu_si256((__m256i *)(src + 2 * 16));
+ r[1] = _mm256_add_epi16(s_256[1], s_256[0]);
+}
+
+static INLINE void xy_y_store_16x2_avx2(const __m256i r[2], uint8_t *const dst,
+ const ptrdiff_t stride) {
+ const __m256i t = _mm256_packus_epi16(r[0], r[1]);
+ const __m256i d = _mm256_permute4x64_epi64(t, 0xD8);
+ storeu_u8_16x2_avx2(d, dst, stride);
+}
+
+static INLINE void xy_y_convolve_2tap_16x2_avx2(const int16_t *const src,
+ __m256i s[2],
+ const __m256i coeffs[1],
+ __m256i r[4]) {
+ s[1] = _mm256_loadu_si256((__m256i *)(src + 16));
+ xy_y_convolve_2tap_16_avx2(s[0], s[1], coeffs, r + 0);
+ s[0] = _mm256_loadu_si256((__m256i *)(src + 2 * 16));
+ xy_y_convolve_2tap_16_avx2(s[1], s[0], coeffs, r + 2);
+}
+
+static INLINE void xy_y_convolve_2tap_32_avx2(const int16_t *const src,
+ const __m256i s0[2],
+ __m256i s1[2],
+ const __m256i coeffs[1],
+ __m256i r[4]) {
+ s1[0] = _mm256_loadu_si256((__m256i *)src);
+ s1[1] = _mm256_loadu_si256((__m256i *)(src + 16));
+ xy_y_convolve_2tap_16_avx2(s0[0], s1[0], coeffs, r + 0);
+ xy_y_convolve_2tap_16_avx2(s0[1], s1[1], coeffs, r + 2);
+}
+
+static INLINE void xy_y_convolve_2tap_32_all_avx2(const int16_t *const src,
+ const __m256i s0[2],
+ __m256i s1[2],
+ const __m256i coeffs[1],
+ uint8_t *const dst) {
+ __m256i r[4];
+
+ xy_y_convolve_2tap_32_avx2(src, s0, s1, coeffs, r);
+ xy_y_round_store_32_avx2(r + 0, r + 2, dst);
+}
+
+static INLINE void xy_y_convolve_2tap_half_pel_32_avx2(const int16_t *const src,
+ const __m256i s0[2],
+ __m256i s1[2],
+ __m256i r[2]) {
+ s1[0] = _mm256_loadu_si256((__m256i *)src);
+ s1[1] = _mm256_loadu_si256((__m256i *)(src + 16));
+ r[0] = _mm256_add_epi16(s0[0], s1[0]);
+ r[1] = _mm256_add_epi16(s0[1], s1[1]);
+}
+
+static INLINE void xy_y_convolve_2tap_half_pel_32_all_avx2(
+ const int16_t *const src, const __m256i s0[2], __m256i s1[2],
+ uint8_t *const dst) {
+ __m256i r[2];
+
+ xy_y_convolve_2tap_half_pel_32_avx2(src, s0, s1, r);
+ r[0] = xy_y_round_half_pel_avx2(r[0]);
+ r[1] = xy_y_round_half_pel_avx2(r[1]);
+ xy_y_pack_store_32_avx2(r[0], r[1], dst);
+}
+
+static INLINE __m128i xy_y_convolve_4tap_2x2_sse2(const int16_t *const src,
+ __m128i s_32[4],
+ __m128i ss_128[2],
+ const __m128i coeffs[2]) {
+ s_32[3] = _mm_cvtsi32_si128(*(int32_t *)(src + 3 * 2));
+ const __m128i src23 = _mm_unpacklo_epi32(s_32[2], s_32[3]);
+ s_32[2] = _mm_cvtsi32_si128(*(int32_t *)(src + 4 * 2));
+ const __m128i src34 = _mm_unpacklo_epi32(s_32[3], s_32[2]);
+ ss_128[1] = _mm_unpacklo_epi16(src23, src34);
+ const __m128i r = convolve16_4tap_sse2(ss_128, coeffs);
+ ss_128[0] = ss_128[1];
+ return r;
+}
+
+static INLINE __m256i xy_y_convolve_4tap_4x2_avx2(const int16_t *const src,
+ __m128i s_64[4],
+ __m256i ss_256[2],
+ const __m256i coeffs[2]) {
+ __m256i s_256[2];
+ s_64[3] = _mm_loadl_epi64((__m128i *)(src + 3 * 4));
+ s_256[0] = _mm256_setr_m128i(s_64[2], s_64[3]);
+ s_64[2] = _mm_loadl_epi64((__m128i *)(src + 4 * 4));
+ s_256[1] = _mm256_setr_m128i(s_64[3], s_64[2]);
+ ss_256[1] = _mm256_unpacklo_epi16(s_256[0], s_256[1]);
+ const __m256i r = convolve16_4tap_avx2(ss_256, coeffs);
+ ss_256[0] = ss_256[1];
+ return r;
+}
+
+static INLINE void xy_y_convolve_4tap_16_avx2(const __m256i *const ss,
+ const __m256i coeffs[2],
+ __m256i r[2]) {
+ r[0] = convolve16_4tap_avx2(ss, coeffs);
+ r[1] = convolve16_4tap_avx2(ss + 2, coeffs);
+}
+
+static INLINE void xy_y_convolve_4tap_8x2_avx2(const int16_t *const src,
+ __m256i ss_256[4],
+ const __m256i coeffs[2],
+ __m256i r[2]) {
+ __m256i s_256[2];
+ s_256[0] = _mm256_loadu_si256((__m256i *)(src + 2 * 8));
+ s_256[1] = _mm256_loadu_si256((__m256i *)(src + 3 * 8));
+ ss_256[1] = _mm256_unpacklo_epi16(s_256[0], s_256[1]);
+ ss_256[3] = _mm256_unpackhi_epi16(s_256[0], s_256[1]);
+ xy_y_convolve_4tap_16_avx2(ss_256, coeffs, r);
+ ss_256[0] = ss_256[1];
+ ss_256[2] = ss_256[3];
+}
+
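+// The half_pel variants exploit the even symmetry of the half-pel filter:
+// mirrored rows share a coefficient, so they are pre-added (e.g.
+// s[0] + s[3] and s[1] + s[2] below) and the 4-tap filter collapses to a
+// 2-tap one; the 6- and 8-tap versions collapse to 4 taps the same way.
+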
+static INLINE void xy_y_convolve_4tap_8x2_half_pel_avx2(
+ const int16_t *const src, const __m256i coeffs[1], __m256i s_256[4],
+ __m256i r[2]) {
+ __m256i a_256[2];
+ s_256[2] = _mm256_loadu_si256((__m256i *)(src + 2 * 8));
+ s_256[3] = _mm256_loadu_si256((__m256i *)(src + 3 * 8));
+ a_256[0] = _mm256_add_epi16(s_256[0], s_256[3]);
+ a_256[1] = _mm256_add_epi16(s_256[1], s_256[2]);
+ xy_y_convolve_2tap_16_avx2(a_256[0], a_256[1], coeffs, r);
+ s_256[0] = s_256[2];
+ s_256[1] = s_256[3];
+}
+
+static INLINE void xy_y_convolve_4tap_16x2_avx2(
+ const int16_t *const src, __m256i s_256[4], __m256i ss_256[4],
+ __m256i tt_256[4], const __m256i coeffs[2], __m256i r[4]) {
+ s_256[3] = _mm256_loadu_si256((__m256i *)(src + 3 * 16));
+ ss_256[1] = _mm256_unpacklo_epi16(s_256[2], s_256[3]);
+ ss_256[3] = _mm256_unpackhi_epi16(s_256[2], s_256[3]);
+ s_256[2] = _mm256_loadu_si256((__m256i *)(src + 4 * 16));
+ tt_256[1] = _mm256_unpacklo_epi16(s_256[3], s_256[2]);
+ tt_256[3] = _mm256_unpackhi_epi16(s_256[3], s_256[2]);
+ xy_y_convolve_4tap_16_avx2(ss_256, coeffs, r + 0);
+ xy_y_convolve_4tap_16_avx2(tt_256, coeffs, r + 2);
+ ss_256[0] = ss_256[1];
+ ss_256[2] = ss_256[3];
+ tt_256[0] = tt_256[1];
+ tt_256[2] = tt_256[3];
+}
+
+static INLINE void xy_y_convolve_4tap_32x2_avx2(
+ const int16_t *const src, const ptrdiff_t stride, __m256i s_256[4],
+ __m256i ss_256[4], __m256i tt_256[4], const __m256i coeffs[2],
+ __m256i r[4]) {
+ s_256[3] = _mm256_loadu_si256((__m256i *)(src + 3 * stride));
+ ss_256[1] = _mm256_unpacklo_epi16(s_256[2], s_256[3]);
+ ss_256[3] = _mm256_unpackhi_epi16(s_256[2], s_256[3]);
+ s_256[2] = _mm256_loadu_si256((__m256i *)(src + 4 * stride));
+ tt_256[1] = _mm256_unpacklo_epi16(s_256[3], s_256[2]);
+ tt_256[3] = _mm256_unpackhi_epi16(s_256[3], s_256[2]);
+ xy_y_convolve_4tap_16_avx2(ss_256, coeffs, r + 0);
+ xy_y_convolve_4tap_16_avx2(tt_256, coeffs, r + 2);
+ ss_256[0] = ss_256[1];
+ ss_256[2] = ss_256[3];
+ tt_256[0] = tt_256[1];
+ tt_256[2] = tt_256[3];
+}
+
+static INLINE void xy_y_convolve_4tap_16x2_half_pelavx2(
+ const int16_t *const src, __m256i s_256[5], const __m256i coeffs[1],
+ __m256i r[4]) {
+ __m256i a_256[2];
+
+ s_256[3] = _mm256_loadu_si256((__m256i *)(src + 3 * 16));
+ s_256[4] = _mm256_loadu_si256((__m256i *)(src + 4 * 16));
+
+ a_256[0] = _mm256_add_epi16(s_256[0], s_256[3]);
+ a_256[1] = _mm256_add_epi16(s_256[1], s_256[2]);
+ xy_y_convolve_2tap_16_avx2(a_256[0], a_256[1], coeffs, r + 0);
+
+ a_256[0] = _mm256_add_epi16(s_256[1], s_256[4]);
+ a_256[1] = _mm256_add_epi16(s_256[2], s_256[3]);
+ xy_y_convolve_2tap_16_avx2(a_256[0], a_256[1], coeffs, r + 2);
+
+ s_256[0] = s_256[2];
+ s_256[1] = s_256[3];
+ s_256[2] = s_256[4];
+}
+
+static INLINE __m128i xy_y_convolve_6tap_2x2_sse2(const int16_t *const src,
+ __m128i s_32[6],
+ __m128i ss_128[3],
+ const __m128i coeffs[3]) {
+ s_32[5] = _mm_cvtsi32_si128(*(int32_t *)(src + 5 * 2));
+ const __m128i src45 = _mm_unpacklo_epi32(s_32[4], s_32[5]);
+ s_32[4] = _mm_cvtsi32_si128(*(int32_t *)(src + 6 * 2));
+ const __m128i src56 = _mm_unpacklo_epi32(s_32[5], s_32[4]);
+ ss_128[2] = _mm_unpacklo_epi16(src45, src56);
+ const __m128i r = convolve16_6tap_sse2(ss_128, coeffs);
+ ss_128[0] = ss_128[1];
+ ss_128[1] = ss_128[2];
+ return r;
+}
+
+static INLINE __m256i xy_y_convolve_6tap_4x2_avx2(const int16_t *const src,
+ __m128i s_64[6],
+ __m256i ss_256[3],
+ const __m256i coeffs[3]) {
+ __m256i s_256[2];
+ s_64[5] = _mm_loadl_epi64((__m128i *)(src + 5 * 4));
+ s_256[0] = _mm256_setr_m128i(s_64[4], s_64[5]);
+ s_64[4] = _mm_loadl_epi64((__m128i *)(src + 6 * 4));
+ s_256[1] = _mm256_setr_m128i(s_64[5], s_64[4]);
+ ss_256[2] = _mm256_unpacklo_epi16(s_256[0], s_256[1]);
+ const __m256i r = convolve16_6tap_avx2(ss_256, coeffs);
+ ss_256[0] = ss_256[1];
+ ss_256[1] = ss_256[2];
+ return r;
+}
+
+static INLINE void xy_y_convolve_6tap_16_avx2(const __m256i ss[6],
+ const __m256i coeffs[3],
+ __m256i r[2]) {
+ r[0] = convolve16_6tap_avx2(ss, coeffs);
+ r[1] = convolve16_6tap_avx2(ss + 3, coeffs);
+}
+
+static INLINE void xy_y_convolve_6tap_8x2_avx2(const int16_t *const src,
+ __m256i ss_256[6],
+ const __m256i coeffs[3],
+ __m256i r[2]) {
+ __m256i s_256[2];
+ s_256[0] = _mm256_loadu_si256((__m256i *)(src + 4 * 8));
+ s_256[1] = _mm256_loadu_si256((__m256i *)(src + 5 * 8));
+ ss_256[2] = _mm256_unpacklo_epi16(s_256[0], s_256[1]);
+ ss_256[5] = _mm256_unpackhi_epi16(s_256[0], s_256[1]);
+ xy_y_convolve_6tap_16_avx2(ss_256, coeffs, r);
+ ss_256[0] = ss_256[1];
+ ss_256[1] = ss_256[2];
+ ss_256[3] = ss_256[4];
+ ss_256[4] = ss_256[5];
+}
+
+static INLINE void xy_y_convolve_6tap_8x2_half_pel_avx2(
+ const int16_t *const src, const __m256i coeffs[2], __m256i s_256[6],
+ __m256i r[2]) {
+ __m256i a_256[2], ss_256[4];
+ s_256[4] = _mm256_loadu_si256((__m256i *)(src + 4 * 8));
+ s_256[5] = _mm256_loadu_si256((__m256i *)(src + 5 * 8));
+ a_256[0] = _mm256_add_epi16(s_256[0], s_256[5]);
+ a_256[1] = _mm256_add_epi16(s_256[1], s_256[4]);
+ ss_256[0] = _mm256_unpacklo_epi16(a_256[0], a_256[1]);
+ ss_256[1] = _mm256_unpacklo_epi16(s_256[2], s_256[3]);
+ ss_256[2] = _mm256_unpackhi_epi16(a_256[0], a_256[1]);
+ ss_256[3] = _mm256_unpackhi_epi16(s_256[2], s_256[3]);
+ xy_y_convolve_4tap_16_avx2(ss_256, coeffs, r);
+ s_256[0] = s_256[2];
+ s_256[1] = s_256[3];
+ s_256[2] = s_256[4];
+ s_256[3] = s_256[5];
+}
+
+static INLINE void xy_y_convolve_6tap_16x2_avx2(
+ const int16_t *const src, const ptrdiff_t stride, __m256i s_256[6],
+ __m256i ss_256[6], __m256i tt_256[6], const __m256i coeffs[3],
+ __m256i r[4]) {
+ s_256[5] = _mm256_loadu_si256((__m256i *)(src + 5 * stride));
+ ss_256[2] = _mm256_unpacklo_epi16(s_256[4], s_256[5]);
+ ss_256[5] = _mm256_unpackhi_epi16(s_256[4], s_256[5]);
+ s_256[4] = _mm256_loadu_si256((__m256i *)(src + 6 * stride));
+ tt_256[2] = _mm256_unpacklo_epi16(s_256[5], s_256[4]);
+ tt_256[5] = _mm256_unpackhi_epi16(s_256[5], s_256[4]);
+
+ xy_y_convolve_6tap_16_avx2(ss_256, coeffs, r + 0);
+ xy_y_convolve_6tap_16_avx2(tt_256, coeffs, r + 2);
+
+ ss_256[0] = ss_256[1];
+ ss_256[1] = ss_256[2];
+ ss_256[3] = ss_256[4];
+ ss_256[4] = ss_256[5];
+
+ tt_256[0] = tt_256[1];
+ tt_256[1] = tt_256[2];
+ tt_256[3] = tt_256[4];
+ tt_256[4] = tt_256[5];
+}
+
+static INLINE void xy_y_convolve_6tap_16x2_half_pel_avx2(
+ const int16_t *const src, const ptrdiff_t stride, __m256i s_256[6],
+ __m256i ss_256[4], const __m256i coeffs[2], __m256i r[4]) {
+ __m256i a_256[2];
+
+ s_256[5] = _mm256_loadu_si256((__m256i *)(src + 5 * stride));
+ a_256[0] = _mm256_add_epi16(s_256[0], s_256[5]);
+ a_256[1] = _mm256_add_epi16(s_256[1], s_256[4]);
+ ss_256[0] = _mm256_unpacklo_epi16(a_256[0], a_256[1]);
+ ss_256[1] = _mm256_unpacklo_epi16(s_256[2], s_256[3]);
+ ss_256[2] = _mm256_unpackhi_epi16(a_256[0], a_256[1]);
+ ss_256[3] = _mm256_unpackhi_epi16(s_256[2], s_256[3]);
+ xy_y_convolve_4tap_16_avx2(ss_256, coeffs, r + 0);
+
+ a_256[1] = _mm256_add_epi16(s_256[2], s_256[5]);
+ s_256[0] = s_256[2];
+ s_256[2] = s_256[4];
+ s_256[4] = _mm256_loadu_si256((__m256i *)(src + 6 * stride));
+ a_256[0] = _mm256_add_epi16(s_256[1], s_256[4]);
+ s_256[1] = s_256[3];
+ s_256[3] = s_256[5];
+ ss_256[0] = _mm256_unpacklo_epi16(a_256[0], a_256[1]);
+ ss_256[1] = _mm256_unpacklo_epi16(s_256[1], s_256[2]);
+ ss_256[2] = _mm256_unpackhi_epi16(a_256[0], a_256[1]);
+ ss_256[3] = _mm256_unpackhi_epi16(s_256[1], s_256[2]);
+ xy_y_convolve_4tap_16_avx2(ss_256, coeffs, r + 2);
+}
+
+static INLINE __m128i xy_y_convolve_8tap_2x2_sse2(const int16_t *const src,
+ __m128i s_32[8],
+ __m128i ss_128[4],
+ const __m128i coeffs[4]) {
+ s_32[7] = _mm_cvtsi32_si128(*(int32_t *)(src + 7 * 2));
+ const __m128i src67 = _mm_unpacklo_epi32(s_32[6], s_32[7]);
+ s_32[6] = _mm_cvtsi32_si128(*(int32_t *)(src + 8 * 2));
+ const __m128i src78 = _mm_unpacklo_epi32(s_32[7], s_32[6]);
+ ss_128[3] = _mm_unpacklo_epi16(src67, src78);
+ const __m128i r = convolve16_8tap_sse2(ss_128, coeffs);
+ ss_128[0] = ss_128[1];
+ ss_128[1] = ss_128[2];
+ ss_128[2] = ss_128[3];
+ return r;
+}
+
+static INLINE __m256i xy_y_convolve_8tap_4x2_avx2(const int16_t *const src,
+ __m128i s_64[8],
+ __m256i ss_256[4],
+ const __m256i coeffs[4]) {
+ __m256i s_256[2];
+ s_64[7] = _mm_loadl_epi64((__m128i *)(src + 7 * 4));
+ s_256[0] = _mm256_setr_m128i(s_64[6], s_64[7]);
+ s_64[6] = _mm_loadl_epi64((__m128i *)(src + 8 * 4));
+ s_256[1] = _mm256_setr_m128i(s_64[7], s_64[6]);
+ ss_256[3] = _mm256_unpacklo_epi16(s_256[0], s_256[1]);
+ const __m256i r = convolve16_8tap_avx2(ss_256, coeffs);
+ ss_256[0] = ss_256[1];
+ ss_256[1] = ss_256[2];
+ ss_256[2] = ss_256[3];
+ return r;
+}
+
+static INLINE void xy_y_convolve_8tap_16_avx2(const __m256i *const ss,
+ const __m256i coeffs[4],
+ __m256i r[2]) {
+ r[0] = convolve16_8tap_avx2(ss, coeffs);
+ r[1] = convolve16_8tap_avx2(ss + 4, coeffs);
+}
+
+static INLINE void xy_y_convolve_8tap_8x2_avx2(const int16_t *const src,
+ __m256i ss_256[8],
+ const __m256i coeffs[4],
+ __m256i r[2]) {
+ __m256i s_256[2];
+ s_256[0] = _mm256_loadu_si256((__m256i *)(src + 6 * 8));
+ s_256[1] = _mm256_loadu_si256((__m256i *)(src + 7 * 8));
+ ss_256[3] = _mm256_unpacklo_epi16(s_256[0], s_256[1]);
+ ss_256[7] = _mm256_unpackhi_epi16(s_256[0], s_256[1]);
+ xy_y_convolve_8tap_16_avx2(ss_256, coeffs, r);
+ ss_256[0] = ss_256[1];
+ ss_256[1] = ss_256[2];
+ ss_256[2] = ss_256[3];
+ ss_256[4] = ss_256[5];
+ ss_256[5] = ss_256[6];
+ ss_256[6] = ss_256[7];
+}
+
+static INLINE void xy_y_convolve_8tap_8x2_half_pel_avx2(
+ const int16_t *const src, const __m256i coeffs[2], __m256i s_256[8],
+ __m256i r[2]) {
+ __m256i a_256[4], ss_256[4];
+
+ s_256[6] = _mm256_loadu_si256((__m256i *)(src + 6 * 8));
+ s_256[7] = _mm256_loadu_si256((__m256i *)(src + 7 * 8));
+ a_256[0] = _mm256_add_epi16(s_256[0], s_256[7]);
+ a_256[1] = _mm256_add_epi16(s_256[1], s_256[6]);
+ a_256[2] = _mm256_add_epi16(s_256[2], s_256[5]);
+ a_256[3] = _mm256_add_epi16(s_256[3], s_256[4]);
+ ss_256[0] = _mm256_unpacklo_epi16(a_256[0], a_256[1]);
+ ss_256[1] = _mm256_unpacklo_epi16(a_256[2], a_256[3]);
+ ss_256[2] = _mm256_unpackhi_epi16(a_256[0], a_256[1]);
+ ss_256[3] = _mm256_unpackhi_epi16(a_256[2], a_256[3]);
+ xy_y_convolve_4tap_16_avx2(ss_256, coeffs, r);
+ s_256[0] = s_256[2];
+ s_256[1] = s_256[3];
+ s_256[2] = s_256[4];
+ s_256[3] = s_256[5];
+ s_256[4] = s_256[6];
+ s_256[5] = s_256[7];
+}
+
+static AOM_FORCE_INLINE void xy_y_convolve_8tap_16x2_avx2(
+ const int16_t *const src, const ptrdiff_t stride, const __m256i coeffs[4],
+ __m256i s_256[8], __m256i ss_256[8], __m256i tt_256[8], __m256i r[4]) {
+ s_256[7] = _mm256_loadu_si256((__m256i *)(src + 7 * stride));
+ ss_256[3] = _mm256_unpacklo_epi16(s_256[6], s_256[7]);
+ ss_256[7] = _mm256_unpackhi_epi16(s_256[6], s_256[7]);
+ s_256[6] = _mm256_loadu_si256((__m256i *)(src + 8 * stride));
+ tt_256[3] = _mm256_unpacklo_epi16(s_256[7], s_256[6]);
+ tt_256[7] = _mm256_unpackhi_epi16(s_256[7], s_256[6]);
+
+ xy_y_convolve_8tap_16_avx2(ss_256, coeffs, r + 0);
+ xy_y_convolve_8tap_16_avx2(tt_256, coeffs, r + 2);
+
+ ss_256[0] = ss_256[1];
+ ss_256[1] = ss_256[2];
+ ss_256[2] = ss_256[3];
+ ss_256[4] = ss_256[5];
+ ss_256[5] = ss_256[6];
+ ss_256[6] = ss_256[7];
+
+ tt_256[0] = tt_256[1];
+ tt_256[1] = tt_256[2];
+ tt_256[2] = tt_256[3];
+ tt_256[4] = tt_256[5];
+ tt_256[5] = tt_256[6];
+ tt_256[6] = tt_256[7];
+}
+
+static INLINE void xy_y_convolve_8tap_16x2_half_pel_avx2(
+ const int16_t *const src, const ptrdiff_t stride, const __m256i coeffs[4],
+ __m256i s_256[8], __m256i r[4]) {
+ __m256i a_256[4], ss_256[4];
+ s_256[7] = _mm256_loadu_si256((__m256i *)(src + 7 * stride));
+
+ a_256[0] = _mm256_add_epi16(s_256[0], s_256[7]);
+ a_256[1] = _mm256_add_epi16(s_256[1], s_256[6]);
+ a_256[2] = _mm256_add_epi16(s_256[2], s_256[5]);
+ a_256[3] = _mm256_add_epi16(s_256[3], s_256[4]);
+ ss_256[0] = _mm256_unpacklo_epi16(a_256[0], a_256[1]);
+ ss_256[1] = _mm256_unpacklo_epi16(a_256[2], a_256[3]);
+ ss_256[2] = _mm256_unpackhi_epi16(a_256[0], a_256[1]);
+ ss_256[3] = _mm256_unpackhi_epi16(a_256[2], a_256[3]);
+
+ xy_y_convolve_4tap_16_avx2(ss_256, coeffs, r + 0);
+
+ a_256[1] = _mm256_add_epi16(s_256[2], s_256[7]);
+ a_256[2] = _mm256_add_epi16(s_256[3], s_256[6]);
+ a_256[3] = _mm256_add_epi16(s_256[4], s_256[5]);
+ s_256[0] = s_256[2];
+ s_256[2] = s_256[4];
+ s_256[4] = s_256[6];
+ s_256[6] = _mm256_loadu_si256((__m256i *)(src + 8 * stride));
+
+ a_256[0] = _mm256_add_epi16(s_256[1], s_256[6]);
+ s_256[1] = s_256[3];
+ s_256[3] = s_256[5];
+ s_256[5] = s_256[7];
+ ss_256[0] = _mm256_unpacklo_epi16(a_256[0], a_256[1]);
+ ss_256[1] = _mm256_unpacklo_epi16(a_256[2], a_256[3]);
+ ss_256[2] = _mm256_unpackhi_epi16(a_256[0], a_256[1]);
+ ss_256[3] = _mm256_unpackhi_epi16(a_256[2], a_256[3]);
+
+ xy_y_convolve_4tap_16_avx2(ss_256, coeffs, r + 2);
+}
+
+static INLINE void xy_y_round_store_8x2_avx2(const __m256i res[2],
+ uint8_t *const dst,
+ const ptrdiff_t stride) {
+ const __m256i r = xy_y_round_16_avx2(res);
+ pack_store_8x2_avx2(r, dst, stride);
+}
+
+static INLINE void xy_y_round_store_16x2_avx2(const __m256i res[4],
+ uint8_t *const dst,
+ const ptrdiff_t stride) {
+ const __m256i r0 = xy_y_round_16_avx2(res + 0);
+ const __m256i r1 = xy_y_round_16_avx2(res + 2);
+ xy_y_pack_store_16x2_avx2(r0, r1, dst, stride);
+}
+
+static INLINE void sr_y_round_store_32_avx2(const __m256i res[2],
+ uint8_t *const dst) {
+ __m256i r[2];
+
+ r[0] = sr_y_round_avx2(res[0]);
+ r[1] = sr_y_round_avx2(res[1]);
+ convolve_store_32_avx2(r[0], r[1], dst);
+}
+
+static INLINE void sr_y_round_store_32x2_avx2(const __m256i res[4],
+ uint8_t *const dst,
+ const int32_t dst_stride) {
+ sr_y_round_store_32_avx2(res, dst);
+ sr_y_round_store_32_avx2(res + 2, dst + dst_stride);
+}
+
+static INLINE void sr_y_2tap_32_avx2(const uint8_t *const src,
+ const __m256i coeffs[1], const __m256i s0,
+ __m256i *const s1, uint8_t *const dst) {
+ __m256i r[2];
+ y_convolve_2tap_32_avx2(src, coeffs, s0, s1, r);
+ sr_y_round_store_32_avx2(r, dst);
+}
+
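+// Vertical-only (sr_y) convolution entry point: picks the 2-, 4-, 6- or
+// 8-tap path from the filter parameters, with a dedicated loop per block
+// width (2 to 128). All loops emit two rows per iteration, and the 2-tap
+// half-pel case (subpel_y_q4 == 8) reduces to pure byte averaging.
+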
+static AOM_FORCE_INLINE void av1_convolve_y_sr_specialized_avx2(
+ const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride,
+ int32_t w, int32_t h, const InterpFilterParams *filter_params_y,
+ const int32_t subpel_y_q4) {
+ int32_t x, y;
+ __m128i coeffs_128[4];
+ __m256i coeffs_256[4];
+
+ int vert_tap = get_filter_tap(filter_params_y, subpel_y_q4);
+
+ if (vert_tap == 2) {
+    // 2-tap vertical filter
+ const uint8_t *src_ptr = src;
+
+ y = h;
+
+ if (subpel_y_q4 != 8) {
+ if (w <= 8) {
+ prepare_half_coeffs_2tap_ssse3(filter_params_y, subpel_y_q4,
+ coeffs_128);
+
+ if (w == 2) {
+ __m128i s_16[2];
+
+ s_16[0] = _mm_cvtsi32_si128(*(int16_t *)src_ptr);
+
+ do {
+ const __m128i res = y_convolve_2tap_2x2_ssse3(src_ptr, src_stride,
+ coeffs_128, s_16);
+ const __m128i r = sr_y_round_sse2(res);
+ pack_store_2x2_sse2(r, dst, dst_stride);
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 4) {
+ __m128i s_32[2];
+
+ s_32[0] = _mm_cvtsi32_si128(*(int32_t *)src_ptr);
+
+ do {
+ const __m128i res = y_convolve_2tap_4x2_ssse3(src_ptr, src_stride,
+ coeffs_128, s_32);
+ const __m128i r = sr_y_round_sse2(res);
+ pack_store_4x2_sse2(r, dst, dst_stride);
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ __m128i s_64[2], s_128[2];
+
+ assert(w == 8);
+
+ s_64[0] = _mm_loadl_epi64((__m128i *)src_ptr);
+
+ do {
+          // Note: processing these 8-wide rows in SSE registers is faster
+          // than widening them into AVX2 registers.
+ s_64[1] = _mm_loadl_epi64((__m128i *)(src_ptr + src_stride));
+ s_128[0] = _mm_unpacklo_epi64(s_64[0], s_64[1]);
+ s_64[0] = _mm_loadl_epi64((__m128i *)(src_ptr + 2 * src_stride));
+ s_128[1] = _mm_unpacklo_epi64(s_64[1], s_64[0]);
+ const __m128i ss0 = _mm_unpacklo_epi8(s_128[0], s_128[1]);
+ const __m128i ss1 = _mm_unpackhi_epi8(s_128[0], s_128[1]);
+ const __m128i res0 = convolve_2tap_ssse3(&ss0, coeffs_128);
+ const __m128i res1 = convolve_2tap_ssse3(&ss1, coeffs_128);
+ const __m128i r0 = sr_y_round_sse2(res0);
+ const __m128i r1 = sr_y_round_sse2(res1);
+ const __m128i d = _mm_packus_epi16(r0, r1);
+ _mm_storel_epi64((__m128i *)dst, d);
+ _mm_storeh_epi64((__m128i *)(dst + dst_stride), d);
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ }
+ } else {
+ prepare_half_coeffs_2tap_avx2(filter_params_y, subpel_y_q4, coeffs_256);
+
+ if (w == 16) {
+ __m128i s_128[2];
+
+ s_128[0] = _mm_loadu_si128((__m128i *)src_ptr);
+
+ do {
+ __m256i r[2];
+
+ y_convolve_2tap_16x2_avx2(src_ptr, src_stride, coeffs_256, s_128,
+ r);
+ sr_y_round_store_16x2_avx2(r, dst, dst_stride);
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 32) {
+ __m256i s_256[2];
+
+ s_256[0] = _mm256_loadu_si256((__m256i *)src_ptr);
+
+ do {
+ sr_y_2tap_32_avx2(src_ptr + src_stride, coeffs_256, s_256[0],
+ &s_256[1], dst);
+ sr_y_2tap_32_avx2(src_ptr + 2 * src_stride, coeffs_256, s_256[1],
+ &s_256[0], dst + dst_stride);
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 64) {
+ __m256i s_256[2][2];
+
+ s_256[0][0] = _mm256_loadu_si256((__m256i *)(src_ptr + 0 * 32));
+ s_256[0][1] = _mm256_loadu_si256((__m256i *)(src_ptr + 1 * 32));
+
+ do {
+ sr_y_2tap_32_avx2(src_ptr + src_stride, coeffs_256, s_256[0][0],
+ &s_256[1][0], dst);
+ sr_y_2tap_32_avx2(src_ptr + src_stride + 32, coeffs_256,
+ s_256[0][1], &s_256[1][1], dst + 32);
+ sr_y_2tap_32_avx2(src_ptr + 2 * src_stride, coeffs_256, s_256[1][0],
+ &s_256[0][0], dst + dst_stride);
+ sr_y_2tap_32_avx2(src_ptr + 2 * src_stride + 32, coeffs_256,
+ s_256[1][1], &s_256[0][1], dst + dst_stride + 32);
+
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ __m256i s_256[2][4];
+
+ assert(w == 128);
+
+ s_256[0][0] = _mm256_loadu_si256((__m256i *)(src_ptr + 0 * 32));
+ s_256[0][1] = _mm256_loadu_si256((__m256i *)(src_ptr + 1 * 32));
+ s_256[0][2] = _mm256_loadu_si256((__m256i *)(src_ptr + 2 * 32));
+ s_256[0][3] = _mm256_loadu_si256((__m256i *)(src_ptr + 3 * 32));
+
+ do {
+ sr_y_2tap_32_avx2(src_ptr + src_stride, coeffs_256, s_256[0][0],
+ &s_256[1][0], dst);
+ sr_y_2tap_32_avx2(src_ptr + src_stride + 1 * 32, coeffs_256,
+ s_256[0][1], &s_256[1][1], dst + 1 * 32);
+ sr_y_2tap_32_avx2(src_ptr + src_stride + 2 * 32, coeffs_256,
+ s_256[0][2], &s_256[1][2], dst + 2 * 32);
+ sr_y_2tap_32_avx2(src_ptr + src_stride + 3 * 32, coeffs_256,
+ s_256[0][3], &s_256[1][3], dst + 3 * 32);
+
+ sr_y_2tap_32_avx2(src_ptr + 2 * src_stride, coeffs_256, s_256[1][0],
+ &s_256[0][0], dst + dst_stride);
+ sr_y_2tap_32_avx2(src_ptr + 2 * src_stride + 1 * 32, coeffs_256,
+ s_256[1][1], &s_256[0][1],
+ dst + dst_stride + 1 * 32);
+ sr_y_2tap_32_avx2(src_ptr + 2 * src_stride + 2 * 32, coeffs_256,
+ s_256[1][2], &s_256[0][2],
+ dst + dst_stride + 2 * 32);
+ sr_y_2tap_32_avx2(src_ptr + 2 * src_stride + 3 * 32, coeffs_256,
+ s_256[1][3], &s_256[0][3],
+ dst + dst_stride + 3 * 32);
+
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ }
+ }
+ } else {
+    // subpel_y_q4 == 8: the 2-tap half-pel filter is (1, 1) / 2, so a
+    // plain byte average (avg_epu8) produces the exactly rounded result.
+ if (w <= 8) {
+ if (w == 2) {
+ __m128i s_16[2];
+
+ s_16[0] = _mm_cvtsi32_si128(*(int16_t *)src_ptr);
+
+ do {
+ s_16[1] = _mm_cvtsi32_si128(*(int16_t *)(src_ptr + src_stride));
+ const __m128i d0 = _mm_avg_epu8(s_16[0], s_16[1]);
+ *(int16_t *)dst = (int16_t)_mm_cvtsi128_si32(d0);
+ s_16[0] = _mm_cvtsi32_si128(*(int16_t *)(src_ptr + 2 * src_stride));
+ const __m128i d1 = _mm_avg_epu8(s_16[1], s_16[0]);
+ *(int16_t *)(dst + dst_stride) = (int16_t)_mm_cvtsi128_si32(d1);
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 4) {
+ __m128i s_32[2];
+
+ s_32[0] = _mm_cvtsi32_si128(*(int32_t *)src_ptr);
+
+ do {
+ s_32[1] = _mm_cvtsi32_si128(*(int32_t *)(src_ptr + src_stride));
+ const __m128i d0 = _mm_avg_epu8(s_32[0], s_32[1]);
+ xx_storel_32(dst, d0);
+ s_32[0] = _mm_cvtsi32_si128(*(int32_t *)(src_ptr + 2 * src_stride));
+ const __m128i d1 = _mm_avg_epu8(s_32[1], s_32[0]);
+ xx_storel_32(dst + dst_stride, d1);
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ __m128i s_64[2];
+
+ assert(w == 8);
+
+ s_64[0] = _mm_loadl_epi64((__m128i *)src_ptr);
+
+ do {
+          // Note: 64-bit SSE loads and averages here are faster than
+          // widening the rows into AVX2 registers.
+ s_64[1] = _mm_loadl_epi64((__m128i *)(src_ptr + src_stride));
+ const __m128i d0 = _mm_avg_epu8(s_64[0], s_64[1]);
+ _mm_storel_epi64((__m128i *)dst, d0);
+ s_64[0] = _mm_loadl_epi64((__m128i *)(src_ptr + 2 * src_stride));
+ const __m128i d1 = _mm_avg_epu8(s_64[1], s_64[0]);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride), d1);
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ }
+ } else if (w == 16) {
+ __m128i s_128[2];
+
+ s_128[0] = _mm_loadu_si128((__m128i *)src_ptr);
+
+ do {
+ s_128[1] = _mm_loadu_si128((__m128i *)(src_ptr + src_stride));
+ const __m128i d0 = _mm_avg_epu8(s_128[0], s_128[1]);
+ _mm_storeu_si128((__m128i *)dst, d0);
+ s_128[0] = _mm_loadu_si128((__m128i *)(src_ptr + 2 * src_stride));
+ const __m128i d1 = _mm_avg_epu8(s_128[1], s_128[0]);
+ _mm_storeu_si128((__m128i *)(dst + dst_stride), d1);
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 32) {
+ __m256i s_256[2];
+
+ s_256[0] = _mm256_loadu_si256((__m256i *)src_ptr);
+
+ do {
+ sr_y_2tap_32_avg_avx2(src_ptr + src_stride, s_256[0], &s_256[1], dst);
+ sr_y_2tap_32_avg_avx2(src_ptr + 2 * src_stride, s_256[1], &s_256[0],
+ dst + dst_stride);
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 64) {
+ __m256i s_256[2][2];
+
+ s_256[0][0] = _mm256_loadu_si256((__m256i *)(src_ptr + 0 * 32));
+ s_256[0][1] = _mm256_loadu_si256((__m256i *)(src_ptr + 1 * 32));
+
+ do {
+ sr_y_2tap_32_avg_avx2(src_ptr + src_stride, s_256[0][0], &s_256[1][0],
+ dst);
+ sr_y_2tap_32_avg_avx2(src_ptr + src_stride + 32, s_256[0][1],
+ &s_256[1][1], dst + 32);
+
+ sr_y_2tap_32_avg_avx2(src_ptr + 2 * src_stride, s_256[1][0],
+ &s_256[0][0], dst + dst_stride);
+ sr_y_2tap_32_avg_avx2(src_ptr + 2 * src_stride + 32, s_256[1][1],
+ &s_256[0][1], dst + dst_stride + 32);
+
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ __m256i s_256[2][4];
+
+ assert(w == 128);
+
+ s_256[0][0] = _mm256_loadu_si256((__m256i *)(src_ptr + 0 * 32));
+ s_256[0][1] = _mm256_loadu_si256((__m256i *)(src_ptr + 1 * 32));
+ s_256[0][2] = _mm256_loadu_si256((__m256i *)(src_ptr + 2 * 32));
+ s_256[0][3] = _mm256_loadu_si256((__m256i *)(src_ptr + 3 * 32));
+
+ do {
+ sr_y_2tap_32_avg_avx2(src_ptr + src_stride, s_256[0][0], &s_256[1][0],
+ dst);
+ sr_y_2tap_32_avg_avx2(src_ptr + src_stride + 1 * 32, s_256[0][1],
+ &s_256[1][1], dst + 1 * 32);
+ sr_y_2tap_32_avg_avx2(src_ptr + src_stride + 2 * 32, s_256[0][2],
+ &s_256[1][2], dst + 2 * 32);
+ sr_y_2tap_32_avg_avx2(src_ptr + src_stride + 3 * 32, s_256[0][3],
+ &s_256[1][3], dst + 3 * 32);
+
+ sr_y_2tap_32_avg_avx2(src_ptr + 2 * src_stride, s_256[1][0],
+ &s_256[0][0], dst + dst_stride);
+ sr_y_2tap_32_avg_avx2(src_ptr + 2 * src_stride + 1 * 32, s_256[1][1],
+ &s_256[0][1], dst + dst_stride + 1 * 32);
+ sr_y_2tap_32_avg_avx2(src_ptr + 2 * src_stride + 2 * 32, s_256[1][2],
+ &s_256[0][2], dst + dst_stride + 2 * 32);
+ sr_y_2tap_32_avg_avx2(src_ptr + 2 * src_stride + 3 * 32, s_256[1][3],
+ &s_256[0][3], dst + dst_stride + 3 * 32);
+
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ }
+ }
+ } else if (vert_tap == 4) {
+ // vert_filt as 4 tap
+ const uint8_t *src_ptr = src - src_stride;
+
+ y = h;
+
+ if (w <= 4) {
+ prepare_half_coeffs_4tap_ssse3(filter_params_y, subpel_y_q4, coeffs_128);
+
+ if (w == 2) {
+ __m128i s_16[4], ss_128[2];
+
+ s_16[0] = _mm_cvtsi32_si128(*(int16_t *)(src_ptr + 0 * src_stride));
+ s_16[1] = _mm_cvtsi32_si128(*(int16_t *)(src_ptr + 1 * src_stride));
+ s_16[2] = _mm_cvtsi32_si128(*(int16_t *)(src_ptr + 2 * src_stride));
+
+ const __m128i src01 = _mm_unpacklo_epi16(s_16[0], s_16[1]);
+ const __m128i src12 = _mm_unpacklo_epi16(s_16[1], s_16[2]);
+
+ ss_128[0] = _mm_unpacklo_epi8(src01, src12);
+
+ do {
+ src_ptr += 2 * src_stride;
+ const __m128i res = y_convolve_4tap_2x2_ssse3(
+ src_ptr, src_stride, coeffs_128, s_16, ss_128);
+ const __m128i r = sr_y_round_sse2(res);
+ pack_store_2x2_sse2(r, dst, dst_stride);
+
+ ss_128[0] = ss_128[1];
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ __m128i s_32[4], ss_128[2];
+
+ assert(w == 4);
+
+ s_32[0] = _mm_cvtsi32_si128(*(int32_t *)(src_ptr + 0 * src_stride));
+ s_32[1] = _mm_cvtsi32_si128(*(int32_t *)(src_ptr + 1 * src_stride));
+ s_32[2] = _mm_cvtsi32_si128(*(int32_t *)(src_ptr + 2 * src_stride));
+
+ const __m128i src01 = _mm_unpacklo_epi32(s_32[0], s_32[1]);
+ const __m128i src12 = _mm_unpacklo_epi32(s_32[1], s_32[2]);
+
+ ss_128[0] = _mm_unpacklo_epi8(src01, src12);
+
+ do {
+ src_ptr += 2 * src_stride;
+ const __m128i res = y_convolve_4tap_4x2_ssse3(
+ src_ptr, src_stride, coeffs_128, s_32, ss_128);
+ const __m128i r = sr_y_round_sse2(res);
+ pack_store_4x2_sse2(r, dst, dst_stride);
+
+ ss_128[0] = ss_128[1];
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ }
+ } else {
+ prepare_half_coeffs_4tap_avx2(filter_params_y, subpel_y_q4, coeffs_256);
+
+ if (w == 8) {
+ __m128i s_64[4];
+ __m256i ss_256[2];
+
+ s_64[0] = _mm_loadl_epi64((__m128i *)(src_ptr + 0 * src_stride));
+ s_64[1] = _mm_loadl_epi64((__m128i *)(src_ptr + 1 * src_stride));
+ s_64[2] = _mm_loadl_epi64((__m128i *)(src_ptr + 2 * src_stride));
+
+ // Load lines a and b. Line a to lower 128, line b to upper 128
+ const __m256i src01 = _mm256_setr_m128i(s_64[0], s_64[1]);
+ const __m256i src12 = _mm256_setr_m128i(s_64[1], s_64[2]);
+
+ ss_256[0] = _mm256_unpacklo_epi8(src01, src12);
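+        // The byte interleave above pairs each pixel with its vertical
+        // neighbor, the layout the convolve helpers consume for their paired
+        // multiply-accumulate against the half coefficients.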
+
+ do {
+ src_ptr += 2 * src_stride;
+ const __m256i res = y_convolve_4tap_8x2_avx2(
+ src_ptr, src_stride, coeffs_256, s_64, ss_256);
+ sr_y_round_store_8x2_avx2(res, dst, dst_stride);
+
+ ss_256[0] = ss_256[1];
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 16) {
+ __m128i s_128[4];
+ __m256i ss_256[4], r[2];
+
+ s_128[0] = _mm_loadu_si128((__m128i *)(src_ptr + 0 * src_stride));
+ s_128[1] = _mm_loadu_si128((__m128i *)(src_ptr + 1 * src_stride));
+ s_128[2] = _mm_loadu_si128((__m128i *)(src_ptr + 2 * src_stride));
+
+ // Load lines a and b. Line a to lower 128, line b to upper 128
+ const __m256i src01 = _mm256_setr_m128i(s_128[0], s_128[1]);
+ const __m256i src12 = _mm256_setr_m128i(s_128[1], s_128[2]);
+
+ ss_256[0] = _mm256_unpacklo_epi8(src01, src12);
+ ss_256[2] = _mm256_unpackhi_epi8(src01, src12);
+
+ do {
+ src_ptr += 2 * src_stride;
+ y_convolve_4tap_16x2_avx2(src_ptr, src_stride, coeffs_256, s_128,
+ ss_256, r);
+ sr_y_round_store_16x2_avx2(r, dst, dst_stride);
+
+ ss_256[0] = ss_256[1];
+ ss_256[2] = ss_256[3];
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 32) {
+        // The AV1 standard has no 32x4 case. This path only serves an
+        // encoder speed feature that subsamples 32x8 to 32x4 and then
+        // triggers the 4-tap filter.
+
+ __m256i s_256[4], ss_256[4], tt_256[4], r[4];
+
+ s_256[0] = _mm256_loadu_si256((__m256i *)(src_ptr + 0 * src_stride));
+ s_256[1] = _mm256_loadu_si256((__m256i *)(src_ptr + 1 * src_stride));
+ s_256[2] = _mm256_loadu_si256((__m256i *)(src_ptr + 2 * src_stride));
+
+ ss_256[0] = _mm256_unpacklo_epi8(s_256[0], s_256[1]);
+ ss_256[2] = _mm256_unpackhi_epi8(s_256[0], s_256[1]);
+
+ tt_256[0] = _mm256_unpacklo_epi8(s_256[1], s_256[2]);
+ tt_256[2] = _mm256_unpackhi_epi8(s_256[1], s_256[2]);
+
+ do {
+ src_ptr += 2 * src_stride;
+ y_convolve_4tap_32x2_avx2(src_ptr, src_stride, coeffs_256, s_256,
+ ss_256, tt_256, r);
+ sr_y_round_store_32x2_avx2(r, dst, dst_stride);
+
+ ss_256[0] = ss_256[1];
+ ss_256[2] = ss_256[3];
+
+ tt_256[0] = tt_256[1];
+ tt_256[2] = tt_256[3];
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ assert(!(w % 32));
+
+ __m256i s_256[4], ss_256[4], tt_256[4], r[4];
+ x = 0;
+ do {
+ const uint8_t *s = src_ptr + x;
+ uint8_t *d = dst + x;
+ s_256[0] = _mm256_loadu_si256((__m256i *)(s + 0 * src_stride));
+ s_256[1] = _mm256_loadu_si256((__m256i *)(s + 1 * src_stride));
+ s_256[2] = _mm256_loadu_si256((__m256i *)(s + 2 * src_stride));
+
+ ss_256[0] = _mm256_unpacklo_epi8(s_256[0], s_256[1]);
+ ss_256[2] = _mm256_unpackhi_epi8(s_256[0], s_256[1]);
+
+ tt_256[0] = _mm256_unpacklo_epi8(s_256[1], s_256[2]);
+ tt_256[2] = _mm256_unpackhi_epi8(s_256[1], s_256[2]);
+
+ y = h;
+ do {
+ s += 2 * src_stride;
+ y_convolve_4tap_32x2_avx2(s, src_stride, coeffs_256, s_256, ss_256,
+ tt_256, r);
+ sr_y_round_store_32x2_avx2(r, d, dst_stride);
+
+ ss_256[0] = ss_256[1];
+ ss_256[2] = ss_256[3];
+
+ tt_256[0] = tt_256[1];
+ tt_256[2] = tt_256[3];
+ d += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ x += 32;
+ } while (x < w);
+ }
+ }
+ } else if (vert_tap == 6) {
+ // vert_filt as 6 tap
+ const uint8_t *src_ptr = src - 2 * src_stride;
+
+ if (w <= 4) {
+ prepare_half_coeffs_6tap_ssse3(filter_params_y, subpel_y_q4, coeffs_128);
+
+ y = h;
+
+ if (w == 2) {
+ __m128i s_16[6], ss_128[3];
+
+ s_16[0] = _mm_cvtsi32_si128(*(int16_t *)(src_ptr + 0 * src_stride));
+ s_16[1] = _mm_cvtsi32_si128(*(int16_t *)(src_ptr + 1 * src_stride));
+ s_16[2] = _mm_cvtsi32_si128(*(int16_t *)(src_ptr + 2 * src_stride));
+ s_16[3] = _mm_cvtsi32_si128(*(int16_t *)(src_ptr + 3 * src_stride));
+ s_16[4] = _mm_cvtsi32_si128(*(int16_t *)(src_ptr + 4 * src_stride));
+
+ const __m128i src01 = _mm_unpacklo_epi16(s_16[0], s_16[1]);
+ const __m128i src12 = _mm_unpacklo_epi16(s_16[1], s_16[2]);
+ const __m128i src23 = _mm_unpacklo_epi16(s_16[2], s_16[3]);
+ const __m128i src34 = _mm_unpacklo_epi16(s_16[3], s_16[4]);
+
+ ss_128[0] = _mm_unpacklo_epi8(src01, src12);
+ ss_128[1] = _mm_unpacklo_epi8(src23, src34);
+
+ do {
+ src_ptr += 2 * src_stride;
+ const __m128i res = y_convolve_6tap_2x2_ssse3(
+ src_ptr, src_stride, coeffs_128, s_16, ss_128);
+ const __m128i r = sr_y_round_sse2(res);
+ pack_store_2x2_sse2(r, dst, dst_stride);
+
+ ss_128[0] = ss_128[1];
+ ss_128[1] = ss_128[2];
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ __m128i s_32[6], ss_128[3];
+
+ assert(w == 4);
+
+ s_32[0] = _mm_cvtsi32_si128(*(int32_t *)(src_ptr + 0 * src_stride));
+ s_32[1] = _mm_cvtsi32_si128(*(int32_t *)(src_ptr + 1 * src_stride));
+ s_32[2] = _mm_cvtsi32_si128(*(int32_t *)(src_ptr + 2 * src_stride));
+ s_32[3] = _mm_cvtsi32_si128(*(int32_t *)(src_ptr + 3 * src_stride));
+ s_32[4] = _mm_cvtsi32_si128(*(int32_t *)(src_ptr + 4 * src_stride));
+
+ const __m128i src01 = _mm_unpacklo_epi32(s_32[0], s_32[1]);
+ const __m128i src12 = _mm_unpacklo_epi32(s_32[1], s_32[2]);
+ const __m128i src23 = _mm_unpacklo_epi32(s_32[2], s_32[3]);
+ const __m128i src34 = _mm_unpacklo_epi32(s_32[3], s_32[4]);
+
+ ss_128[0] = _mm_unpacklo_epi8(src01, src12);
+ ss_128[1] = _mm_unpacklo_epi8(src23, src34);
+
+ do {
+ src_ptr += 2 * src_stride;
+ const __m128i res = y_convolve_6tap_4x2_ssse3(
+ src_ptr, src_stride, coeffs_128, s_32, ss_128);
+ const __m128i r = sr_y_round_sse2(res);
+ pack_store_4x2_sse2(r, dst, dst_stride);
+
+ ss_128[0] = ss_128[1];
+ ss_128[1] = ss_128[2];
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ }
+ } else {
+ prepare_half_coeffs_6tap_avx2(filter_params_y, subpel_y_q4, coeffs_256);
+
+ if (w == 8) {
+ __m128i s_64[6];
+ __m256i ss_256[3];
+
+ s_64[0] = _mm_loadl_epi64((__m128i *)(src_ptr + 0 * src_stride));
+ s_64[1] = _mm_loadl_epi64((__m128i *)(src_ptr + 1 * src_stride));
+ s_64[2] = _mm_loadl_epi64((__m128i *)(src_ptr + 2 * src_stride));
+ s_64[3] = _mm_loadl_epi64((__m128i *)(src_ptr + 3 * src_stride));
+ s_64[4] = _mm_loadl_epi64((__m128i *)(src_ptr + 4 * src_stride));
+
+ // Load lines a and b. Line a to lower 128, line b to upper 128
+ const __m256i src01 = _mm256_setr_m128i(s_64[0], s_64[1]);
+ const __m256i src12 = _mm256_setr_m128i(s_64[1], s_64[2]);
+ const __m256i src23 = _mm256_setr_m128i(s_64[2], s_64[3]);
+ const __m256i src34 = _mm256_setr_m128i(s_64[3], s_64[4]);
+
+ ss_256[0] = _mm256_unpacklo_epi8(src01, src12);
+ ss_256[1] = _mm256_unpacklo_epi8(src23, src34);
+
+ y = h;
+ do {
+ src_ptr += 2 * src_stride;
+ const __m256i res = y_convolve_6tap_8x2_avx2(
+ src_ptr, src_stride, coeffs_256, s_64, ss_256);
+ sr_y_round_store_8x2_avx2(res, dst, dst_stride);
+
+ ss_256[0] = ss_256[1];
+ ss_256[1] = ss_256[2];
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 16) {
+ __m128i s_128[6];
+ __m256i ss_256[6], r[2];
+
+ s_128[0] = _mm_loadu_si128((__m128i *)(src_ptr + 0 * src_stride));
+ s_128[1] = _mm_loadu_si128((__m128i *)(src_ptr + 1 * src_stride));
+ s_128[2] = _mm_loadu_si128((__m128i *)(src_ptr + 2 * src_stride));
+ s_128[3] = _mm_loadu_si128((__m128i *)(src_ptr + 3 * src_stride));
+ s_128[4] = _mm_loadu_si128((__m128i *)(src_ptr + 4 * src_stride));
+
+ // Load lines a and b. Line a to lower 128, line b to upper 128
+ const __m256i src01 = _mm256_setr_m128i(s_128[0], s_128[1]);
+ const __m256i src12 = _mm256_setr_m128i(s_128[1], s_128[2]);
+ const __m256i src23 = _mm256_setr_m128i(s_128[2], s_128[3]);
+ const __m256i src34 = _mm256_setr_m128i(s_128[3], s_128[4]);
+
+ ss_256[0] = _mm256_unpacklo_epi8(src01, src12);
+ ss_256[1] = _mm256_unpacklo_epi8(src23, src34);
+
+ ss_256[3] = _mm256_unpackhi_epi8(src01, src12);
+ ss_256[4] = _mm256_unpackhi_epi8(src23, src34);
+
+ y = h;
+ do {
+ src_ptr += 2 * src_stride;
+ y_convolve_6tap_16x2_avx2(src_ptr, src_stride, coeffs_256, s_128,
+ ss_256, r);
+ sr_y_round_store_16x2_avx2(r, dst, dst_stride);
+
+ ss_256[0] = ss_256[1];
+ ss_256[1] = ss_256[2];
+
+ ss_256[3] = ss_256[4];
+ ss_256[4] = ss_256[5];
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ __m256i s_256[6], ss_256[6], tt_256[6], r[4];
+
+ assert(!(w % 32));
+
+ x = 0;
+ do {
+ const uint8_t *s = src_ptr + x;
+ uint8_t *d = dst + x;
+
+ s_256[0] = _mm256_loadu_si256((__m256i *)(s + 0 * src_stride));
+ s_256[1] = _mm256_loadu_si256((__m256i *)(s + 1 * src_stride));
+ s_256[2] = _mm256_loadu_si256((__m256i *)(s + 2 * src_stride));
+ s_256[3] = _mm256_loadu_si256((__m256i *)(s + 3 * src_stride));
+ s_256[4] = _mm256_loadu_si256((__m256i *)(s + 4 * src_stride));
+
+ ss_256[0] = _mm256_unpacklo_epi8(s_256[0], s_256[1]);
+ ss_256[1] = _mm256_unpacklo_epi8(s_256[2], s_256[3]);
+ ss_256[3] = _mm256_unpackhi_epi8(s_256[0], s_256[1]);
+ ss_256[4] = _mm256_unpackhi_epi8(s_256[2], s_256[3]);
+
+ tt_256[0] = _mm256_unpacklo_epi8(s_256[1], s_256[2]);
+ tt_256[1] = _mm256_unpacklo_epi8(s_256[3], s_256[4]);
+ tt_256[3] = _mm256_unpackhi_epi8(s_256[1], s_256[2]);
+ tt_256[4] = _mm256_unpackhi_epi8(s_256[3], s_256[4]);
+
+ y = h;
+ do {
+ s += 2 * src_stride;
+ y_convolve_6tap_32x2_avx2(s, src_stride, coeffs_256, s_256, ss_256,
+ tt_256, r);
+ sr_y_round_store_32x2_avx2(r, d, dst_stride);
+
+ ss_256[0] = ss_256[1];
+ ss_256[1] = ss_256[2];
+ ss_256[3] = ss_256[4];
+ ss_256[4] = ss_256[5];
+
+ tt_256[0] = tt_256[1];
+ tt_256[1] = tt_256[2];
+ tt_256[3] = tt_256[4];
+ tt_256[4] = tt_256[5];
+ d += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+
+ x += 32;
+ } while (x < w);
+ }
+ }
+ } else if (vert_tap == 8) {
+ // vert_filt as 8 tap
+ const uint8_t *src_ptr = src - 3 * src_stride;
+
+ if (w <= 4) {
+ prepare_half_coeffs_8tap_ssse3(filter_params_y, subpel_y_q4, coeffs_128);
+
+ y = h;
+
+ if (w == 2) {
+ __m128i s_16[8], ss_128[4];
+
+ s_16[0] = _mm_cvtsi32_si128(*(int16_t *)(src_ptr + 0 * src_stride));
+ s_16[1] = _mm_cvtsi32_si128(*(int16_t *)(src_ptr + 1 * src_stride));
+ s_16[2] = _mm_cvtsi32_si128(*(int16_t *)(src_ptr + 2 * src_stride));
+ s_16[3] = _mm_cvtsi32_si128(*(int16_t *)(src_ptr + 3 * src_stride));
+ s_16[4] = _mm_cvtsi32_si128(*(int16_t *)(src_ptr + 4 * src_stride));
+ s_16[5] = _mm_cvtsi32_si128(*(int16_t *)(src_ptr + 5 * src_stride));
+ s_16[6] = _mm_cvtsi32_si128(*(int16_t *)(src_ptr + 6 * src_stride));
+
+ const __m128i src01 = _mm_unpacklo_epi16(s_16[0], s_16[1]);
+ const __m128i src12 = _mm_unpacklo_epi16(s_16[1], s_16[2]);
+ const __m128i src23 = _mm_unpacklo_epi16(s_16[2], s_16[3]);
+ const __m128i src34 = _mm_unpacklo_epi16(s_16[3], s_16[4]);
+ const __m128i src45 = _mm_unpacklo_epi16(s_16[4], s_16[5]);
+ const __m128i src56 = _mm_unpacklo_epi16(s_16[5], s_16[6]);
+
+ ss_128[0] = _mm_unpacklo_epi8(src01, src12);
+ ss_128[1] = _mm_unpacklo_epi8(src23, src34);
+ ss_128[2] = _mm_unpacklo_epi8(src45, src56);
+
+ do {
+ const __m128i res = y_convolve_8tap_2x2_ssse3(
+ src_ptr, src_stride, coeffs_128, s_16, ss_128);
+ const __m128i r = sr_y_round_sse2(res);
+ pack_store_2x2_sse2(r, dst, dst_stride);
+ ss_128[0] = ss_128[1];
+ ss_128[1] = ss_128[2];
+ ss_128[2] = ss_128[3];
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ __m128i s_32[8], ss_128[4];
+
+ assert(w == 4);
+
+ s_32[0] = _mm_cvtsi32_si128(*(int32_t *)(src_ptr + 0 * src_stride));
+ s_32[1] = _mm_cvtsi32_si128(*(int32_t *)(src_ptr + 1 * src_stride));
+ s_32[2] = _mm_cvtsi32_si128(*(int32_t *)(src_ptr + 2 * src_stride));
+ s_32[3] = _mm_cvtsi32_si128(*(int32_t *)(src_ptr + 3 * src_stride));
+ s_32[4] = _mm_cvtsi32_si128(*(int32_t *)(src_ptr + 4 * src_stride));
+ s_32[5] = _mm_cvtsi32_si128(*(int32_t *)(src_ptr + 5 * src_stride));
+ s_32[6] = _mm_cvtsi32_si128(*(int32_t *)(src_ptr + 6 * src_stride));
+
+ const __m128i src01 = _mm_unpacklo_epi32(s_32[0], s_32[1]);
+ const __m128i src12 = _mm_unpacklo_epi32(s_32[1], s_32[2]);
+ const __m128i src23 = _mm_unpacklo_epi32(s_32[2], s_32[3]);
+ const __m128i src34 = _mm_unpacklo_epi32(s_32[3], s_32[4]);
+ const __m128i src45 = _mm_unpacklo_epi32(s_32[4], s_32[5]);
+ const __m128i src56 = _mm_unpacklo_epi32(s_32[5], s_32[6]);
+
+ ss_128[0] = _mm_unpacklo_epi8(src01, src12);
+ ss_128[1] = _mm_unpacklo_epi8(src23, src34);
+ ss_128[2] = _mm_unpacklo_epi8(src45, src56);
+
+ do {
+ const __m128i res = y_convolve_8tap_4x2_ssse3(
+ src_ptr, src_stride, coeffs_128, s_32, ss_128);
+ const __m128i r = sr_y_round_sse2(res);
+ pack_store_4x2_sse2(r, dst, dst_stride);
+ ss_128[0] = ss_128[1];
+ ss_128[1] = ss_128[2];
+ ss_128[2] = ss_128[3];
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ }
+ } else {
+ prepare_half_coeffs_8tap_avx2(filter_params_y, subpel_y_q4, coeffs_256);
+
+ if (w == 8) {
+ __m128i s_64[8];
+ __m256i ss_256[4];
+
+ s_64[0] = _mm_loadl_epi64((__m128i *)(src_ptr + 0 * src_stride));
+ s_64[1] = _mm_loadl_epi64((__m128i *)(src_ptr + 1 * src_stride));
+ s_64[2] = _mm_loadl_epi64((__m128i *)(src_ptr + 2 * src_stride));
+ s_64[3] = _mm_loadl_epi64((__m128i *)(src_ptr + 3 * src_stride));
+ s_64[4] = _mm_loadl_epi64((__m128i *)(src_ptr + 4 * src_stride));
+ s_64[5] = _mm_loadl_epi64((__m128i *)(src_ptr + 5 * src_stride));
+ s_64[6] = _mm_loadl_epi64((__m128i *)(src_ptr + 6 * src_stride));
+
+ // Load lines a and b. Line a to lower 128, line b to upper 128
+ const __m256i src01 = _mm256_setr_m128i(s_64[0], s_64[1]);
+ const __m256i src12 = _mm256_setr_m128i(s_64[1], s_64[2]);
+ const __m256i src23 = _mm256_setr_m128i(s_64[2], s_64[3]);
+ const __m256i src34 = _mm256_setr_m128i(s_64[3], s_64[4]);
+ const __m256i src45 = _mm256_setr_m128i(s_64[4], s_64[5]);
+ const __m256i src56 = _mm256_setr_m128i(s_64[5], s_64[6]);
+
+ ss_256[0] = _mm256_unpacklo_epi8(src01, src12);
+ ss_256[1] = _mm256_unpacklo_epi8(src23, src34);
+ ss_256[2] = _mm256_unpacklo_epi8(src45, src56);
+
+ y = h;
+ do {
+ const __m256i res = y_convolve_8tap_8x2_avx2(
+ src_ptr, src_stride, coeffs_256, s_64, ss_256);
+ sr_y_round_store_8x2_avx2(res, dst, dst_stride);
+ ss_256[0] = ss_256[1];
+ ss_256[1] = ss_256[2];
+ ss_256[2] = ss_256[3];
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 16) {
+ __m128i s_128[8];
+ __m256i ss_256[8], r[2];
+
+ s_128[0] = _mm_loadu_si128((__m128i *)(src_ptr + 0 * src_stride));
+ s_128[1] = _mm_loadu_si128((__m128i *)(src_ptr + 1 * src_stride));
+ s_128[2] = _mm_loadu_si128((__m128i *)(src_ptr + 2 * src_stride));
+ s_128[3] = _mm_loadu_si128((__m128i *)(src_ptr + 3 * src_stride));
+ s_128[4] = _mm_loadu_si128((__m128i *)(src_ptr + 4 * src_stride));
+ s_128[5] = _mm_loadu_si128((__m128i *)(src_ptr + 5 * src_stride));
+ s_128[6] = _mm_loadu_si128((__m128i *)(src_ptr + 6 * src_stride));
+
+ // Load lines a and b. Line a to lower 128, line b to upper 128
+ const __m256i src01 = _mm256_setr_m128i(s_128[0], s_128[1]);
+ const __m256i src12 = _mm256_setr_m128i(s_128[1], s_128[2]);
+ const __m256i src23 = _mm256_setr_m128i(s_128[2], s_128[3]);
+ const __m256i src34 = _mm256_setr_m128i(s_128[3], s_128[4]);
+ const __m256i src45 = _mm256_setr_m128i(s_128[4], s_128[5]);
+ const __m256i src56 = _mm256_setr_m128i(s_128[5], s_128[6]);
+
+ ss_256[0] = _mm256_unpacklo_epi8(src01, src12);
+ ss_256[1] = _mm256_unpacklo_epi8(src23, src34);
+ ss_256[2] = _mm256_unpacklo_epi8(src45, src56);
+
+ ss_256[4] = _mm256_unpackhi_epi8(src01, src12);
+ ss_256[5] = _mm256_unpackhi_epi8(src23, src34);
+ ss_256[6] = _mm256_unpackhi_epi8(src45, src56);
+
+ y = h;
+ do {
+ y_convolve_8tap_16x2_avx2(src_ptr, src_stride, coeffs_256, s_128,
+ ss_256, r);
+ sr_y_round_store_16x2_avx2(r, dst, dst_stride);
+
+ ss_256[0] = ss_256[1];
+ ss_256[1] = ss_256[2];
+ ss_256[2] = ss_256[3];
+
+ ss_256[4] = ss_256[5];
+ ss_256[5] = ss_256[6];
+ ss_256[6] = ss_256[7];
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ __m256i s_256[8], ss_256[8], tt_256[8], r[4];
+
+ assert(!(w % 32));
+
+ x = 0;
+ do {
+ const uint8_t *s = src_ptr + x;
+ uint8_t *d = dst + x;
+
+ s_256[0] = _mm256_loadu_si256((__m256i *)(s + 0 * src_stride));
+ s_256[1] = _mm256_loadu_si256((__m256i *)(s + 1 * src_stride));
+ s_256[2] = _mm256_loadu_si256((__m256i *)(s + 2 * src_stride));
+ s_256[3] = _mm256_loadu_si256((__m256i *)(s + 3 * src_stride));
+ s_256[4] = _mm256_loadu_si256((__m256i *)(s + 4 * src_stride));
+ s_256[5] = _mm256_loadu_si256((__m256i *)(s + 5 * src_stride));
+ s_256[6] = _mm256_loadu_si256((__m256i *)(s + 6 * src_stride));
+
+ ss_256[0] = _mm256_unpacklo_epi8(s_256[0], s_256[1]);
+ ss_256[1] = _mm256_unpacklo_epi8(s_256[2], s_256[3]);
+ ss_256[2] = _mm256_unpacklo_epi8(s_256[4], s_256[5]);
+ ss_256[4] = _mm256_unpackhi_epi8(s_256[0], s_256[1]);
+ ss_256[5] = _mm256_unpackhi_epi8(s_256[2], s_256[3]);
+ ss_256[6] = _mm256_unpackhi_epi8(s_256[4], s_256[5]);
+
+ tt_256[0] = _mm256_unpacklo_epi8(s_256[1], s_256[2]);
+ tt_256[1] = _mm256_unpacklo_epi8(s_256[3], s_256[4]);
+ tt_256[2] = _mm256_unpacklo_epi8(s_256[5], s_256[6]);
+ tt_256[4] = _mm256_unpackhi_epi8(s_256[1], s_256[2]);
+ tt_256[5] = _mm256_unpackhi_epi8(s_256[3], s_256[4]);
+ tt_256[6] = _mm256_unpackhi_epi8(s_256[5], s_256[6]);
+
+ y = h;
+ do {
+ y_convolve_8tap_32x2_avx2(s, src_stride, coeffs_256, s_256, ss_256,
+ tt_256, r);
+ sr_y_round_store_32x2_avx2(r, d, dst_stride);
+
+ ss_256[0] = ss_256[1];
+ ss_256[1] = ss_256[2];
+ ss_256[2] = ss_256[3];
+ ss_256[4] = ss_256[5];
+ ss_256[5] = ss_256[6];
+ ss_256[6] = ss_256[7];
+
+ tt_256[0] = tt_256[1];
+ tt_256[1] = tt_256[2];
+ tt_256[2] = tt_256[3];
+ tt_256[4] = tt_256[5];
+ tt_256[5] = tt_256[6];
+ tt_256[6] = tt_256[7];
+ s += 2 * src_stride;
+ d += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+
+ x += 32;
+ } while (x < w);
+ }
+ }
+ }
+}
+
+static INLINE void sr_x_2tap_32_avx2(const uint8_t *const src,
+ const __m256i coeffs[1],
+ uint8_t *const dst) {
+ __m256i r[2];
+
+ x_convolve_2tap_32_avx2(src, coeffs, r);
+ sr_x_round_store_32_avx2(r, dst);
+}
+
+static INLINE void sr_x_6tap_32_avx2(const uint8_t *const src,
+ const __m256i coeffs[3],
+ const __m256i filt[3],
+ uint8_t *const dst) {
+ __m256i r[2];
+
+ x_convolve_6tap_32_avx2(src, coeffs, filt, r);
+ sr_x_round_store_32_avx2(r, dst);
+}
+
+static AOM_FORCE_INLINE void sr_x_8tap_32_avx2(const uint8_t *const src,
+ const __m256i coeffs[4],
+ const __m256i filt[4],
+ uint8_t *const dst) {
+ __m256i r[2];
+
+ x_convolve_8tap_32_avx2(src, coeffs, filt, r);
+ sr_x_round_store_32_avx2(r, dst);
+}
+
+static AOM_FORCE_INLINE void av1_convolve_x_sr_specialized_avx2(
+ const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride,
+ int32_t w, int32_t h, const InterpFilterParams *filter_params_x,
+ const int32_t subpel_x_q4, ConvolveParams *conv_params) {
+ int32_t y = h;
+ __m128i coeffs_128[4];
+ __m256i coeffs_256[4];
+
+ assert(conv_params->round_0 == 3);
+ assert((FILTER_BITS - conv_params->round_1) >= 0 ||
+ ((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS));
+ (void)conv_params;
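+  // conv_params is only read by the asserts above and would otherwise be
+  // unused in NDEBUG builds; with round_0 fixed at 3, the rounding is folded
+  // into the sr_x_round_* helpers.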
+
+ const int horz_tap = get_filter_tap(filter_params_x, subpel_x_q4);
+
+ if (horz_tap == 2) {
+ // horz_filt as 2 tap
+ const uint8_t *src_ptr = src;
+
+ if (subpel_x_q4 != 8) {
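+    // subpel_x_q4 is in 1/16-pel units, so 8 is the exact half-pel offset;
+    // that case is handled by the pixel-averaging branch below.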
+ if (w <= 8) {
+ prepare_half_coeffs_2tap_ssse3(filter_params_x, subpel_x_q4,
+ coeffs_128);
+
+ if (w == 2) {
+ do {
+ const __m128i res =
+ x_convolve_2tap_2x2_sse4_1(src_ptr, src_stride, coeffs_128);
+ const __m128i r = sr_x_round_sse2(res);
+ pack_store_2x2_sse2(r, dst, dst_stride);
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 4) {
+ do {
+ const __m128i res =
+ x_convolve_2tap_4x2_ssse3(src_ptr, src_stride, coeffs_128);
+ const __m128i r = sr_x_round_sse2(res);
+ pack_store_4x2_sse2(r, dst, dst_stride);
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else {
+ assert(w == 8);
+
+ do {
+ __m128i res[2];
+
+ x_convolve_2tap_8x2_ssse3(src_ptr, src_stride, coeffs_128, res);
+ res[0] = sr_x_round_sse2(res[0]);
+ res[1] = sr_x_round_sse2(res[1]);
+ const __m128i d = _mm_packus_epi16(res[0], res[1]);
+ _mm_storel_epi64((__m128i *)dst, d);
+ _mm_storeh_epi64((__m128i *)(dst + dst_stride), d);
+
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ }
+ } else {
+ prepare_half_coeffs_2tap_avx2(filter_params_x, subpel_x_q4, coeffs_256);
+
+ if (w == 16) {
+ do {
+ __m256i r[2];
+
+ x_convolve_2tap_16x2_avx2(src_ptr, src_stride, coeffs_256, r);
+ sr_x_round_store_16x2_avx2(r, dst, dst_stride);
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 32) {
+ do {
+ sr_x_2tap_32_avx2(src_ptr, coeffs_256, dst);
+ src_ptr += src_stride;
+ dst += dst_stride;
+ } while (--y);
+ } else if (w == 64) {
+ do {
+ sr_x_2tap_32_avx2(src_ptr + 0 * 32, coeffs_256, dst + 0 * 32);
+ sr_x_2tap_32_avx2(src_ptr + 1 * 32, coeffs_256, dst + 1 * 32);
+ src_ptr += src_stride;
+ dst += dst_stride;
+ } while (--y);
+ } else {
+ assert(w == 128);
+
+ do {
+ sr_x_2tap_32_avx2(src_ptr + 0 * 32, coeffs_256, dst + 0 * 32);
+ sr_x_2tap_32_avx2(src_ptr + 1 * 32, coeffs_256, dst + 1 * 32);
+ sr_x_2tap_32_avx2(src_ptr + 2 * 32, coeffs_256, dst + 2 * 32);
+ sr_x_2tap_32_avx2(src_ptr + 3 * 32, coeffs_256, dst + 3 * 32);
+ src_ptr += src_stride;
+ dst += dst_stride;
+ } while (--y);
+ }
+ }
+ } else {
+ // average to get half pel
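+    // With equal 2-tap weights the horizontal filter reduces to averaging
+    // each pixel with its right neighbor: (s[x] + s[x + 1] + 1) >> 1.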
+ if (w == 2) {
+ do {
+ __m128i s_128;
+
+ s_128 = load_u8_4x2_sse4_1(src_ptr, src_stride);
+ const __m128i s1 = _mm_srli_si128(s_128, 1);
+ const __m128i d = _mm_avg_epu8(s_128, s1);
+ *(uint16_t *)dst = (uint16_t)_mm_cvtsi128_si32(d);
+ *(uint16_t *)(dst + dst_stride) = _mm_extract_epi16(d, 2);
+
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 4) {
+ do {
+ __m128i s_128;
+
+ s_128 = load_u8_8x2_sse2(src_ptr, src_stride);
+ const __m128i s1 = _mm_srli_si128(s_128, 1);
+ const __m128i d = _mm_avg_epu8(s_128, s1);
+ xx_storel_32(dst, d);
+ *(int32_t *)(dst + dst_stride) = _mm_extract_epi32(d, 2);
+
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 8) {
+ do {
+ const __m128i s00 = _mm_loadu_si128((__m128i *)src_ptr);
+ const __m128i s10 =
+ _mm_loadu_si128((__m128i *)(src_ptr + src_stride));
+ const __m128i s01 = _mm_srli_si128(s00, 1);
+ const __m128i s11 = _mm_srli_si128(s10, 1);
+ const __m128i d0 = _mm_avg_epu8(s00, s01);
+ const __m128i d1 = _mm_avg_epu8(s10, s11);
+ _mm_storel_epi64((__m128i *)dst, d0);
+ _mm_storel_epi64((__m128i *)(dst + dst_stride), d1);
+
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 16) {
+ do {
+ const __m128i s00 = _mm_loadu_si128((__m128i *)src_ptr);
+ const __m128i s01 = _mm_loadu_si128((__m128i *)(src_ptr + 1));
+ const __m128i s10 =
+ _mm_loadu_si128((__m128i *)(src_ptr + src_stride));
+ const __m128i s11 =
+ _mm_loadu_si128((__m128i *)(src_ptr + src_stride + 1));
+ const __m128i d0 = _mm_avg_epu8(s00, s01);
+ const __m128i d1 = _mm_avg_epu8(s10, s11);
+ _mm_storeu_si128((__m128i *)dst, d0);
+ _mm_storeu_si128((__m128i *)(dst + dst_stride), d1);
+
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 32) {
+ do {
+ sr_x_2tap_32_avg_avx2(src_ptr, dst);
+ src_ptr += src_stride;
+ dst += dst_stride;
+ } while (--y);
+ } else if (w == 64) {
+ do {
+ sr_x_2tap_32_avg_avx2(src_ptr + 0 * 32, dst + 0 * 32);
+ sr_x_2tap_32_avg_avx2(src_ptr + 1 * 32, dst + 1 * 32);
+ src_ptr += src_stride;
+ dst += dst_stride;
+ } while (--y);
+ } else {
+ assert(w == 128);
+
+ do {
+ sr_x_2tap_32_avg_avx2(src_ptr + 0 * 32, dst + 0 * 32);
+ sr_x_2tap_32_avg_avx2(src_ptr + 1 * 32, dst + 1 * 32);
+ sr_x_2tap_32_avg_avx2(src_ptr + 2 * 32, dst + 2 * 32);
+ sr_x_2tap_32_avg_avx2(src_ptr + 3 * 32, dst + 3 * 32);
+ src_ptr += src_stride;
+ dst += dst_stride;
+ } while (--y);
+ }
+ }
+ } else if (horz_tap == 4) {
+ // horz_filt as 4 tap
+ const uint8_t *src_ptr = src - 1;
+
+ prepare_half_coeffs_4tap_ssse3(filter_params_x, subpel_x_q4, coeffs_128);
+
+ if (w == 2) {
+ do {
+ const __m128i res =
+ x_convolve_4tap_2x2_ssse3(src_ptr, src_stride, coeffs_128);
+ const __m128i r = sr_x_round_sse2(res);
+ pack_store_2x2_sse2(r, dst, dst_stride);
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 4) {
+ do {
+ const __m128i res =
+ x_convolve_4tap_4x2_ssse3(src_ptr, src_stride, coeffs_128);
+ const __m128i r = sr_x_round_sse2(res);
+ pack_store_4x2_sse2(r, dst, dst_stride);
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 8) {
+ // TODO(chiyotsai@google.com): Reuse the old SIMD code here. Need to
+ // rewrite this for better performance later.
+ __m256i filt_256[2];
+ prepare_coeffs_lowbd(filter_params_x, subpel_x_q4, coeffs_256);
+
+ filt_256[0] = _mm256_loadu_si256((__m256i const *)filt1_global_avx2);
+ filt_256[1] = _mm256_loadu_si256((__m256i const *)filt2_global_avx2);
+ for (int i = 0; i < h; i += 2) {
+ const __m256i data = _mm256_permute2x128_si256(
+ _mm256_castsi128_si256(
+ _mm_loadu_si128((__m128i *)(&src_ptr[i * src_stride]))),
+ _mm256_castsi128_si256(_mm_loadu_si128(
+ (__m128i *)(&src_ptr[i * src_stride + src_stride]))),
+ 0x20);
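+        // permute2x128 with 0x20 places row i in the lower 128 bits and row
+        // i + 1 in the upper 128 bits, letting one AVX2 pass filter both.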
+
+ __m256i res_16b = convolve_lowbd_x_4tap(data, coeffs_256 + 1, filt_256);
+ res_16b = sr_x_round_avx2(res_16b);
+
+ __m256i res_8b = _mm256_packus_epi16(res_16b, res_16b);
+
+ const __m128i res_0 = _mm256_castsi256_si128(res_8b);
+ const __m128i res_1 = _mm256_extracti128_si256(res_8b, 1);
+
+ _mm_storel_epi64((__m128i *)&dst[i * dst_stride], res_0);
+ _mm_storel_epi64((__m128i *)&dst[i * dst_stride + dst_stride], res_1);
+ }
+ } else {
+ assert(!(w % 16));
+ // TODO(chiyotsai@google.com): Reuse the old SIMD code here. Need to
+ // rewrite this for better performance later.
+ __m256i filt_256[2];
+ prepare_coeffs_lowbd(filter_params_x, subpel_x_q4, coeffs_256);
+ filt_256[0] = _mm256_loadu_si256((__m256i const *)filt1_global_avx2);
+ filt_256[1] = _mm256_loadu_si256((__m256i const *)filt2_global_avx2);
+
+ for (int i = 0; i < h; ++i) {
+ for (int j = 0; j < w; j += 16) {
+ // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 8 9 10 11 12 13 14 15 16 17
+ // 18 19 20 21 22 23
+ const __m256i data = _mm256_inserti128_si256(
+ _mm256_loadu_si256((__m256i *)&src_ptr[(i * src_stride) + j]),
+ _mm_loadu_si128((__m128i *)&src_ptr[(i * src_stride) + (j + 8)]),
+ 1);
+
+ __m256i res_16b =
+ convolve_lowbd_x_4tap(data, coeffs_256 + 1, filt_256);
+ res_16b = sr_x_round_avx2(res_16b);
+
+          // 8-bit conversion and saturation to uint8
+ __m256i res_8b = _mm256_packus_epi16(res_16b, res_16b);
+
+ // Store values into the destination buffer
+ // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ res_8b = _mm256_permute4x64_epi64(res_8b, 216);
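+          // 216 == 0xD8 orders the 64-bit lanes 0, 2, 1, 3 so that the low
+          // 128 bits hold the 16 output pixels in linear order after the
+          // in-lane pack above.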
+ __m128i res = _mm256_castsi256_si128(res_8b);
+ _mm_storeu_si128((__m128i *)&dst[i * dst_stride + j], res);
+ }
+ }
+ }
+ } else {
+ __m256i filt_256[4];
+
+ filt_256[0] = _mm256_loadu_si256((__m256i const *)filt1_global_avx2);
+ filt_256[1] = _mm256_loadu_si256((__m256i const *)filt2_global_avx2);
+ filt_256[2] = _mm256_loadu_si256((__m256i const *)filt3_global_avx2);
+
+ if (horz_tap == 6) {
+ // horz_filt as 6 tap
+ const uint8_t *src_ptr = src - 2;
+
+ prepare_half_coeffs_6tap_avx2(filter_params_x, subpel_x_q4, coeffs_256);
+
+ if (w == 8) {
+ do {
+ const __m256i res = x_convolve_6tap_8x2_avx2(src_ptr, src_stride,
+ coeffs_256, filt_256);
+ sr_x_round_store_8x2_avx2(res, dst, dst_stride);
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 16) {
+ do {
+ __m256i r[2];
+
+ x_convolve_6tap_16x2_avx2(src_ptr, src_stride, coeffs_256, filt_256,
+ r);
+ sr_x_round_store_16x2_avx2(r, dst, dst_stride);
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 32) {
+ do {
+ sr_x_6tap_32_avx2(src_ptr, coeffs_256, filt_256, dst);
+ src_ptr += src_stride;
+ dst += dst_stride;
+ } while (--y);
+ } else if (w == 64) {
+ do {
+ sr_x_6tap_32_avx2(src_ptr, coeffs_256, filt_256, dst);
+ sr_x_6tap_32_avx2(src_ptr + 32, coeffs_256, filt_256, dst + 32);
+ src_ptr += src_stride;
+ dst += dst_stride;
+ } while (--y);
+ } else {
+ assert(w == 128);
+
+ do {
+ sr_x_6tap_32_avx2(src_ptr, coeffs_256, filt_256, dst);
+ sr_x_6tap_32_avx2(src_ptr + 1 * 32, coeffs_256, filt_256,
+ dst + 1 * 32);
+ sr_x_6tap_32_avx2(src_ptr + 2 * 32, coeffs_256, filt_256,
+ dst + 2 * 32);
+ sr_x_6tap_32_avx2(src_ptr + 3 * 32, coeffs_256, filt_256,
+ dst + 3 * 32);
+ src_ptr += src_stride;
+ dst += dst_stride;
+ } while (--y);
+ }
+ } else if (horz_tap == 8) {
+ // horz_filt as 8 tap
+ const uint8_t *src_ptr = src - 3;
+
+ filt_256[3] = _mm256_loadu_si256((__m256i const *)filt4_global_avx2);
+
+ prepare_half_coeffs_8tap_avx2(filter_params_x, subpel_x_q4, coeffs_256);
+
+ if (w == 8) {
+ do {
+ const __m256i res = x_convolve_8tap_8x2_avx2(src_ptr, src_stride,
+ coeffs_256, filt_256);
+ sr_x_round_store_8x2_avx2(res, dst, dst_stride);
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 16) {
+ do {
+ __m256i r[2];
+
+ x_convolve_8tap_16x2_avx2(src_ptr, src_stride, coeffs_256, filt_256,
+ r);
+ sr_x_round_store_16x2_avx2(r, dst, dst_stride);
+ src_ptr += 2 * src_stride;
+ dst += 2 * dst_stride;
+ y -= 2;
+ } while (y);
+ } else if (w == 32) {
+ do {
+ sr_x_8tap_32_avx2(src_ptr, coeffs_256, filt_256, dst);
+ src_ptr += src_stride;
+ dst += dst_stride;
+ } while (--y);
+ } else if (w == 64) {
+ do {
+ sr_x_8tap_32_avx2(src_ptr, coeffs_256, filt_256, dst);
+ sr_x_8tap_32_avx2(src_ptr + 32, coeffs_256, filt_256, dst + 32);
+ src_ptr += src_stride;
+ dst += dst_stride;
+ } while (--y);
+ } else {
+ assert(w == 128);
+
+ do {
+ sr_x_8tap_32_avx2(src_ptr, coeffs_256, filt_256, dst);
+ sr_x_8tap_32_avx2(src_ptr + 1 * 32, coeffs_256, filt_256,
+ dst + 1 * 32);
+ sr_x_8tap_32_avx2(src_ptr + 2 * 32, coeffs_256, filt_256,
+ dst + 2 * 32);
+ sr_x_8tap_32_avx2(src_ptr + 3 * 32, coeffs_256, filt_256,
+ dst + 3 * 32);
+ src_ptr += src_stride;
+ dst += dst_stride;
+ } while (--y);
+ }
+ }
+ }
+}
+
+#endif // THIRD_PARTY_SVT_AV1_CONVOLVE_AVX2_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/synonyms.h b/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/synonyms.h
new file mode 100644
index 00000000000..0ded6e5cfc1
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/third_party/SVT-AV1/synonyms.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_THIRD_PARTY_SVT_AV1_SYNONYMS_H_
+#define AOM_THIRD_PARTY_SVT_AV1_SYNONYMS_H_
+
+#include "aom_dsp/x86/mem_sse2.h"
+#include "aom_dsp/x86/synonyms.h"
+
+static INLINE __m128i load_u8_8x2_sse2(const uint8_t *const src,
+ const ptrdiff_t stride) {
+ return load_8bit_8x2_to_1_reg_sse2(src, (int)(sizeof(*src) * stride));
+}
+
+static AOM_FORCE_INLINE void store_u8_4x2_sse2(const __m128i src,
+ uint8_t *const dst,
+ const ptrdiff_t stride) {
+ xx_storel_32(dst, src);
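+  // Assemble the second row's 4 bytes from two 16-bit extracts: SSE2
+  // provides _mm_extract_epi16, while _mm_extract_epi32 would require
+  // SSE4.1.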
+ *(uint32_t *)(dst + stride) =
+ ((uint32_t)_mm_extract_epi16(src, 3) << 16) | _mm_extract_epi16(src, 2);
+}
+
+#endif // AOM_THIRD_PARTY_SVT_AV1_SYNONYMS_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/fastfeat/README.libaom b/chromium/third_party/libaom/source/libaom/third_party/fastfeat/README.libaom
index a732b0d9370..ce7ce7076ed 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/fastfeat/README.libaom
+++ b/chromium/third_party/libaom/source/libaom/third_party/fastfeat/README.libaom
@@ -38,3 +38,4 @@ Add lines to turn off clang formatting for these files
Remove Fast 10, 11 and 12
Convert tabs to spaces
Prefix global functions with "aom_"
+Add error checking
diff --git a/chromium/third_party/libaom/source/libaom/third_party/fastfeat/fast_9.c b/chromium/third_party/libaom/source/libaom/third_party/fastfeat/fast_9.c
index c0fdbe26cd1..345c37fed16 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/fastfeat/fast_9.c
+++ b/chromium/third_party/libaom/source/libaom/third_party/fastfeat/fast_9.c
@@ -2994,6 +2994,7 @@ int* aom_fast9_score(const byte* i, int stride, xy* corners, int num_corners, in
int n;
int pixel[16];
+ if(!scores) return NULL;
make_offsets(pixel, stride);
for(n=0; n < num_corners; n++)
@@ -3012,6 +3013,7 @@ xy* aom_fast9_detect(const byte* im, int xsize, int ysize, int stride, int b, in
int x, y;
ret_corners = (xy*)malloc(sizeof(xy)*rsize);
+ if(!ret_corners) return NULL;
make_offsets(pixel, stride);
for(y=3; y < ysize - 3; y++)
@@ -5926,6 +5928,7 @@ xy* aom_fast9_detect(const byte* im, int xsize, int ysize, int stride, int b, in
{
rsize*=2;
ret_corners = (xy*)realloc(ret_corners, sizeof(xy)*rsize);
+ if(!ret_corners) return NULL;
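+      /* Note: the assignment overwrites ret_corners before the check, so on
+         realloc failure the previous buffer is leaked; the NULL return only
+         signals the failure to the caller. */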
}
ret_corners[num_corners].x = x;
ret_corners[num_corners].y = y;
diff --git a/chromium/third_party/libaom/source/libaom/third_party/fastfeat/nonmax.c b/chromium/third_party/libaom/source/libaom/third_party/fastfeat/nonmax.c
index 2e048e54605..39ec18c485b 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/fastfeat/nonmax.c
+++ b/chromium/third_party/libaom/source/libaom/third_party/fastfeat/nonmax.c
@@ -49,20 +49,28 @@ xy* aom_nonmax_suppression(const xy* corners, const int* scores, int num_corners
int point_above = 0;
int point_below = 0;
-
- if(num_corners < 1)
+ *ret_num_nonmax = 0;
+ if(!(corners && scores) || num_corners < 1)
{
- *ret_num_nonmax = 0;
return 0;
}
ret_nonmax = (xy*)malloc(num_corners * sizeof(xy));
+ if(!ret_nonmax)
+ {
+ return 0;
+ }
/* Find where each row begins
(the corners are output in raster scan order). A beginning of -1 signifies
that there are no corners on that row. */
last_row = corners[num_corners-1].y;
row_start = (int*)malloc((last_row+1)*sizeof(int));
+ if(!row_start)
+ {
+ free(ret_nonmax);
+ return 0;
+ }
for(i=0; i < last_row+1; i++)
row_start[i] = -1;
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/README.libaom b/chromium/third_party/libaom/source/libaom/third_party/googletest/README.libaom
index f3638538a6e..5e429d4dae5 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/README.libaom
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/README.libaom
@@ -1,5 +1,5 @@
URL: https://github.com/google/googletest
-Version: release-1.10.0-224-g23b2a3b1
+Version: release-1.12.1
License: BSD
License File: LICENSE
@@ -13,21 +13,26 @@ generation.
Local Modifications:
- Remove everything but:
+ .clang-format
+ CMakeLists.txt
+ CONTRIBUTORS
googlemock/
cmake
CMakeLists.txt
- CONTRIBUTORS
include
- LICENSE
README.md
src
googletest/
cmake
CMakeLists.txt
- CONTRIBUTORS
include
- LICENSE
README.md
src
-- Enable kErrorOnUninstantiatedParameterizedTest and
- kErrorOnUninstantiatedTypeParameterizedTest in gtest.cc
+ LICENSE
+ README.md
+- In googletest/include/gtest/internal/custom/gtest-port.h, define
+ GTEST_HAS_NOTIFICATION_ as 1 and use a stub Notification class to fix
+ the mingw32 g++ compilation errors caused by the lack of std::mutex
+ and std::condition_variable in the <mutex> and <condition_variable>
+ headers if mingw32 is configured with the win32 threads option. See
+ https://stackoverflow.com/questions/17242516/mingw-w64-threads-posix-vs-win32
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/.clang-format b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/.clang-format
new file mode 100644
index 00000000000..5b9bfe6d224
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/.clang-format
@@ -0,0 +1,4 @@
+# Run manually to reformat a file:
+# clang-format -i --style=file <file>
+Language: Cpp
+BasedOnStyle: Google
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/CMakeLists.txt b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/CMakeLists.txt
new file mode 100644
index 00000000000..102e28cd49e
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/CMakeLists.txt
@@ -0,0 +1,34 @@
+# Note: CMake support is community-based. The maintainers do not use CMake
+# internally.
+
+cmake_minimum_required(VERSION 3.5)
+
+if (POLICY CMP0048)
+ cmake_policy(SET CMP0048 NEW)
+endif (POLICY CMP0048)
+
+if (POLICY CMP0077)
+ cmake_policy(SET CMP0077 NEW)
+endif (POLICY CMP0077)
+
+project(googletest-distribution)
+set(GOOGLETEST_VERSION 1.12.1)
+
+if(NOT CYGWIN AND NOT MSYS AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL QNX)
+ set(CMAKE_CXX_EXTENSIONS OFF)
+endif()
+
+enable_testing()
+
+include(CMakeDependentOption)
+include(GNUInstallDirs)
+
+# Note that the googlemock target already builds googletest
+option(BUILD_GMOCK "Builds the googlemock subproject" ON)
+option(INSTALL_GTEST "Enable installation of googletest. (Projects embedding googletest may want to turn this OFF.)" ON)
+
+if(BUILD_GMOCK)
+ add_subdirectory( googlemock )
+else()
+ add_subdirectory( googletest )
+endif()
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/CONTRIBUTORS b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/CONTRIBUTORS
index 1e4afe21825..77397a5b53f 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/CONTRIBUTORS
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/CONTRIBUTORS
@@ -5,34 +5,61 @@
Ajay Joshi <jaj@google.com>
Balázs Dán <balazs.dan@gmail.com>
+Benoit Sigoure <tsuna@google.com>
Bharat Mediratta <bharat@menalto.com>
+Bogdan Piloca <boo@google.com>
Chandler Carruth <chandlerc@google.com>
Chris Prince <cprince@google.com>
Chris Taylor <taylorc@google.com>
Dan Egnor <egnor@google.com>
+Dave MacLachlan <dmaclach@gmail.com>
+David Anderson <danderson@google.com>
+Dean Sturtevant
Eric Roman <eroman@chromium.org>
+Gene Volovich <gv@cite.com>
Hady Zalek <hady.zalek@gmail.com>
+Hal Burch <gmock@hburch.com>
Jeffrey Yasskin <jyasskin@google.com>
+Jim Keller <jimkeller@google.com>
+Joe Walnes <joe@truemesh.com>
+Jon Wray <jwray@google.com>
Jói Sigurðsson <joi@google.com>
Keir Mierle <mierle@gmail.com>
Keith Ray <keith.ray@gmail.com>
Kenton Varda <kenton@google.com>
+Kostya Serebryany <kcc@google.com>
Krystian Kuzniarek <krystian.kuzniarek@gmail.com>
+Lev Makhlis
Manuel Klimek <klimek@google.com>
+Mario Tanev <radix@google.com>
+Mark Paskin
Markus Heule <markus.heule@gmail.com>
+Martijn Vels <mvels@google.com>
+Matthew Simmons <simmonmt@acm.org>
Mika Raento <mikie@iki.fi>
+Mike Bland <mbland@google.com>
Miklós Fazekas <mfazekas@szemafor.com>
+Neal Norwitz <nnorwitz@gmail.com>
+Nermin Ozkiranartli <nermin@google.com>
+Owen Carlsen <ocarlsen@google.com>
+Paneendra Ba <paneendra@google.com>
Pasi Valminen <pasi.valminen@gmail.com>
Patrick Hanna <phanna@google.com>
Patrick Riley <pfr@google.com>
+Paul Menage <menage@google.com>
Peter Kaminski <piotrk@google.com>
+Piotr Kaminski <piotrk@google.com>
Preston Jackson <preston.a.jackson@gmail.com>
Rainer Klaffenboeck <rainer.klaffenboeck@dynatrace.com>
Russ Cox <rsc@google.com>
Russ Rufer <russ@pentad.com>
Sean Mcafee <eefacm@gmail.com>
Sigurður Ásgeirsson <siggi@google.com>
+Sverre Sundsdal <sundsdal@gmail.com>
+Szymon Sobik <sobik.szymon@gmail.com>
+Takeshi Yoshino <tyoshino@google.com>
Tracy Bialik <tracy@pentad.com>
Vadim Berman <vadimb@google.com>
Vlad Losev <vladl@google.com>
+Wolfgang Klier <wklier@google.com>
Zhanyong Wan <wan@google.com>
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/LICENSE b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/LICENSE
index 1941a11f8ce..1941a11f8ce 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/LICENSE
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/LICENSE
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/README.md b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/README.md
new file mode 100644
index 00000000000..30edaecf313
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/README.md
@@ -0,0 +1,141 @@
+# GoogleTest
+
+### Announcements
+
+#### Live at Head
+
+GoogleTest now follows the
+[Abseil Live at Head philosophy](https://abseil.io/about/philosophy#upgrade-support).
+We recommend
+[updating to the latest commit in the `main` branch as often as possible](https://github.com/abseil/abseil-cpp/blob/master/FAQ.md#what-is-live-at-head-and-how-do-i-do-it).
+
+#### Documentation Updates
+
+Our documentation is now live on GitHub Pages at
+https://google.github.io/googletest/. We recommend browsing the documentation on
+GitHub Pages rather than directly in the repository.
+
+#### Release 1.11.0
+
+[Release 1.11.0](https://github.com/google/googletest/releases/tag/release-1.11.0)
+is now available.
+
+#### Coming Soon
+
+* We are planning to take a dependency on
+ [Abseil](https://github.com/abseil/abseil-cpp).
+* More documentation improvements are planned.
+
+## Welcome to **GoogleTest**, Google's C++ test framework!
+
+This repository is a merger of the formerly separate GoogleTest and GoogleMock
+projects. These were so closely related that it makes sense to maintain and
+release them together.
+
+### Getting Started
+
+See the [GoogleTest User's Guide](https://google.github.io/googletest/) for
+documentation. We recommend starting with the
+[GoogleTest Primer](https://google.github.io/googletest/primer.html).
+
+More information about building GoogleTest can be found at
+[googletest/README.md](googletest/README.md).
+
+## Features
+
+* An [xUnit](https://en.wikipedia.org/wiki/XUnit) test framework.
+* Test discovery.
+* A rich set of assertions.
+* User-defined assertions.
+* Death tests.
+* Fatal and non-fatal failures.
+* Value-parameterized tests.
+* Type-parameterized tests.
+* Various options for running the tests.
+* XML test report generation.
+
+## Supported Platforms
+
+GoogleTest requires a codebase and compiler compliant with the C++11 standard or
+newer.
+
+The GoogleTest code is officially supported on the following platforms.
+Operating systems or tools not listed below are community-supported. For
+community-supported platforms, patches that do not complicate the code may be
+considered.
+
+If you notice any problems on your platform, please file an issue on the
+[GoogleTest GitHub Issue Tracker](https://github.com/google/googletest/issues).
+Pull requests containing fixes are welcome!
+
+### Operating Systems
+
+* Linux
+* macOS
+* Windows
+
+### Compilers
+
+* gcc 5.0+
+* clang 5.0+
+* MSVC 2015+
+
+**macOS users:** Xcode 9.3+ provides clang 5.0+.
+
+### Build Systems
+
+* [Bazel](https://bazel.build/)
+* [CMake](https://cmake.org/)
+
+**Note:** Bazel is the build system used by the team internally and in tests.
+CMake is supported on a best-effort basis and by the community.
+
+## Who Is Using GoogleTest?
+
+In addition to many internal projects at Google, GoogleTest is also used by the
+following notable projects:
+
+* The [Chromium projects](http://www.chromium.org/) (behind the Chrome browser
+ and Chrome OS).
+* The [LLVM](http://llvm.org/) compiler.
+* [Protocol Buffers](https://github.com/google/protobuf), Google's data
+ interchange format.
+* The [OpenCV](http://opencv.org/) computer vision library.
+
+## Related Open Source Projects
+
+[GTest Runner](https://github.com/nholthaus/gtest-runner) is a Qt5 based
+automated test-runner and Graphical User Interface with powerful features for
+Windows and Linux platforms.
+
+[GoogleTest UI](https://github.com/ospector/gtest-gbar) is a test runner that
+runs your test binary, allows you to track its progress via a progress bar, and
+displays a list of test failures. Clicking on one shows failure text. GoogleTest
+UI is written in C#.
+
+[GTest TAP Listener](https://github.com/kinow/gtest-tap-listener) is an event
+listener for GoogleTest that implements the
+[TAP protocol](https://en.wikipedia.org/wiki/Test_Anything_Protocol) for test
+result output. If your test runner understands TAP, you may find it useful.
+
+[gtest-parallel](https://github.com/google/gtest-parallel) is a test runner that
+runs tests from your binary in parallel to provide significant speed-up.
+
+[GoogleTest Adapter](https://marketplace.visualstudio.com/items?itemName=DavidSchuldenfrei.gtest-adapter)
+is a VS Code extension that lets you view GoogleTest in a tree view and
+run/debug your tests.
+
+[C++ TestMate](https://github.com/matepek/vscode-catch2-test-adapter) is a VS
+Code extension that lets you view GoogleTest in a tree view and run/debug
+your tests.
+
+[Cornichon](https://pypi.org/project/cornichon/) is a small Gherkin DSL parser
+that generates stub code for GoogleTest.
+
+## Contributing Changes
+
+Please read
+[`CONTRIBUTING.md`](https://github.com/google/googletest/blob/master/CONTRIBUTING.md)
+for details on how to contribute to this project.
+
+Happy testing!
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/CMakeLists.txt b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/CMakeLists.txt
index 8ab59d7f6cd..5c1f0dafea8 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/CMakeLists.txt
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/CMakeLists.txt
@@ -36,13 +36,9 @@ endif()
# as ${gmock_SOURCE_DIR} and to the root binary directory as
# ${gmock_BINARY_DIR}.
# Language "C" is required for find_package(Threads).
-if (CMAKE_VERSION VERSION_LESS 3.0)
- project(gmock CXX C)
-else()
- cmake_policy(SET CMP0048 NEW)
- project(gmock VERSION ${GOOGLETEST_VERSION} LANGUAGES CXX C)
-endif()
-cmake_minimum_required(VERSION 2.6.4)
+cmake_minimum_required(VERSION 3.5)
+cmake_policy(SET CMP0048 NEW)
+project(gmock VERSION ${GOOGLETEST_VERSION} LANGUAGES CXX C)
if (COMMAND set_up_hermetic_build)
set_up_hermetic_build()
@@ -100,18 +96,21 @@ if (MSVC)
else()
cxx_library(gmock "${cxx_strict}" src/gmock-all.cc)
target_link_libraries(gmock PUBLIC gtest)
+ set_target_properties(gmock PROPERTIES VERSION ${GOOGLETEST_VERSION})
cxx_library(gmock_main "${cxx_strict}" src/gmock_main.cc)
target_link_libraries(gmock_main PUBLIC gmock)
+ set_target_properties(gmock_main PROPERTIES VERSION ${GOOGLETEST_VERSION})
endif()
# If the CMake version supports it, attach header directory information
# to the targets for when we are part of a parent build (ie being pulled
# in via add_subdirectory() rather than being a standalone build).
if (DEFINED CMAKE_VERSION AND NOT "${CMAKE_VERSION}" VERSION_LESS "2.8.11")
+ string(REPLACE ";" "$<SEMICOLON>" dirs "${gmock_build_include_dirs}")
target_include_directories(gmock SYSTEM INTERFACE
- "$<BUILD_INTERFACE:${gmock_build_include_dirs}>"
+ "$<BUILD_INTERFACE:${dirs}>"
"$<INSTALL_INTERFACE:$<INSTALL_PREFIX>/${CMAKE_INSTALL_INCLUDEDIR}>")
target_include_directories(gmock_main SYSTEM INTERFACE
- "$<BUILD_INTERFACE:${gmock_build_include_dirs}>"
+ "$<BUILD_INTERFACE:${dirs}>"
"$<INSTALL_INTERFACE:$<INSTALL_PREFIX>/${CMAKE_INSTALL_INCLUDEDIR}>")
endif()
@@ -136,20 +135,6 @@ if (gmock_build_tests)
# 'make test' or ctest.
enable_testing()
- if (WIN32)
- file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/$<CONFIG>/RunTest.ps1"
- CONTENT
-"$project_bin = \"${CMAKE_BINARY_DIR}/bin/$<CONFIG>\"
-$env:Path = \"$project_bin;$env:Path\"
-& $args")
- elseif (MINGW OR CYGWIN)
- file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/RunTest.ps1"
- CONTENT
-"$project_bin = (cygpath --windows ${CMAKE_BINARY_DIR}/bin)
-$env:Path = \"$project_bin;$env:Path\"
-& $args")
- endif()
-
if (MINGW OR CYGWIN)
if (CMAKE_VERSION VERSION_LESS "2.8.12")
add_compile_options("-Wa,-mbig-obj")
@@ -165,10 +150,11 @@ $env:Path = \"$project_bin;$env:Path\"
cxx_test(gmock-cardinalities_test gmock_main)
cxx_test(gmock_ex_test gmock_main)
cxx_test(gmock-function-mocker_test gmock_main)
- cxx_test(gmock-generated-actions_test gmock_main)
- cxx_test(gmock-generated-matchers_test gmock_main)
cxx_test(gmock-internal-utils_test gmock_main)
- cxx_test(gmock-matchers_test gmock_main)
+ cxx_test(gmock-matchers-arithmetic_test gmock_main)
+ cxx_test(gmock-matchers-comparisons_test gmock_main)
+ cxx_test(gmock-matchers-containers_test gmock_main)
+ cxx_test(gmock-matchers-misc_test gmock_main)
cxx_test(gmock-more-actions_test gmock_main)
cxx_test(gmock-nice-strict_test gmock_main)
cxx_test(gmock-port_test gmock_main)
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/CONTRIBUTORS b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/CONTRIBUTORS
deleted file mode 100644
index 6e9ae362b60..00000000000
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/CONTRIBUTORS
+++ /dev/null
@@ -1,40 +0,0 @@
-# This file contains a list of people who've made non-trivial
-# contribution to the Google C++ Mocking Framework project. People
-# who commit code to the project are encouraged to add their names
-# here. Please keep the list sorted by first names.
-
-Benoit Sigoure <tsuna@google.com>
-Bogdan Piloca <boo@google.com>
-Chandler Carruth <chandlerc@google.com>
-Dave MacLachlan <dmaclach@gmail.com>
-David Anderson <danderson@google.com>
-Dean Sturtevant
-Gene Volovich <gv@cite.com>
-Hal Burch <gmock@hburch.com>
-Jeffrey Yasskin <jyasskin@google.com>
-Jim Keller <jimkeller@google.com>
-Joe Walnes <joe@truemesh.com>
-Jon Wray <jwray@google.com>
-Keir Mierle <mierle@gmail.com>
-Keith Ray <keith.ray@gmail.com>
-Kostya Serebryany <kcc@google.com>
-Lev Makhlis
-Manuel Klimek <klimek@google.com>
-Mario Tanev <radix@google.com>
-Mark Paskin
-Markus Heule <markus.heule@gmail.com>
-Matthew Simmons <simmonmt@acm.org>
-Mike Bland <mbland@google.com>
-Neal Norwitz <nnorwitz@gmail.com>
-Nermin Ozkiranartli <nermin@google.com>
-Owen Carlsen <ocarlsen@google.com>
-Paneendra Ba <paneendra@google.com>
-Paul Menage <menage@google.com>
-Piotr Kaminski <piotrk@google.com>
-Russ Rufer <russ@pentad.com>
-Sverre Sundsdal <sundsdal@gmail.com>
-Takeshi Yoshino <tyoshino@google.com>
-Vadim Berman <vadimb@google.com>
-Vlad Losev <vladl@google.com>
-Wolfgang Klier <wklier@google.com>
-Zhanyong Wan <wan@google.com>
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/README.md b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/README.md
index 183fdb81d9b..7da60655dba 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/README.md
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/README.md
@@ -7,38 +7,34 @@ derive better designs of your system and write better tests.
It is inspired by:
-* [jMock](http://www.jmock.org/),
-* [EasyMock](http://www.easymock.org/), and
-* [Hamcrest](http://code.google.com/p/hamcrest/),
+* [jMock](http://www.jmock.org/)
+* [EasyMock](http://www.easymock.org/)
+* [Hamcrest](http://code.google.com/p/hamcrest/)
-and designed with C++'s specifics in mind.
+It is designed with C++'s specifics in mind.
gMock:
-- provides a declarative syntax for defining mocks,
-- can define partial (hybrid) mocks, which are a cross of real and mock
- objects,
-- handles functions of arbitrary types and overloaded functions,
-- comes with a rich set of matchers for validating function arguments,
-- uses an intuitive syntax for controlling the behavior of a mock,
-- does automatic verification of expectations (no record-and-replay needed),
-- allows arbitrary (partial) ordering constraints on function calls to be
- expressed,
-- lets a user extend it by defining new matchers and actions.
-- does not use exceptions, and
-- is easy to learn and use.
+- Provides a declarative syntax for defining mocks.
+- Can define partial (hybrid) mocks, which are a cross of real and mock
+ objects.
+- Handles functions of arbitrary types and overloaded functions.
+- Comes with a rich set of matchers for validating function arguments.
+- Uses an intuitive syntax for controlling the behavior of a mock.
+- Does automatic verification of expectations (no record-and-replay needed).
+- Allows arbitrary (partial) ordering constraints on function calls to be
+ expressed.
+- Lets a user extend it by defining new matchers and actions.
+- Does not use exceptions.
+- Is easy to learn and use.
Details and examples can be found here:
-* [gMock for Dummies](docs/for_dummies.md)
-* [Legacy gMock FAQ](docs/gmock_faq.md)
-* [gMock Cookbook](docs/cook_book.md)
-* [gMock Cheat Sheet](docs/cheat_sheet.md)
+* [gMock for Dummies](https://google.github.io/googletest/gmock_for_dummies.html)
+* [Legacy gMock FAQ](https://google.github.io/googletest/gmock_faq.html)
+* [gMock Cookbook](https://google.github.io/googletest/gmock_cook_book.html)
+* [gMock Cheat Sheet](https://google.github.io/googletest/gmock_cheat_sheet.html)
-Please note that code under scripts/generator/ is from the [cppclean
-project](http://code.google.com/p/cppclean/) and under the Apache
-License, which is different from Google Mock's license.
-
-Google Mock is a part of
-[Google Test C++ testing framework](http://github.com/google/googletest/) and a
+GoogleMock is a part of
+[GoogleTest C++ testing framework](http://github.com/google/googletest/) and is
subject to the same requirements.
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/cmake/gmock.pc.in b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/cmake/gmock.pc.in
index 5780fcaa53b..23c67b5c88d 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/cmake/gmock.pc.in
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/cmake/gmock.pc.in
@@ -5,6 +5,6 @@ Name: gmock
Description: GoogleMock (without main() function)
Version: @PROJECT_VERSION@
URL: https://github.com/google/googletest
-Requires: gtest
+Requires: gtest = @PROJECT_VERSION@
Libs: -L${libdir} -lgmock @CMAKE_THREAD_LIBS_INIT@
Cflags: -I${includedir} @GTEST_HAS_PTHREAD_MACRO@
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/cmake/gmock_main.pc.in b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/cmake/gmock_main.pc.in
index f2dfe69e0f7..66ffea7f443 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/cmake/gmock_main.pc.in
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/cmake/gmock_main.pc.in
@@ -5,6 +5,6 @@ Name: gmock_main
Description: GoogleMock (with main() function)
Version: @PROJECT_VERSION@
URL: https://github.com/google/googletest
-Requires: gmock
+Requires: gmock = @PROJECT_VERSION@
Libs: -L${libdir} -lgmock_main @CMAKE_THREAD_LIBS_INIT@
Cflags: -I${includedir} @GTEST_HAS_PTHREAD_MACRO@
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-actions.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-actions.h
index e46bcaa7947..c785ad8abba 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-actions.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-actions.h
@@ -27,7 +27,6 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
// Google Mock - a framework for writing C++ mock classes.
//
// The ACTION* family of macros can be used in a namespace scope to
@@ -123,21 +122,23 @@
// MORE INFORMATION:
//
// To learn more about using these macros, please search for 'ACTION' on
-// https://github.com/google/googletest/blob/master/googlemock/docs/cook_book.md
+// https://github.com/google/googletest/blob/master/docs/gmock_cook_book.md
-// GOOGLETEST_CM0002 DO NOT DELETE
+// IWYU pragma: private, include "gmock/gmock.h"
+// IWYU pragma: friend gmock/.*
-#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_ACTIONS_H_
-#define GMOCK_INCLUDE_GMOCK_GMOCK_ACTIONS_H_
+#ifndef GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_ACTIONS_H_
+#define GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_ACTIONS_H_
#ifndef _WIN32_WCE
-# include <errno.h>
+#include <errno.h>
#endif
#include <algorithm>
#include <functional>
#include <memory>
#include <string>
+#include <tuple>
#include <type_traits>
#include <utility>
@@ -146,8 +147,8 @@
#include "gmock/internal/gmock-pp.h"
#ifdef _MSC_VER
-# pragma warning(push)
-# pragma warning(disable:4100)
+#pragma warning(push)
+#pragma warning(disable : 4100)
#endif
namespace testing {
@@ -195,9 +196,7 @@ class BuiltInDefaultValue {
public:
// This function returns true if and only if type T has a built-in default
// value.
- static bool Exists() {
- return ::std::is_default_constructible<T>::value;
- }
+ static bool Exists() { return ::std::is_default_constructible<T>::value; }
static T Get() {
return BuiltInDefaultValueGetter<
@@ -226,11 +225,11 @@ class BuiltInDefaultValue<T*> {
// The following specializations define the default values for
// specific types we care about.
#define GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(type, value) \
- template <> \
- class BuiltInDefaultValue<type> { \
- public: \
- static bool Exists() { return true; } \
- static type Get() { return value; } \
+ template <> \
+ class BuiltInDefaultValue<type> { \
+ public: \
+ static bool Exists() { return true; } \
+ static type Get() { return value; } \
}
GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(void, ); // NOLINT
@@ -254,17 +253,309 @@ GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned short, 0U); // NOLINT
GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed short, 0); // NOLINT
GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned int, 0U);
GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed int, 0);
-GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned long, 0UL); // NOLINT
-GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed long, 0L); // NOLINT
+GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned long, 0UL); // NOLINT
+GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed long, 0L); // NOLINT
GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned long long, 0); // NOLINT
-GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed long long, 0); // NOLINT
+GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed long long, 0); // NOLINT
GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(float, 0);
GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(double, 0);
#undef GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_
+// Partial implementations of metaprogramming types from the standard library
+// not available in C++11.
+
+template <typename P>
+struct negation
+ // NOLINTNEXTLINE
+ : std::integral_constant<bool, bool(!P::value)> {};
+
+// Base case: with zero predicates the answer is always true.
+template <typename...>
+struct conjunction : std::true_type {};
+
+// With a single predicate, the answer is that predicate.
+template <typename P1>
+struct conjunction<P1> : P1 {};
+
+// With multiple predicates the answer is the first predicate if that is false,
+// and we recurse otherwise.
+template <typename P1, typename... Ps>
+struct conjunction<P1, Ps...>
+ : std::conditional<bool(P1::value), conjunction<Ps...>, P1>::type {};
+
+template <typename...>
+struct disjunction : std::false_type {};
+
+template <typename P1>
+struct disjunction<P1> : P1 {};
+
+template <typename P1, typename... Ps>
+struct disjunction<P1, Ps...>
+ // NOLINTNEXTLINE
+ : std::conditional<!bool(P1::value), disjunction<Ps...>, P1>::type {};
+
+template <typename...>
+using void_t = void;
+
+// Detects whether an expression of type `From` can be implicitly converted to
+// `To` according to [conv]. In C++17, [conv]/3 defines this as follows:
+//
+// An expression e can be implicitly converted to a type T if and only if
+// the declaration T t=e; is well-formed, for some invented temporary
+// variable t ([dcl.init]).
+//
+// [conv]/2 implies we can use function argument passing to detect whether this
+// initialization is valid.
+//
+// Note that this is distinct from is_convertible, which requires this be valid:
+//
+// To test() {
+// return declval<From>();
+// }
+//
+// In particular, is_convertible doesn't give the correct answer when `To` and
+// `From` are the same non-moveable type since `declval<From>` will be an rvalue
+// reference, defeating the guaranteed copy elision that would otherwise make
+// this function work.
+//
+// REQUIRES: `From` is not cv void.
+template <typename From, typename To>
+struct is_implicitly_convertible {
+ private:
+ // A function that accepts a parameter of type T. This can be called with type
+ // U successfully only if U is implicitly convertible to T.
+ template <typename T>
+ static void Accept(T);
+
+ // A function that creates a value of type T.
+ template <typename T>
+ static T Make();
+
+  // An overload selected when implicit conversion from T to To is possible.
+ template <typename T, typename = decltype(Accept<To>(Make<T>()))>
+ static std::true_type TestImplicitConversion(int);
+
+ // A fallback overload selected in all other cases.
+ template <typename T>
+ static std::false_type TestImplicitConversion(...);
+
+ public:
+ using type = decltype(TestImplicitConversion<From>(0));
+ static constexpr bool value = type::value;
+};
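// [Editorial sketch, not part of the patch] A minimal illustration of the
// trait above, using only standard-library types:
//
//   static_assert(internal::is_implicitly_convertible<int, long>::value,
//                 "int converts implicitly to long");
//   static_assert(!internal::is_implicitly_convertible<void*, int>::value,
//                 "no implicit conversion from void* to int");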
+
+// Like std::invoke_result_t from C++17, but works only for objects with call
+// operators (not e.g. member function pointers, which we don't need specific
+// support for in OnceAction because std::function deals with them).
+template <typename F, typename... Args>
+using call_result_t = decltype(std::declval<F>()(std::declval<Args>()...));
+
+template <typename Void, typename R, typename F, typename... Args>
+struct is_callable_r_impl : std::false_type {};
+
+// Specialize the struct for those template arguments where call_result_t is
+// well-formed. When it's not, the generic template above is chosen, resulting
+// in std::false_type.
+template <typename R, typename F, typename... Args>
+struct is_callable_r_impl<void_t<call_result_t<F, Args...>>, R, F, Args...>
+ : std::conditional<
+ std::is_void<R>::value, //
+ std::true_type, //
+ is_implicitly_convertible<call_result_t<F, Args...>, R>>::type {};
+
+// Like std::is_invocable_r from C++17, but works only for objects with call
+// operators. See the note on call_result_t.
+template <typename R, typename F, typename... Args>
+using is_callable_r = is_callable_r_impl<void, R, F, Args...>;
+
+// Like std::as_const from C++17.
+template <typename T>
+typename std::add_const<T>::type& as_const(T& t) {
+ return t;
+}
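// [Editorial sketch, not part of the patch] These backports mirror the C++17
// std:: traits, e.g.:
//
//   static_assert(conjunction<>::value, "empty conjunction is true");
//   static_assert(!disjunction<>::value, "empty disjunction is false");
//   static_assert(negation<std::false_type>::value, "");
//   static_assert(!conjunction<std::true_type, std::false_type>::value, "");
//
// As with std::conjunction, evaluation short-circuits: because only the
// selected branch of std::conditional is instantiated, predicates after the
// first false one are never instantiated.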
+
} // namespace internal
+// Specialized for function types below.
+template <typename F>
+class OnceAction;
+
+// An action that can only be used once.
+//
+// This is accepted by WillOnce, which doesn't require the underlying action to
+// be copy-constructible (only move-constructible), and promises to invoke it as
+// an rvalue reference. This allows the action to work with move-only types like
+// std::move_only_function in a type-safe manner.
+//
+// For example:
+//
+// // Assume we have some API that needs to accept a unique pointer to some
+// // non-copyable object Foo.
+// void AcceptUniquePointer(std::unique_ptr<Foo> foo);
+//
+// // We can define an action that provides a Foo to that API. Because It
+// // has to give away its unique pointer, it must not be called more than
+// // once, so its call operator is &&-qualified.
+// struct ProvideFoo {
+// std::unique_ptr<Foo> foo;
+//
+// void operator()() && {
+//      AcceptUniquePointer(std::move(foo));
+// }
+// };
+//
+// // This action can be used with WillOnce.
+// EXPECT_CALL(mock, Call)
+// .WillOnce(ProvideFoo{std::make_unique<Foo>(...)});
+//
+// // But a call to WillRepeatedly will fail to compile. This is correct,
+// // since the action cannot correctly be used repeatedly.
+// EXPECT_CALL(mock, Call)
+// .WillRepeatedly(ProvideFoo{std::make_unique<Foo>(...)});
+//
+// A less-contrived example would be an action that returns an arbitrary type,
+// whose &&-qualified call operator is capable of dealing with move-only types.
+template <typename Result, typename... Args>
+class OnceAction<Result(Args...)> final {
+ private:
+ // True iff we can use the given callable type (or lvalue reference) directly
+ // via StdFunctionAdaptor.
+ template <typename Callable>
+ using IsDirectlyCompatible = internal::conjunction<
+ // It must be possible to capture the callable in StdFunctionAdaptor.
+ std::is_constructible<typename std::decay<Callable>::type, Callable>,
+ // The callable must be compatible with our signature.
+ internal::is_callable_r<Result, typename std::decay<Callable>::type,
+ Args...>>;
+
+ // True iff we can use the given callable type via StdFunctionAdaptor once we
+ // ignore incoming arguments.
+ template <typename Callable>
+ using IsCompatibleAfterIgnoringArguments = internal::conjunction<
+ // It must be possible to capture the callable in a lambda.
+ std::is_constructible<typename std::decay<Callable>::type, Callable>,
+ // The callable must be invocable with zero arguments, returning something
+ // convertible to Result.
+ internal::is_callable_r<Result, typename std::decay<Callable>::type>>;
+
+ public:
+ // Construct from a callable that is directly compatible with our mocked
+ // signature: it accepts our function type's arguments and returns something
+ // convertible to our result type.
+ template <typename Callable,
+ typename std::enable_if<
+ internal::conjunction<
+ // Teach clang on macOS that we're not talking about a
+ // copy/move constructor here. Otherwise it gets confused
+ // when checking the is_constructible requirement of our
+ // traits above.
+ internal::negation<std::is_same<
+ OnceAction, typename std::decay<Callable>::type>>,
+ IsDirectlyCompatible<Callable>> //
+ ::value,
+ int>::type = 0>
+ OnceAction(Callable&& callable) // NOLINT
+ : function_(StdFunctionAdaptor<typename std::decay<Callable>::type>(
+ {}, std::forward<Callable>(callable))) {}
+
+ // As above, but for a callable that ignores the mocked function's arguments.
+ template <typename Callable,
+ typename std::enable_if<
+ internal::conjunction<
+ // Teach clang on macOS that we're not talking about a
+ // copy/move constructor here. Otherwise it gets confused
+ // when checking the is_constructible requirement of our
+ // traits above.
+ internal::negation<std::is_same<
+ OnceAction, typename std::decay<Callable>::type>>,
+ // Exclude callables for which the overload above works.
+ // We'd rather provide the arguments if possible.
+ internal::negation<IsDirectlyCompatible<Callable>>,
+ IsCompatibleAfterIgnoringArguments<Callable>>::value,
+ int>::type = 0>
+ OnceAction(Callable&& callable) // NOLINT
+ // Call the constructor above with a callable
+ // that ignores the input arguments.
+ : OnceAction(IgnoreIncomingArguments<typename std::decay<Callable>::type>{
+ std::forward<Callable>(callable)}) {}
+
+ // We are naturally copyable because we store only an std::function, but
+ // semantically we should not be copyable.
+ OnceAction(const OnceAction&) = delete;
+ OnceAction& operator=(const OnceAction&) = delete;
+ OnceAction(OnceAction&&) = default;
+
+ // Invoke the underlying action callable with which we were constructed,
+ // handing it the supplied arguments.
+ Result Call(Args... args) && {
+ return function_(std::forward<Args>(args)...);
+ }
+
+ private:
+  // An adaptor that wraps a callable that is compatible with our signature
+  // when invoked as an rvalue reference, so that it can be stored in an
+  // std::function. This throws away type safety, but that's fine because
+ // this is only used by WillOnce, which we know calls at most once.
+ //
+ // Once we have something like std::move_only_function from C++23, we can do
+ // away with this.
+ template <typename Callable>
+ class StdFunctionAdaptor final {
+ public:
+ // A tag indicating that the (otherwise universal) constructor is accepting
+ // the callable itself, instead of e.g. stealing calls for the move
+ // constructor.
+ struct CallableTag final {};
+
+ template <typename F>
+ explicit StdFunctionAdaptor(CallableTag, F&& callable)
+ : callable_(std::make_shared<Callable>(std::forward<F>(callable))) {}
+
+ // Rather than explicitly returning Result, we return whatever the wrapped
+ // callable returns. This allows for compatibility with existing uses like
+ // the following, when the mocked function returns void:
+ //
+ // EXPECT_CALL(mock_fn_, Call)
+ // .WillOnce([&] {
+ // [...]
+ // return 0;
+ // });
+ //
+ // Such a callable can be turned into std::function<void()>. If we use an
+ // explicit return type of Result here then it *doesn't* work with
+ // std::function, because we'll get a "void function should not return a
+ // value" error.
+ //
+ // We need not worry about incompatible result types because the SFINAE on
+ // OnceAction already checks this for us. std::is_invocable_r_v itself makes
+ // the same allowance for void result types.
+ template <typename... ArgRefs>
+ internal::call_result_t<Callable, ArgRefs...> operator()(
+ ArgRefs&&... args) const {
+ return std::move(*callable_)(std::forward<ArgRefs>(args)...);
+ }
+
+ private:
+ // We must put the callable on the heap so that we are copyable, which
+ // std::function needs.
+ std::shared_ptr<Callable> callable_;
+ };
+
+ // An adaptor that makes a callable that accepts zero arguments callable with
+ // our mocked arguments.
+ template <typename Callable>
+ struct IgnoreIncomingArguments {
+ internal::call_result_t<Callable> operator()(Args&&...) {
+ return std::move(callable)();
+ }
+
+ Callable callable;
+ };
+
+ std::function<Result(Args...)> function_;
+};
+
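// [Editorial sketch, not part of the patch] What OnceAction enables, assuming
// a hypothetical non-copyable type Foo:
//
//   MockFunction<std::unique_ptr<Foo>()> mock;
//   auto foo = std::make_unique<Foo>();
//   // The lambda is move-only (it owns a unique_ptr), which WillOnce
//   // accepts via OnceAction; handing it to WillRepeatedly would not
//   // compile, since the action cannot be performed more than once.
//   EXPECT_CALL(mock, Call)
//       .WillOnce([foo = std::move(foo)]() mutable { return std::move(foo); });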
// When an unexpected function call is encountered, Google Mock will
// let it return a default value if the user has specified one for its
// return type, or if the return type has a built-in default value;
@@ -334,7 +625,8 @@ class DefaultValue {
private:
const T value_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(FixedValueProducer);
+ FixedValueProducer(const FixedValueProducer&) = delete;
+ FixedValueProducer& operator=(const FixedValueProducer&) = delete;
};
class FactoryValueProducer : public ValueProducer {
@@ -345,7 +637,8 @@ class DefaultValue {
private:
const FactoryFunction factory_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(FactoryValueProducer);
+ FactoryValueProducer(const FactoryValueProducer&) = delete;
+ FactoryValueProducer& operator=(const FactoryValueProducer&) = delete;
};
static ValueProducer* producer_;
@@ -419,31 +712,40 @@ class ActionInterface {
virtual Result Perform(const ArgumentTuple& args) = 0;
private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ActionInterface);
+ ActionInterface(const ActionInterface&) = delete;
+ ActionInterface& operator=(const ActionInterface&) = delete;
};
-// An Action<F> is a copyable and IMMUTABLE (except by assignment)
-// object that represents an action to be taken when a mock function
-// of type F is called. The implementation of Action<T> is just a
-// std::shared_ptr to const ActionInterface<T>. Don't inherit from Action!
-// You can view an object implementing ActionInterface<F> as a
-// concrete action (including its current state), and an Action<F>
-// object as a handle to it.
template <typename F>
-class Action {
+class Action;
+
+// An Action<R(Args...)> is a copyable and IMMUTABLE (except by assignment)
+// object that represents an action to be taken when a mock function of type
+// R(Args...) is called. The implementation of Action<T> is just a
+// std::shared_ptr to const ActionInterface<T>. Don't inherit from Action! You
+// can view an object implementing ActionInterface<F> as a concrete action
+// (including its current state), and an Action<F> object as a handle to it.
+template <typename R, typename... Args>
+class Action<R(Args...)> {
+ private:
+ using F = R(Args...);
+
// Adapter class to allow constructing Action from a legacy ActionInterface.
// New code should create Actions from functors instead.
struct ActionAdapter {
// Adapter must be copyable to satisfy std::function requirements.
::std::shared_ptr<ActionInterface<F>> impl_;
- template <typename... Args>
- typename internal::Function<F>::Result operator()(Args&&... args) {
+ template <typename... InArgs>
+ typename internal::Function<F>::Result operator()(InArgs&&... args) {
return impl_->Perform(
- ::std::forward_as_tuple(::std::forward<Args>(args)...));
+ ::std::forward_as_tuple(::std::forward<InArgs>(args)...));
}
};
+ template <typename G>
+ using IsCompatibleFunctor = std::is_constructible<std::function<F>, G>;
+
public:
typedef typename internal::Function<F>::Result Result;
typedef typename internal::Function<F>::ArgumentTuple ArgumentTuple;
@@ -455,10 +757,14 @@ class Action {
// Construct an Action from a specified callable.
// This cannot take std::function directly, because then Action would not be
// directly constructible from lambda (it would require two conversions).
- template <typename G,
- typename = typename ::std::enable_if<
- ::std::is_constructible<::std::function<F>, G>::value>::type>
- Action(G&& fun) : fun_(::std::forward<G>(fun)) {} // NOLINT
+ template <
+ typename G,
+ typename = typename std::enable_if<internal::disjunction<
+ IsCompatibleFunctor<G>, std::is_constructible<std::function<Result()>,
+ G>>::value>::type>
+ Action(G&& fun) { // NOLINT
+ Init(::std::forward<G>(fun), IsCompatibleFunctor<G>());
+ }
// Constructs an Action from its implementation.
explicit Action(ActionInterface<F>* impl)
@@ -468,7 +774,8 @@ class Action {
// Action<F>, as long as F's arguments can be implicitly converted
// to Func's and Func's return type can be implicitly converted to F's.
template <typename Func>
- explicit Action(const Action<Func>& action) : fun_(action.fun_) {}
+ Action(const Action<Func>& action) // NOLINT
+ : fun_(action.fun_) {}
// Returns true if and only if this is the DoDefault() action.
bool IsDoDefault() const { return fun_ == nullptr; }
@@ -486,10 +793,48 @@ class Action {
return internal::Apply(fun_, ::std::move(args));
}
+ // An action can be used as a OnceAction, since it's obviously safe to call it
+ // once.
+ operator OnceAction<F>() const { // NOLINT
+ // Return a OnceAction-compatible callable that calls Perform with the
+ // arguments it is provided. We could instead just return fun_, but then
+ // we'd need to handle the IsDoDefault() case separately.
+ struct OA {
+ Action<F> action;
+
+ R operator()(Args... args) && {
+ return action.Perform(
+ std::forward_as_tuple(std::forward<Args>(args)...));
+ }
+ };
+
+ return OA{*this};
+ }
+
private:
template <typename G>
friend class Action;
+ template <typename G>
+ void Init(G&& g, ::std::true_type) {
+ fun_ = ::std::forward<G>(g);
+ }
+
+ template <typename G>
+ void Init(G&& g, ::std::false_type) {
+ fun_ = IgnoreArgs<typename ::std::decay<G>::type>{::std::forward<G>(g)};
+ }
+
+ template <typename FunctionImpl>
+ struct IgnoreArgs {
+ template <typename... InArgs>
+ Result operator()(const InArgs&...) const {
+ return function_impl();
+ }
+
+ FunctionImpl function_impl;
+ };
+
// fun_ is an empty function if and only if this is the DoDefault() action.
::std::function<F> fun_;
};
@@ -540,13 +885,9 @@ class PolymorphicAction {
private:
Impl impl_;
-
- GTEST_DISALLOW_ASSIGN_(MonomorphicImpl);
};
Impl impl_;
-
- GTEST_DISALLOW_ASSIGN_(PolymorphicAction);
};
// Creates an Action from its implementation and returns it. The
@@ -578,122 +919,198 @@ struct ByMoveWrapper {
T payload;
};
-// Implements the polymorphic Return(x) action, which can be used in
-// any function that returns the type of x, regardless of the argument
-// types.
-//
-// Note: The value passed into Return must be converted into
-// Function<F>::Result when this action is cast to Action<F> rather than
-// when that action is performed. This is important in scenarios like
-//
-// MOCK_METHOD1(Method, T(U));
-// ...
-// {
-// Foo foo;
-// X x(&foo);
-// EXPECT_CALL(mock, Method(_)).WillOnce(Return(x));
-// }
-//
-// In the example above the variable x holds reference to foo which leaves
-// scope and gets destroyed. If copying X just copies a reference to foo,
-// that copy will be left with a hanging reference. If conversion to T
-// makes a copy of foo, the above code is safe. To support that scenario, we
-// need to make sure that the type conversion happens inside the EXPECT_CALL
-// statement, and conversion of the result of Return to Action<T(U)> is a
-// good place for that.
-//
-// The real life example of the above scenario happens when an invocation
-// of gtl::Container() is passed into Return.
-//
+// The general implementation of Return(R). Specializations follow below.
template <typename R>
-class ReturnAction {
+class ReturnAction final {
public:
- // Constructs a ReturnAction object from the value to be returned.
- // 'value' is passed by value instead of by const reference in order
- // to allow Return("string literal") to compile.
- explicit ReturnAction(R value) : value_(new R(std::move(value))) {}
+ explicit ReturnAction(R value) : value_(std::move(value)) {}
+
+ template <typename U, typename... Args,
+ typename = typename std::enable_if<conjunction<
+ // See the requirements documented on Return.
+ negation<std::is_same<void, U>>, //
+ negation<std::is_reference<U>>, //
+ std::is_convertible<R, U>, //
+ std::is_move_constructible<U>>::value>::type>
+ operator OnceAction<U(Args...)>() && { // NOLINT
+ return Impl<U>(std::move(value_));
+ }
- // This template type conversion operator allows Return(x) to be
- // used in ANY function that returns x's type.
- template <typename F>
- operator Action<F>() const { // NOLINT
- // Assert statement belongs here because this is the best place to verify
- // conditions on F. It produces the clearest error messages
- // in most compilers.
- // Impl really belongs in this scope as a local class but can't
- // because MSVC produces duplicate symbols in different translation units
- // in this case. Until MS fixes that bug we put Impl into the class scope
- // and put the typedef both here (for use in assert statement) and
- // in the Impl class. But both definitions must be the same.
- typedef typename Function<F>::Result Result;
- GTEST_COMPILE_ASSERT_(
- !std::is_reference<Result>::value,
- use_ReturnRef_instead_of_Return_to_return_a_reference);
- static_assert(!std::is_void<Result>::value,
- "Can't use Return() on an action expected to return `void`.");
- return Action<F>(new Impl<R, F>(value_));
+ template <typename U, typename... Args,
+ typename = typename std::enable_if<conjunction<
+ // See the requirements documented on Return.
+ negation<std::is_same<void, U>>, //
+ negation<std::is_reference<U>>, //
+ std::is_convertible<const R&, U>, //
+ std::is_copy_constructible<U>>::value>::type>
+ operator Action<U(Args...)>() const { // NOLINT
+ return Impl<U>(value_);
}
private:
- // Implements the Return(x) action for a particular function type F.
- template <typename R_, typename F>
- class Impl : public ActionInterface<F> {
+ // Implements the Return(x) action for a mock function that returns type U.
+ template <typename U>
+ class Impl final {
public:
- typedef typename Function<F>::Result Result;
- typedef typename Function<F>::ArgumentTuple ArgumentTuple;
+ // The constructor used when the return value is allowed to move from the
+ // input value (i.e. we are converting to OnceAction).
+ explicit Impl(R&& input_value)
+ : state_(new State(std::move(input_value))) {}
- // The implicit cast is necessary when Result has more than one
- // single-argument constructor (e.g. Result is std::vector<int>) and R
- // has a type conversion operator template. In that case, value_(value)
- // won't compile as the compiler doesn't known which constructor of
- // Result to call. ImplicitCast_ forces the compiler to convert R to
- // Result without considering explicit constructors, thus resolving the
- // ambiguity. value_ is then initialized using its copy constructor.
- explicit Impl(const std::shared_ptr<R>& value)
- : value_before_cast_(*value),
- value_(ImplicitCast_<Result>(value_before_cast_)) {}
+ // The constructor used when the return value is not allowed to move from
+ // the input value (i.e. we are converting to Action).
+ explicit Impl(const R& input_value) : state_(new State(input_value)) {}
- Result Perform(const ArgumentTuple&) override { return value_; }
+ U operator()() && { return std::move(state_->value); }
+ U operator()() const& { return state_->value; }
private:
- GTEST_COMPILE_ASSERT_(!std::is_reference<Result>::value,
- Result_cannot_be_a_reference_type);
- // We save the value before casting just in case it is being cast to a
- // wrapper type.
- R value_before_cast_;
- Result value_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(Impl);
+ // We put our state on the heap so that the compiler-generated copy/move
+ // constructors work correctly even when U is a reference-like type. This is
+ // necessary only because we eagerly create State::value (see the note on
+ // that symbol for details). If we instead had only the input value as a
+ // member then the default constructors would work fine.
+ //
+ // For example, when R is std::string and U is std::string_view, value is a
+ // reference to the string backed by input_value. The copy constructor would
+ // copy both, so that we wind up with a new input_value object (with the
+ // same contents) and a reference to the *old* input_value object rather
+ // than the new one.
+ struct State {
+ explicit State(const R& input_value_in)
+ : input_value(input_value_in),
+        // Make an implicit conversion to U before initializing the U
+ // object we store, avoiding calling any explicit constructor of U
+ // from R.
+ //
+ // This simulates the language rules: a function with return type U
+ // that does `return R()` requires R to be implicitly convertible to
+        //   U, and uses that path for the conversion, even if U has an
+        //   explicit constructor from R.
+ value(ImplicitCast_<U>(internal::as_const(input_value))) {}
+
+ // As above, but for the case where we're moving from the ReturnAction
+ // object because it's being used as a OnceAction.
+ explicit State(R&& input_value_in)
+ : input_value(std::move(input_value_in)),
+ // For the same reason as above we make an implicit conversion to U
+ // before initializing the value.
+ //
+ // Unlike above we provide the input value as an rvalue to the
+ // implicit conversion because this is a OnceAction: it's fine if it
+ // wants to consume the input value.
+ value(ImplicitCast_<U>(std::move(input_value))) {}
+
+ // A copy of the value originally provided by the user. We retain this in
+ // addition to the value of the mock function's result type below in case
+ // the latter is a reference-like type. See the std::string_view example
+ // in the documentation on Return.
+ R input_value;
+
+ // The value we actually return, as the type returned by the mock function
+ // itself.
+ //
+ // We eagerly initialize this here, rather than lazily doing the implicit
+ // conversion automatically each time Perform is called, for historical
+ // reasons: in 2009-11, commit a070cbd91c (Google changelist 13540126)
+ // made the Action<U()> conversion operator eagerly convert the R value to
+ // U, but without keeping the R alive. This broke the use case discussed
+ // in the documentation for Return, making reference-like types such as
+ // std::string_view not safe to use as U where the input type R is a
+ // value-like type such as std::string.
+ //
+ // The example the commit gave was not very clear, nor was the issue
+ // thread (https://github.com/google/googlemock/issues/86), but it seems
+ // the worry was about reference-like input types R that flatten to a
+ // value-like type U when being implicitly converted. An example of this
+    //   is std::vector<bool>::reference, which is often a proxy type with a
+    //   reference to the underlying vector:
+ //
+ // // Helper method: have the mock function return bools according
+ // // to the supplied script.
+ // void SetActions(MockFunction<bool(size_t)>& mock,
+ // const std::vector<bool>& script) {
+ // for (size_t i = 0; i < script.size(); ++i) {
+ // EXPECT_CALL(mock, Call(i)).WillOnce(Return(script[i]));
+ // }
+ // }
+ //
+ // TEST(Foo, Bar) {
+ // // Set actions using a temporary vector, whose operator[]
+ // // returns proxy objects that references that will be
+    //     // returns proxy objects holding references that will be
+ // // vector is destroyed.
+ // MockFunction<bool(size_t)> mock;
+ // SetActions(mock, {false, true});
+ //
+ // EXPECT_FALSE(mock.AsStdFunction()(0));
+ // EXPECT_TRUE(mock.AsStdFunction()(1));
+ // }
+ //
+ // This eager conversion helps with a simple case like this, but doesn't
+ // fully make these types work in general. For example the following still
+ // uses a dangling reference:
+ //
+ // TEST(Foo, Baz) {
+ // MockFunction<std::vector<std::string>()> mock;
+ //
+ // // Return the same vector twice, and then the empty vector
+ // // thereafter.
+ // auto action = Return(std::initializer_list<std::string>{
+ // "taco", "burrito",
+ // });
+ //
+ // EXPECT_CALL(mock, Call)
+ // .WillOnce(action)
+ // .WillOnce(action)
+ // .WillRepeatedly(Return(std::vector<std::string>{}));
+ //
+ // EXPECT_THAT(mock.AsStdFunction()(),
+ // ElementsAre("taco", "burrito"));
+ // EXPECT_THAT(mock.AsStdFunction()(),
+ // ElementsAre("taco", "burrito"));
+ // EXPECT_THAT(mock.AsStdFunction()(), IsEmpty());
+ // }
+ //
+ U value;
+ };
+
+ const std::shared_ptr<State> state_;
};
- // Partially specialize for ByMoveWrapper. This version of ReturnAction will
- // move its contents instead.
- template <typename R_, typename F>
- class Impl<ByMoveWrapper<R_>, F> : public ActionInterface<F> {
- public:
- typedef typename Function<F>::Result Result;
- typedef typename Function<F>::ArgumentTuple ArgumentTuple;
+ R value_;
+};
- explicit Impl(const std::shared_ptr<R>& wrapper)
- : performed_(false), wrapper_(wrapper) {}
+// A specialization of ReturnAction<R> when R is ByMoveWrapper<T> for some T.
+//
+// This version applies the type system-defeating hack of moving from T even in
+// the const call operator, checking at runtime that it isn't called more than
+// once, since the user has declared their intent to do so by using ByMove.
+template <typename T>
+class ReturnAction<ByMoveWrapper<T>> final {
+ public:
+ explicit ReturnAction(ByMoveWrapper<T> wrapper)
+ : state_(new State(std::move(wrapper.payload))) {}
- Result Perform(const ArgumentTuple&) override {
- GTEST_CHECK_(!performed_)
- << "A ByMove() action should only be performed once.";
- performed_ = true;
- return std::move(wrapper_->payload);
- }
+ T operator()() const {
+ GTEST_CHECK_(!state_->called)
+ << "A ByMove() action must be performed at most once.";
- private:
- bool performed_;
- const std::shared_ptr<R> wrapper_;
+ state_->called = true;
+ return std::move(state_->value);
+ }
- GTEST_DISALLOW_ASSIGN_(Impl);
- };
+ private:
+ // We store our state on the heap so that we are copyable as required by
+ // Action, despite the fact that we are stateful and T may not be copyable.
+ struct State {
+ explicit State(T&& value_in) : value(std::move(value_in)) {}
- const std::shared_ptr<R> value_;
+ T value;
+ bool called = false;
+ };
- GTEST_DISALLOW_ASSIGN_(ReturnAction);
+ const std::shared_ptr<State> state_;
};
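// [Editorial sketch, not part of the patch] The once-only runtime check above
// is what a repeated ByMove action trips over; Foo is hypothetical:
//
//   MockFunction<std::unique_ptr<Foo>()> mock;
//   EXPECT_CALL(mock, Call)
//       .WillRepeatedly(Return(ByMove(std::make_unique<Foo>())));
//   mock.AsStdFunction()();  // OK: moves the payload out.
//   mock.AsStdFunction()();  // Fails GTEST_CHECK_: performed a second time.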
// Implements the ReturnNull() action.
@@ -735,8 +1152,8 @@ class ReturnRefAction {
// Asserts that the function return type is a reference. This
// catches the user error of using ReturnRef(x) when Return(x)
// should be used, and generates some helpful error message.
- GTEST_COMPILE_ASSERT_(std::is_reference<Result>::value,
- use_Return_instead_of_ReturnRef_to_return_a_value);
+ static_assert(std::is_reference<Result>::value,
+ "use Return instead of ReturnRef to return a value");
return Action<F>(new Impl<F>(ref_));
}
@@ -754,13 +1171,9 @@ class ReturnRefAction {
private:
T& ref_;
-
- GTEST_DISALLOW_ASSIGN_(Impl);
};
T& ref_;
-
- GTEST_DISALLOW_ASSIGN_(ReturnRefAction);
};
// Implements the polymorphic ReturnRefOfCopy(x) action, which can be
@@ -781,9 +1194,8 @@ class ReturnRefOfCopyAction {
// Asserts that the function return type is a reference. This
// catches the user error of using ReturnRefOfCopy(x) when Return(x)
// should be used, and generates some helpful error message.
- GTEST_COMPILE_ASSERT_(
- std::is_reference<Result>::value,
- use_Return_instead_of_ReturnRefOfCopy_to_return_a_value);
+ static_assert(std::is_reference<Result>::value,
+ "use Return instead of ReturnRefOfCopy to return a value");
return Action<F>(new Impl<F>(value_));
}
@@ -801,13 +1213,9 @@ class ReturnRefOfCopyAction {
private:
T value_;
-
- GTEST_DISALLOW_ASSIGN_(Impl);
};
const T value_;
-
- GTEST_DISALLOW_ASSIGN_(ReturnRefOfCopyAction);
};
// Implements the polymorphic ReturnRoundRobin(v) action, which can be
@@ -823,7 +1231,7 @@ class ReturnRoundRobinAction {
template <typename... Args>
T operator()(Args&&...) const {
- return state_->Next();
+ return state_->Next();
}
private:
@@ -846,7 +1254,9 @@ class DoDefaultAction {
// This template type conversion operator allows DoDefault() to be
// used in any function.
template <typename F>
- operator Action<F>() const { return Action<F>(); } // NOLINT
+ operator Action<F>() const {
+ return Action<F>();
+ } // NOLINT
};
// Implements the Assign action to set a given pointer referent to a
@@ -864,8 +1274,6 @@ class AssignAction {
private:
T1* const ptr_;
const T2 value_;
-
- GTEST_DISALLOW_ASSIGN_(AssignAction);
};
#if !GTEST_OS_WINDOWS_MOBILE
@@ -876,8 +1284,7 @@ template <typename T>
class SetErrnoAndReturnAction {
public:
SetErrnoAndReturnAction(int errno_value, T result)
- : errno_(errno_value),
- result_(result) {}
+ : errno_(errno_value), result_(result) {}
template <typename Result, typename ArgumentTuple>
Result Perform(const ArgumentTuple& /* args */) const {
errno = errno_;
@@ -887,8 +1294,6 @@ class SetErrnoAndReturnAction {
private:
const int errno_;
const T result_;
-
- GTEST_DISALLOW_ASSIGN_(SetErrnoAndReturnAction);
};
#endif // !GTEST_OS_WINDOWS_MOBILE
@@ -940,7 +1345,8 @@ struct InvokeMethodWithoutArgsAction {
Class* const obj_ptr;
const MethodPtr method_ptr;
- using ReturnType = typename std::result_of<MethodPtr(Class*)>::type;
+ using ReturnType =
+ decltype((std::declval<Class*>()->*std::declval<MethodPtr>())());
template <typename... Args>
ReturnType operator()(const Args&...) const {
@@ -989,66 +1395,351 @@ class IgnoreResultAction {
private:
// Type OriginalFunction is the same as F except that its return
// type is IgnoredValue.
- typedef typename internal::Function<F>::MakeResultIgnoredValue
- OriginalFunction;
+ typedef
+ typename internal::Function<F>::MakeResultIgnoredValue OriginalFunction;
const Action<OriginalFunction> action_;
-
- GTEST_DISALLOW_ASSIGN_(Impl);
};
const A action_;
-
- GTEST_DISALLOW_ASSIGN_(IgnoreResultAction);
};
template <typename InnerAction, size_t... I>
struct WithArgsAction {
- InnerAction action;
+ InnerAction inner_action;
- // The inner action could be anything convertible to Action<X>.
- // We use the conversion operator to detect the signature of the inner Action.
+  // The signature of the function as seen by the inner action, given an outer
+  // action with the given result and argument types.
template <typename R, typename... Args>
+ using InnerSignature =
+ R(typename std::tuple_element<I, std::tuple<Args...>>::type...);
+
+ // Rather than a call operator, we must define conversion operators to
+ // particular action types. This is necessary for embedded actions like
+  // DoDefault(), which rely on action conversion operators rather than
+ // providing a call operator because even with a particular set of arguments
+ // they don't have a fixed return type.
+
+ template <typename R, typename... Args,
+ typename std::enable_if<
+ std::is_convertible<
+ InnerAction,
+ // Unfortunately we can't use the InnerSignature alias here;
+ // MSVC complains about the I parameter pack not being
+ // expanded (error C3520) despite it being expanded in the
+ // type alias.
+ OnceAction<R(typename std::tuple_element<
+ I, std::tuple<Args...>>::type...)>>::value,
+ int>::type = 0>
+ operator OnceAction<R(Args...)>() && { // NOLINT
+ struct OA {
+ OnceAction<InnerSignature<R, Args...>> inner_action;
+
+ R operator()(Args&&... args) && {
+ return std::move(inner_action)
+ .Call(std::get<I>(
+ std::forward_as_tuple(std::forward<Args>(args)...))...);
+ }
+ };
+
+ return OA{std::move(inner_action)};
+ }
+
+ template <typename R, typename... Args,
+ typename std::enable_if<
+ std::is_convertible<
+ const InnerAction&,
+ // Unfortunately we can't use the InnerSignature alias here;
+ // MSVC complains about the I parameter pack not being
+ // expanded (error C3520) despite it being expanded in the
+ // type alias.
+ Action<R(typename std::tuple_element<
+ I, std::tuple<Args...>>::type...)>>::value,
+ int>::type = 0>
operator Action<R(Args...)>() const { // NOLINT
- using TupleType = std::tuple<Args...>;
- Action<R(typename std::tuple_element<I, TupleType>::type...)>
- converted(action);
+ Action<InnerSignature<R, Args...>> converted(inner_action);
- return [converted](Args... args) -> R {
+ return [converted](Args&&... args) -> R {
return converted.Perform(std::forward_as_tuple(
- std::get<I>(std::forward_as_tuple(std::forward<Args>(args)...))...));
+ std::get<I>(std::forward_as_tuple(std::forward<Args>(args)...))...));
};
}
};
template <typename... Actions>
-struct DoAllAction {
- private:
- template <typename... Args, size_t... I>
- std::vector<Action<void(Args...)>> Convert(IndexSequence<I...>) const {
- return {std::get<I>(actions)...};
+class DoAllAction;
+
+// Base case: only a single action.
+template <typename FinalAction>
+class DoAllAction<FinalAction> {
+ public:
+ struct UserConstructorTag {};
+
+ template <typename T>
+ explicit DoAllAction(UserConstructorTag, T&& action)
+ : final_action_(std::forward<T>(action)) {}
+
+ // Rather than a call operator, we must define conversion operators to
+ // particular action types. This is necessary for embedded actions like
+  // DoDefault(), which rely on action conversion operators rather than
+ // providing a call operator because even with a particular set of arguments
+ // they don't have a fixed return type.
+
+ template <typename R, typename... Args,
+ typename std::enable_if<
+ std::is_convertible<FinalAction, OnceAction<R(Args...)>>::value,
+ int>::type = 0>
+ operator OnceAction<R(Args...)>() && { // NOLINT
+ return std::move(final_action_);
}
+ template <
+ typename R, typename... Args,
+ typename std::enable_if<
+ std::is_convertible<const FinalAction&, Action<R(Args...)>>::value,
+ int>::type = 0>
+ operator Action<R(Args...)>() const { // NOLINT
+ return final_action_;
+ }
+
+ private:
+ FinalAction final_action_;
+};
+
+// Recursive case: support N actions by calling the initial action and then
+// calling through to the base class containing N-1 actions.
+template <typename InitialAction, typename... OtherActions>
+class DoAllAction<InitialAction, OtherActions...>
+ : private DoAllAction<OtherActions...> {
+ private:
+ using Base = DoAllAction<OtherActions...>;
+
+ // The type of reference that should be provided to an initial action for a
+ // mocked function parameter of type T.
+ //
+ // There are two quirks here:
+ //
+ // * Unlike most forwarding functions, we pass scalars through by value.
+ // This isn't strictly necessary because an lvalue reference would work
+ // fine too and be consistent with other non-reference types, but it's
+ // perhaps less surprising.
+ //
+ // For example if the mocked function has signature void(int), then it
+ // might seem surprising for the user's initial action to need to be
+ // convertible to Action<void(const int&)>. This is perhaps less
+ // surprising for a non-scalar type where there may be a performance
+ // impact, or it might even be impossible, to pass by value.
+ //
+ // * More surprisingly, `const T&` is often not a const reference type.
+ // By the reference collapsing rules in C++17 [dcl.ref]/6, if T refers to
+ // U& or U&& for some non-scalar type U, then InitialActionArgType<T> is
+ // U&. In other words, we may hand over a non-const reference.
+ //
+ // So for example, given some non-scalar type Obj we have the following
+ // mappings:
+ //
+ // T InitialActionArgType<T>
+ // ------- -----------------------
+ // Obj const Obj&
+ // Obj& Obj&
+ // Obj&& Obj&
+ // const Obj const Obj&
+ // const Obj& const Obj&
+ // const Obj&& const Obj&
+ //
+  //   In other words, the initial actions get a mutable view of a non-scalar
+  //   argument if and only if the mock function itself accepts a non-const
+  //   reference type. They are never given an rvalue reference to a
+  //   non-scalar type.
+ //
+ // This situation makes sense if you imagine use with a matcher that is
+ // designed to write through a reference. For example, if the caller wants
+ // to fill in a reference argument and then return a canned value:
+ //
+ // EXPECT_CALL(mock, Call)
+ // .WillOnce(DoAll(SetArgReferee<0>(17), Return(19)));
+ //
+ template <typename T>
+ using InitialActionArgType =
+ typename std::conditional<std::is_scalar<T>::value, T, const T&>::type;
+
public:
- std::tuple<Actions...> actions;
+ struct UserConstructorTag {};
+
+ template <typename T, typename... U>
+ explicit DoAllAction(UserConstructorTag, T&& initial_action,
+ U&&... other_actions)
+ : Base({}, std::forward<U>(other_actions)...),
+ initial_action_(std::forward<T>(initial_action)) {}
+
+ template <typename R, typename... Args,
+ typename std::enable_if<
+ conjunction<
+ // Both the initial action and the rest must support
+ // conversion to OnceAction.
+ std::is_convertible<
+ InitialAction,
+ OnceAction<void(InitialActionArgType<Args>...)>>,
+ std::is_convertible<Base, OnceAction<R(Args...)>>>::value,
+ int>::type = 0>
+ operator OnceAction<R(Args...)>() && { // NOLINT
+ // Return an action that first calls the initial action with arguments
+ // filtered through InitialActionArgType, then forwards arguments directly
+ // to the base class to deal with the remaining actions.
+ struct OA {
+ OnceAction<void(InitialActionArgType<Args>...)> initial_action;
+ OnceAction<R(Args...)> remaining_actions;
+
+ R operator()(Args... args) && {
+ std::move(initial_action)
+ .Call(static_cast<InitialActionArgType<Args>>(args)...);
+
+ return std::move(remaining_actions).Call(std::forward<Args>(args)...);
+ }
+ };
- template <typename R, typename... Args>
+ return OA{
+ std::move(initial_action_),
+ std::move(static_cast<Base&>(*this)),
+ };
+ }
+
+ template <
+ typename R, typename... Args,
+ typename std::enable_if<
+ conjunction<
+ // Both the initial action and the rest must support conversion to
+ // Action.
+ std::is_convertible<const InitialAction&,
+ Action<void(InitialActionArgType<Args>...)>>,
+ std::is_convertible<const Base&, Action<R(Args...)>>>::value,
+ int>::type = 0>
operator Action<R(Args...)>() const { // NOLINT
- struct Op {
- std::vector<Action<void(Args...)>> converted;
- Action<R(Args...)> last;
+ // Return an action that first calls the initial action with arguments
+ // filtered through InitialActionArgType, then forwards arguments directly
+ // to the base class to deal with the remaining actions.
+ struct OA {
+ Action<void(InitialActionArgType<Args>...)> initial_action;
+ Action<R(Args...)> remaining_actions;
+
R operator()(Args... args) const {
- auto tuple_args = std::forward_as_tuple(std::forward<Args>(args)...);
- for (auto& a : converted) {
- a.Perform(tuple_args);
- }
- return last.Perform(tuple_args);
+ initial_action.Perform(std::forward_as_tuple(
+ static_cast<InitialActionArgType<Args>>(args)...));
+
+ return remaining_actions.Perform(
+ std::forward_as_tuple(std::forward<Args>(args)...));
}
};
- return Op{Convert<Args...>(MakeIndexSequence<sizeof...(Actions) - 1>()),
- std::get<sizeof...(Actions) - 1>(actions)};
+
+ return OA{
+ initial_action_,
+ static_cast<const Base&>(*this),
+ };
+ }
+
+ private:
+ InitialAction initial_action_;
+};
+
+template <typename T, typename... Params>
+struct ReturnNewAction {
+ T* operator()() const {
+ return internal::Apply(
+ [](const Params&... unpacked_params) {
+ return new T(unpacked_params...);
+ },
+ params);
+ }
+ std::tuple<Params...> params;
+};
+
+template <size_t k>
+struct ReturnArgAction {
+ template <typename... Args,
+ typename = typename std::enable_if<(k < sizeof...(Args))>::type>
+ auto operator()(Args&&... args) const -> decltype(std::get<k>(
+ std::forward_as_tuple(std::forward<Args>(args)...))) {
+ return std::get<k>(std::forward_as_tuple(std::forward<Args>(args)...));
+ }
+};
+
+template <size_t k, typename Ptr>
+struct SaveArgAction {
+ Ptr pointer;
+
+ template <typename... Args>
+ void operator()(const Args&... args) const {
+ *pointer = std::get<k>(std::tie(args...));
+ }
+};
+
+template <size_t k, typename Ptr>
+struct SaveArgPointeeAction {
+ Ptr pointer;
+
+ template <typename... Args>
+ void operator()(const Args&... args) const {
+ *pointer = *std::get<k>(std::tie(args...));
+ }
+};
+
+template <size_t k, typename T>
+struct SetArgRefereeAction {
+ T value;
+
+ template <typename... Args>
+ void operator()(Args&&... args) const {
+ using argk_type =
+ typename ::std::tuple_element<k, std::tuple<Args...>>::type;
+ static_assert(std::is_lvalue_reference<argk_type>::value,
+ "Argument must be a reference type.");
+ std::get<k>(std::tie(args...)) = value;
+ }
+};
+
+template <size_t k, typename I1, typename I2>
+struct SetArrayArgumentAction {
+ I1 first;
+ I2 last;
+
+ template <typename... Args>
+ void operator()(const Args&... args) const {
+ auto value = std::get<k>(std::tie(args...));
+ for (auto it = first; it != last; ++it, (void)++value) {
+ *value = *it;
+ }
+ }
+};
+
+template <size_t k>
+struct DeleteArgAction {
+ template <typename... Args>
+ void operator()(const Args&... args) const {
+ delete std::get<k>(std::tie(args...));
+ }
+};
+
+template <typename Ptr>
+struct ReturnPointeeAction {
+ Ptr pointer;
+ template <typename... Args>
+ auto operator()(const Args&...) const -> decltype(*pointer) {
+ return *pointer;
+ }
+};
+
+#if GTEST_HAS_EXCEPTIONS
+template <typename T>
+struct ThrowAction {
+ T exception;
+ // We use a conversion operator to adapt to any return type.
+ template <typename R, typename... Args>
+ operator Action<R(Args...)>() const { // NOLINT
+ T copy = exception;
+ return [copy](Args...) -> R { throw copy; };
}
};
+#endif // GTEST_HAS_EXCEPTIONS
} // namespace internal
@@ -1085,11 +1776,13 @@ struct DoAllAction {
typedef internal::IgnoredValue Unused;
// Creates an action that does actions a1, a2, ..., sequentially in
-// each invocation.
+// each invocation. All but the last action will have a readonly view of the
+// arguments.
template <typename... Action>
internal::DoAllAction<typename std::decay<Action>::type...> DoAll(
Action&&... action) {
- return {std::forward_as_tuple(std::forward<Action>(action)...)};
+ return internal::DoAllAction<typename std::decay<Action>::type...>(
+ {}, std::forward<Action>(action)...);
}
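// [Editorial sketch, not part of the patch] Only the last action produces the
// return value; earlier actions see the arguments via InitialActionArgType
// (writable only where the mock signature itself takes a non-const reference):
//
//   MockFunction<int(int&)> mock;
//   EXPECT_CALL(mock, Call)
//       .WillOnce(DoAll(SetArgReferee<0>(17), Return(19)));
//   int x = 0;
//   EXPECT_EQ(19, mock.AsStdFunction()(x));
//   EXPECT_EQ(17, x);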
// WithArg<k>(an_action) creates an action that passes the k-th
@@ -1098,8 +1791,8 @@ internal::DoAllAction<typename std::decay<Action>::type...> DoAll(
// multiple arguments. For convenience, we also provide
// WithArgs<k>(an_action) (defined below) as a synonym.
template <size_t k, typename InnerAction>
-internal::WithArgsAction<typename std::decay<InnerAction>::type, k>
-WithArg(InnerAction&& action) {
+internal::WithArgsAction<typename std::decay<InnerAction>::type, k> WithArg(
+ InnerAction&& action) {
return {std::forward<InnerAction>(action)};
}
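// [Editorial sketch, not part of the patch] WithArg hands only the selected
// argument to the inner action:
//
//   MockFunction<int(int, int)> mock;
//   EXPECT_CALL(mock, Call)
//       .WillOnce(WithArg<1>([](int second) { return second * 2; }));
//   EXPECT_EQ(10, mock.AsStdFunction()(1, 5));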
@@ -1118,14 +1811,35 @@ WithArgs(InnerAction&& action) {
// argument. In other words, it adapts an action accepting no
// argument to one that accepts (and ignores) arguments.
template <typename InnerAction>
-internal::WithArgsAction<typename std::decay<InnerAction>::type>
-WithoutArgs(InnerAction&& action) {
+internal::WithArgsAction<typename std::decay<InnerAction>::type> WithoutArgs(
+ InnerAction&& action) {
return {std::forward<InnerAction>(action)};
}
-// Creates an action that returns 'value'. 'value' is passed by value
-// instead of const reference - otherwise Return("string literal")
-// will trigger a compiler error about using array as initializer.
+// Creates an action that returns a value.
+//
+// The returned type can be used with a mock function returning a non-void,
+// non-reference type U as follows:
+//
+// * If R is convertible to U and U is move-constructible, then the action can
+// be used with WillOnce.
+//
+// * If const R& is convertible to U and U is copy-constructible, then the
+// action can be used with both WillOnce and WillRepeatedly.
+//
+// The mock expectation contains the R value from which the U return value is
+// constructed (a move/copy of the argument to Return). This means that the R
+// value will survive at least until the mock object's expectations are cleared
+// or the mock object is destroyed, meaning that U can safely be a
+// reference-like type such as std::string_view:
+//
+// // The mock function returns a view of a copy of the string fed to
+// // Return. The view is valid even after the action is performed.
+// MockFunction<std::string_view()> mock;
+// EXPECT_CALL(mock, Call).WillOnce(Return(std::string("taco")));
+// const std::string_view result = mock.AsStdFunction()();
+// EXPECT_EQ("taco", result);
+//
template <typename R>
internal::ReturnAction<R> Return(R value) {
return internal::ReturnAction<R>(std::move(value));
@@ -1159,6 +1873,8 @@ inline internal::ReturnRefOfCopyAction<R> ReturnRefOfCopy(const R& x) {
return internal::ReturnRefOfCopyAction<R>(x);
}
+// DEPRECATED: use Return(x) directly with WillOnce.
+//
// Modifies the parent action (a Return() action) to perform a move of the
// argument instead of a copy.
// Return(ByMove()) actions can only be executed once and will assert this
@@ -1205,7 +1921,7 @@ internal::SetArgumentPointeeAction<N, T> SetArgumentPointee(T value) {
// Creates an action that sets a pointer referent to a given value.
template <typename T1, typename T2>
-PolymorphicAction<internal::AssignAction<T1, T2> > Assign(T1* ptr, T2 val) {
+PolymorphicAction<internal::AssignAction<T1, T2>> Assign(T1* ptr, T2 val) {
return MakePolymorphicAction(internal::AssignAction<T1, T2>(ptr, val));
}
@@ -1213,8 +1929,8 @@ PolymorphicAction<internal::AssignAction<T1, T2> > Assign(T1* ptr, T2 val) {
// Creates an action that sets errno and returns the appropriate error.
template <typename T>
-PolymorphicAction<internal::SetErrnoAndReturnAction<T> >
-SetErrnoAndReturn(int errval, T result) {
+PolymorphicAction<internal::SetErrnoAndReturnAction<T>> SetErrnoAndReturn(
+ int errval, T result) {
return MakePolymorphicAction(
internal::SetErrnoAndReturnAction<T>(errval, result));
}
@@ -1278,6 +1994,76 @@ inline ::std::reference_wrapper<T> ByRef(T& l_value) { // NOLINT
return ::std::reference_wrapper<T>(l_value);
}
+// The ReturnNew<T>(a1, a2, ..., a_k) action returns a pointer to a new
+// instance of type T, constructed on the heap with constructor arguments
+// a1, a2, ..., and a_k. The caller assumes ownership of the returned value.
+template <typename T, typename... Params>
+internal::ReturnNewAction<T, typename std::decay<Params>::type...> ReturnNew(
+ Params&&... params) {
+ return {std::forward_as_tuple(std::forward<Params>(params)...)};
+}
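An illustrative sketch; Widget is a hypothetical type with an int constructor:

    MockFunction<Widget*()> factory;
    EXPECT_CALL(factory, Call).WillOnce(ReturnNew<Widget>(42));
    // The caller owns the heap allocation, so wrap it immediately.
    std::unique_ptr<Widget> w(factory.AsStdFunction()());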
+
+// Action ReturnArg<k>() returns the k-th argument of the mock function.
+template <size_t k>
+internal::ReturnArgAction<k> ReturnArg() {
+ return {};
+}
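A minimal sketch (illustrative):

    MockFunction<int(int, int)> mock;
    EXPECT_CALL(mock, Call).WillRepeatedly(ReturnArg<1>());  // Echoes argument 1.
    EXPECT_EQ(7, mock.AsStdFunction()(3, 7));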
+
+// Action SaveArg<k>(pointer) saves the k-th (0-based) argument of the
+// mock function to *pointer.
+template <size_t k, typename Ptr>
+internal::SaveArgAction<k, Ptr> SaveArg(Ptr pointer) {
+ return {pointer};
+}
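A sketch of capturing an argument for later inspection (illustrative):

    int seen = 0;
    MockFunction<void(int)> mock;
    EXPECT_CALL(mock, Call).WillOnce(SaveArg<0>(&seen));
    mock.AsStdFunction()(5);
    EXPECT_EQ(5, seen);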
+
+// Action SaveArgPointee<k>(pointer) saves the value pointed to
+// by the k-th (0-based) argument of the mock function to *pointer.
+template <size_t k, typename Ptr>
+internal::SaveArgPointeeAction<k, Ptr> SaveArgPointee(Ptr pointer) {
+ return {pointer};
+}
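Similar, but dereferencing a pointer argument (illustrative sketch):

    int dest = 0;
    MockFunction<void(const int*)> mock;
    EXPECT_CALL(mock, Call).WillOnce(SaveArgPointee<0>(&dest));
    int src = 9;
    mock.AsStdFunction()(&src);  // Copies the pointee, 9, into dest.
    EXPECT_EQ(9, dest);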
+
+// Action SetArgReferee<k>(value) assigns 'value' to the variable
+// referenced by the k-th (0-based) argument of the mock function.
+template <size_t k, typename T>
+internal::SetArgRefereeAction<k, typename std::decay<T>::type> SetArgReferee(
+ T&& value) {
+ return {std::forward<T>(value)};
+}
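A sketch for an out-parameter passed by reference (illustrative):

    MockFunction<void(int&)> mock;
    EXPECT_CALL(mock, Call).WillOnce(SetArgReferee<0>(42));
    int x = 0;
    mock.AsStdFunction()(x);
    EXPECT_EQ(42, x);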
+
+// Action SetArrayArgument<k>(first, last) copies the elements in
+// source range [first, last) to the array pointed to by the k-th
+// (0-based) argument, which can be either a pointer or an
+// iterator. The action does not take ownership of the elements in the
+// source range.
+template <size_t k, typename I1, typename I2>
+internal::SetArrayArgumentAction<k, I1, I2> SetArrayArgument(I1 first,
+ I2 last) {
+ return {first, last};
+}
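A sketch filling a caller-provided buffer (illustrative):

    const int src[] = {1, 2, 3};
    MockFunction<void(int*)> mock;
    EXPECT_CALL(mock, Call).WillOnce(SetArrayArgument<0>(src, src + 3));
    int out[3] = {};
    mock.AsStdFunction()(out);  // out now holds {1, 2, 3}.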
+
+// Action DeleteArg<k>() deletes the k-th (0-based) argument of the mock
+// function.
+template <size_t k>
+internal::DeleteArgAction<k> DeleteArg() {
+ return {};
+}
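A sketch for sink-style APIs that take ownership of a raw pointer (illustrative):

    MockFunction<void(int*)> sink;
    EXPECT_CALL(sink, Call).WillOnce(DeleteArg<0>());
    sink.AsStdFunction()(new int(1));  // The action deletes the heap int.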
+
+// This action returns the value pointed to by 'pointer'.
+template <typename Ptr>
+internal::ReturnPointeeAction<Ptr> ReturnPointee(Ptr pointer) {
+ return {pointer};
+}
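Unlike Return, the value is read at call time rather than expectation time; a sketch (illustrative):

    int value = 1;
    MockFunction<int()> mock;
    EXPECT_CALL(mock, Call).WillRepeatedly(ReturnPointee(&value));
    value = 5;
    EXPECT_EQ(5, mock.AsStdFunction()());  // Sees the updated value.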
+
+// Action Throw(exception) can be used in a mock function of any type
+// to throw the given exception. Any copyable value can be thrown.
+#if GTEST_HAS_EXCEPTIONS
+template <typename T>
+internal::ThrowAction<typename std::decay<T>::type> Throw(T&& exception) {
+ return {std::forward<T>(exception)};
+}
+#endif // GTEST_HAS_EXCEPTIONS
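A sketch (illustrative; assumes exceptions are enabled and <stdexcept> is included):

    MockFunction<int()> mock;
    EXPECT_CALL(mock, Call).WillOnce(Throw(std::runtime_error("boom")));
    EXPECT_THROW(mock.AsStdFunction()(), std::runtime_error);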
+
namespace internal {
// A macro from the ACTION* family (defined later in gmock-generated-actions.h)
@@ -1296,79 +2082,71 @@ namespace internal {
// TYPE DIRECTLY.
struct ExcessiveArg {};
-// A helper class needed for implementing the ACTION* macros.
-template <typename Result, class Impl>
-class ActionHelper {
- public:
- template <typename... Ts>
- static Result Perform(Impl* impl, const std::tuple<Ts...>& args) {
- return Apply(impl, args, MakeIndexSequence<sizeof...(Ts)>{},
- MakeIndexSequence<10 - sizeof...(Ts)>{});
- }
-
- private:
- template <typename... Ts, std::size_t... tuple_ids, std::size_t... rest_ids>
- static Result Apply(Impl* impl, const std::tuple<Ts...>& args,
- IndexSequence<tuple_ids...>, IndexSequence<rest_ids...>) {
- return impl->template gmock_PerformImpl<Ts...>(
- args, std::get<tuple_ids>(args)...,
- ((void)rest_ids, ExcessiveArg())...);
- }
-};
+// Builds an implementation of an Action<> for some particular signature, using
+// a class defined by an ACTION* macro.
+template <typename F, typename Impl>
+struct ActionImpl;
-// A helper base class needed for implementing the ACTION* macros.
-// Implements constructor and conversion operator for Action.
-//
-// Template specialization for parameterless Action.
-template <typename Derived>
-class ActionImpl {
- public:
- ActionImpl() = default;
-
- template <typename F>
- operator ::testing::Action<F>() const { // NOLINT(runtime/explicit)
- return ::testing::Action<F>(new typename Derived::template gmock_Impl<F>());
- }
+template <typename Impl>
+struct ImplBase {
+ struct Holder {
+ // Allows each copy of the Action<> to get to the Impl.
+ explicit operator const Impl&() const { return *ptr; }
+ std::shared_ptr<Impl> ptr;
+ };
+ using type = typename std::conditional<std::is_constructible<Impl>::value,
+ Impl, Holder>::type;
};
-// Template specialization for parameterized Action.
-template <template <typename...> class Derived, typename... Ts>
-class ActionImpl<Derived<Ts...>> {
- public:
- explicit ActionImpl(Ts... params) : params_(std::forward<Ts>(params)...) {}
-
- template <typename F>
- operator ::testing::Action<F>() const { // NOLINT(runtime/explicit)
- return Apply<F>(MakeIndexSequence<sizeof...(Ts)>{});
+template <typename R, typename... Args, typename Impl>
+struct ActionImpl<R(Args...), Impl> : ImplBase<Impl>::type {
+ using Base = typename ImplBase<Impl>::type;
+ using function_type = R(Args...);
+ using args_type = std::tuple<Args...>;
+
+ ActionImpl() = default; // Only defined if appropriate for Base.
+ explicit ActionImpl(std::shared_ptr<Impl> impl) : Base{std::move(impl)} {}
+
+ R operator()(Args&&... arg) const {
+ static constexpr size_t kMaxArgs =
+ sizeof...(Args) <= 10 ? sizeof...(Args) : 10;
+ return Apply(MakeIndexSequence<kMaxArgs>{},
+ MakeIndexSequence<10 - kMaxArgs>{},
+ args_type{std::forward<Args>(arg)...});
}
- private:
- template <typename F, std::size_t... tuple_ids>
- ::testing::Action<F> Apply(IndexSequence<tuple_ids...>) const {
- return ::testing::Action<F>(new
- typename Derived<Ts...>::template gmock_Impl<F>(
- std::get<tuple_ids>(params_)...));
+ template <std::size_t... arg_id, std::size_t... excess_id>
+ R Apply(IndexSequence<arg_id...>, IndexSequence<excess_id...>,
+ const args_type& args) const {
+    // Impl need not be specific to the signature of the action being
+    // implemented; only the implementing function body needs to have all of
+    // the specific types instantiated. Up to 10 of the args provided by the
+    // args_type are passed through, followed by a dummy of unspecified type
+    // for each remaining slot, up to the 10 explicit args.
+ static constexpr ExcessiveArg kExcessArg{};
+ return static_cast<const Impl&>(*this)
+ .template gmock_PerformImpl<
+ /*function_type=*/function_type, /*return_type=*/R,
+ /*args_type=*/args_type,
+ /*argN_type=*/
+ typename std::tuple_element<arg_id, args_type>::type...>(
+ /*args=*/args, std::get<arg_id>(args)...,
+ ((void)excess_id, kExcessArg)...);
}
-
- std::tuple<Ts...> params_;
};
-namespace invoke_argument {
-
-// Appears in InvokeArgumentAdl's argument list to help avoid
-// accidental calls to user functions of the same name.
-struct AdlTag {};
-
-// InvokeArgumentAdl - a helper for InvokeArgument.
-// The basic overloads are provided here for generic functors.
-// Overloads for other custom-callables are provided in the
-// internal/custom/gmock-generated-actions.h header.
-template <typename F, typename... Args>
-auto InvokeArgumentAdl(AdlTag, F f, Args... args) -> decltype(f(args...)) {
- return f(args...);
+// Stores a default-constructed Impl as part of the Action<>'s
+// std::function<>. The Impl should be trivial to copy.
+template <typename F, typename Impl>
+::testing::Action<F> MakeAction() {
+ return ::testing::Action<F>(ActionImpl<F, Impl>());
}
-} // namespace invoke_argument
+// Stores just the one given instance of Impl.
+template <typename F, typename Impl>
+::testing::Action<F> MakeAction(std::shared_ptr<Impl> impl) {
+ return ::testing::Action<F>(ActionImpl<F, Impl>(std::move(impl)));
+}
#define GMOCK_INTERNAL_ARG_UNUSED(i, data, el) \
, const arg##i##_type& arg##i GTEST_ATTRIBUTE_UNUSED_
@@ -1411,90 +2189,75 @@ auto InvokeArgumentAdl(AdlTag, F f, Args... args) -> decltype(f(args...)) {
#define GMOCK_ACTION_FIELD_PARAMS_(params) \
GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_FIELD_PARAM, , params)
-#define GMOCK_INTERNAL_ACTION(name, full_name, params) \
- template <GMOCK_ACTION_TYPENAME_PARAMS_(params)> \
- class full_name : public ::testing::internal::ActionImpl< \
- full_name<GMOCK_ACTION_TYPE_PARAMS_(params)>> { \
- using base_type = ::testing::internal::ActionImpl<full_name>; \
- \
- public: \
- using base_type::base_type; \
- template <typename F> \
- class gmock_Impl : public ::testing::ActionInterface<F> { \
- public: \
- typedef F function_type; \
- typedef typename ::testing::internal::Function<F>::Result return_type; \
- typedef \
- typename ::testing::internal::Function<F>::ArgumentTuple args_type; \
- explicit gmock_Impl(GMOCK_ACTION_TYPE_GVALUE_PARAMS_(params)) \
- : GMOCK_ACTION_INIT_PARAMS_(params) {} \
- return_type Perform(const args_type& args) override { \
- return ::testing::internal::ActionHelper<return_type, \
- gmock_Impl>::Perform(this, \
- args); \
- } \
- template <GMOCK_ACTION_TEMPLATE_ARGS_NAMES_> \
- return_type gmock_PerformImpl(GMOCK_ACTION_ARG_TYPES_AND_NAMES_) const; \
- GMOCK_ACTION_FIELD_PARAMS_(params) \
- \
- private: \
- GTEST_DISALLOW_ASSIGN_(gmock_Impl); \
- }; \
- \
- private: \
- GTEST_DISALLOW_ASSIGN_(full_name); \
- }; \
- template <GMOCK_ACTION_TYPENAME_PARAMS_(params)> \
- inline full_name<GMOCK_ACTION_TYPE_PARAMS_(params)> name( \
- GMOCK_ACTION_TYPE_GVALUE_PARAMS_(params)) { \
- return full_name<GMOCK_ACTION_TYPE_PARAMS_(params)>( \
- GMOCK_ACTION_GVALUE_PARAMS_(params)); \
- } \
- template <GMOCK_ACTION_TYPENAME_PARAMS_(params)> \
- template <typename F> \
- template <GMOCK_ACTION_TEMPLATE_ARGS_NAMES_> \
- typename ::testing::internal::Function<F>::Result \
- full_name<GMOCK_ACTION_TYPE_PARAMS_(params)>::gmock_Impl< \
- F>::gmock_PerformImpl(GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) \
- const
+#define GMOCK_INTERNAL_ACTION(name, full_name, params) \
+ template <GMOCK_ACTION_TYPENAME_PARAMS_(params)> \
+ class full_name { \
+ public: \
+ explicit full_name(GMOCK_ACTION_TYPE_GVALUE_PARAMS_(params)) \
+ : impl_(std::make_shared<gmock_Impl>( \
+ GMOCK_ACTION_GVALUE_PARAMS_(params))) {} \
+ full_name(const full_name&) = default; \
+ full_name(full_name&&) noexcept = default; \
+ template <typename F> \
+ operator ::testing::Action<F>() const { \
+ return ::testing::internal::MakeAction<F>(impl_); \
+ } \
+ \
+ private: \
+ class gmock_Impl { \
+ public: \
+ explicit gmock_Impl(GMOCK_ACTION_TYPE_GVALUE_PARAMS_(params)) \
+ : GMOCK_ACTION_INIT_PARAMS_(params) {} \
+ template <typename function_type, typename return_type, \
+ typename args_type, GMOCK_ACTION_TEMPLATE_ARGS_NAMES_> \
+ return_type gmock_PerformImpl(GMOCK_ACTION_ARG_TYPES_AND_NAMES_) const; \
+ GMOCK_ACTION_FIELD_PARAMS_(params) \
+ }; \
+ std::shared_ptr<const gmock_Impl> impl_; \
+ }; \
+ template <GMOCK_ACTION_TYPENAME_PARAMS_(params)> \
+ inline full_name<GMOCK_ACTION_TYPE_PARAMS_(params)> name( \
+ GMOCK_ACTION_TYPE_GVALUE_PARAMS_(params)) GTEST_MUST_USE_RESULT_; \
+ template <GMOCK_ACTION_TYPENAME_PARAMS_(params)> \
+ inline full_name<GMOCK_ACTION_TYPE_PARAMS_(params)> name( \
+ GMOCK_ACTION_TYPE_GVALUE_PARAMS_(params)) { \
+ return full_name<GMOCK_ACTION_TYPE_PARAMS_(params)>( \
+ GMOCK_ACTION_GVALUE_PARAMS_(params)); \
+ } \
+ template <GMOCK_ACTION_TYPENAME_PARAMS_(params)> \
+ template <typename function_type, typename return_type, typename args_type, \
+ GMOCK_ACTION_TEMPLATE_ARGS_NAMES_> \
+ return_type \
+ full_name<GMOCK_ACTION_TYPE_PARAMS_(params)>::gmock_Impl::gmock_PerformImpl( \
+ GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const
} // namespace internal
+// Similar to GMOCK_INTERNAL_ACTION, but no bound parameters are stored.
#define ACTION(name) \
- class name##Action : public ::testing::internal::ActionImpl<name##Action> { \
- using base_type = ::testing::internal::ActionImpl<name##Action>; \
- \
+ class name##Action { \
public: \
- using base_type::base_type; \
+ explicit name##Action() noexcept {} \
+ name##Action(const name##Action&) noexcept {} \
template <typename F> \
- class gmock_Impl : public ::testing::ActionInterface<F> { \
+ operator ::testing::Action<F>() const { \
+ return ::testing::internal::MakeAction<F, gmock_Impl>(); \
+ } \
+ \
+ private: \
+ class gmock_Impl { \
public: \
- typedef F function_type; \
- typedef typename ::testing::internal::Function<F>::Result return_type; \
- typedef \
- typename ::testing::internal::Function<F>::ArgumentTuple args_type; \
- gmock_Impl() {} \
- return_type Perform(const args_type& args) override { \
- return ::testing::internal::ActionHelper<return_type, \
- gmock_Impl>::Perform(this, \
- args); \
- } \
- template <GMOCK_ACTION_TEMPLATE_ARGS_NAMES_> \
+ template <typename function_type, typename return_type, \
+ typename args_type, GMOCK_ACTION_TEMPLATE_ARGS_NAMES_> \
return_type gmock_PerformImpl(GMOCK_ACTION_ARG_TYPES_AND_NAMES_) const; \
- \
- private: \
- GTEST_DISALLOW_ASSIGN_(gmock_Impl); \
}; \
- \
- private: \
- GTEST_DISALLOW_ASSIGN_(name##Action); \
}; \
+ inline name##Action name() GTEST_MUST_USE_RESULT_; \
inline name##Action name() { return name##Action(); } \
- template <typename F> \
- template <GMOCK_ACTION_TEMPLATE_ARGS_NAMES_> \
- typename ::testing::internal::Function<F>::Result \
- name##Action::gmock_Impl<F>::gmock_PerformImpl( \
- GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const
+ template <typename function_type, typename return_type, typename args_type, \
+ GMOCK_ACTION_TEMPLATE_ARGS_NAMES_> \
+ return_type name##Action::gmock_Impl::gmock_PerformImpl( \
+ GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const
#define ACTION_P(name, ...) \
GMOCK_INTERNAL_ACTION(name, name##ActionP, (__VA_ARGS__))
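A sketch of the macro in use (illustrative; AddN is a hypothetical name, arg0 is the macro-provided alias for the call's first argument, and the definition must appear at namespace scope):

    ACTION_P(AddN, n) { return arg0 + n; }

    MockFunction<int(int)> mock;
    EXPECT_CALL(mock, Call).WillOnce(AddN(5));
    EXPECT_EQ(8, mock.AsStdFunction()(3));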
@@ -1529,8 +2292,7 @@ auto InvokeArgumentAdl(AdlTag, F f, Args... args) -> decltype(f(args...)) {
} // namespace testing
#ifdef _MSC_VER
-# pragma warning(pop)
+#pragma warning(pop)
#endif
-
-#endif // GMOCK_INCLUDE_GMOCK_GMOCK_ACTIONS_H_
+#endif // GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_ACTIONS_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-cardinalities.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-cardinalities.h
index 46e01e102d5..b6ab648e50a 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-cardinalities.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-cardinalities.h
@@ -27,21 +27,23 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
// Google Mock - a framework for writing C++ mock classes.
//
// This file implements some commonly used cardinalities. More
// cardinalities can be defined by the user implementing the
// CardinalityInterface interface if necessary.
-// GOOGLETEST_CM0002 DO NOT DELETE
+// IWYU pragma: private, include "gmock/gmock.h"
+// IWYU pragma: friend gmock/.*
-#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_CARDINALITIES_H_
-#define GMOCK_INCLUDE_GMOCK_GMOCK_CARDINALITIES_H_
+#ifndef GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_CARDINALITIES_H_
+#define GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_CARDINALITIES_H_
#include <limits.h>
+
#include <memory>
#include <ostream> // NOLINT
+
#include "gmock/internal/gmock-port.h"
#include "gtest/gtest.h"
@@ -116,7 +118,7 @@ class GTEST_API_ Cardinality {
// cardinality, i.e. exceed the maximum number of allowed calls.
bool IsOverSaturatedByCallCount(int call_count) const {
return impl_->IsSaturatedByCallCount(call_count) &&
- !impl_->IsSatisfiedByCallCount(call_count);
+ !impl_->IsSatisfiedByCallCount(call_count);
}
// Describes self to an ostream
@@ -154,4 +156,4 @@ inline Cardinality MakeCardinality(const CardinalityInterface* c) {
GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
-#endif // GMOCK_INCLUDE_GMOCK_GMOCK_CARDINALITIES_H_
+#endif // GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_CARDINALITIES_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-function-mocker.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-function-mocker.h
index 317d6c2b7eb..f565d980c56 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-function-mocker.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-function-mocker.h
@@ -31,10 +31,11 @@
//
// This file implements MOCK_METHOD.
-// GOOGLETEST_CM0002 DO NOT DELETE
+// IWYU pragma: private, include "gmock/gmock.h"
+// IWYU pragma: friend gmock/.*
-#ifndef THIRD_PARTY_GOOGLETEST_GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_FUNCTION_MOCKER_H_ // NOLINT
-#define THIRD_PARTY_GOOGLETEST_GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_FUNCTION_MOCKER_H_ // NOLINT
+#ifndef GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_FUNCTION_MOCKER_H_ // NOLINT
+#define GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_FUNCTION_MOCKER_H_ // NOLINT
#include <type_traits> // IWYU pragma: keep
#include <utility> // IWYU pragma: keep
@@ -48,14 +49,53 @@ namespace internal {
template <typename T>
using identity_t = T;
-template <typename MockType>
-const MockType* AdjustConstness_const(const MockType* mock) {
- return mock;
+template <typename Pattern>
+struct ThisRefAdjuster {
+ template <typename T>
+ using AdjustT = typename std::conditional<
+ std::is_const<typename std::remove_reference<Pattern>::type>::value,
+ typename std::conditional<std::is_lvalue_reference<Pattern>::value,
+ const T&, const T&&>::type,
+ typename std::conditional<std::is_lvalue_reference<Pattern>::value, T&,
+ T&&>::type>::type;
+
+ template <typename MockType>
+ static AdjustT<MockType> Adjust(const MockType& mock) {
+ return static_cast<AdjustT<MockType>>(const_cast<MockType&>(mock));
+ }
+};
+
+constexpr bool PrefixOf(const char* a, const char* b) {
+ return *a == 0 || (*a == *b && internal::PrefixOf(a + 1, b + 1));
}
-template <typename MockType>
-MockType* AdjustConstness_(const MockType* mock) {
- return const_cast<MockType*>(mock);
+template <int N, int M>
+constexpr bool StartsWith(const char (&prefix)[N], const char (&str)[M]) {
+ return N <= M && internal::PrefixOf(prefix, str);
+}
+
+template <int N, int M>
+constexpr bool EndsWith(const char (&suffix)[N], const char (&str)[M]) {
+ return N <= M && internal::PrefixOf(suffix, str + M - N);
+}
+
+template <int N, int M>
+constexpr bool Equals(const char (&a)[N], const char (&b)[M]) {
+ return N == M && internal::PrefixOf(a, b);
+}
+
+template <int N>
+constexpr bool ValidateSpec(const char (&spec)[N]) {
+ return internal::Equals("const", spec) ||
+ internal::Equals("override", spec) ||
+ internal::Equals("final", spec) ||
+ internal::Equals("noexcept", spec) ||
+ (internal::StartsWith("noexcept(", spec) &&
+ internal::EndsWith(")", spec)) ||
+ internal::Equals("ref(&)", spec) ||
+ internal::Equals("ref(&&)", spec) ||
+ (internal::StartsWith("Calltype(", spec) &&
+ internal::EndsWith(")", spec));
}
} // namespace internal
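The new ref(...) specifier validated above lets MOCK_METHOD mock ref-qualified member functions; a sketch (illustrative, MyMock is hypothetical):

    struct MyMock {
      MOCK_METHOD(int, Get, (), (const, ref(&)));  // int Get() const &;
      MOCK_METHOD(int, Take, (), (ref(&&)));       // int Take() &&;
    };

    MyMock m;
    EXPECT_CALL(m, Get).WillOnce(Return(1));
    EXPECT_EQ(1, m.Get());  // Callable on an lvalue thanks to ref(&).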
@@ -90,7 +130,8 @@ using internal::FunctionMocker;
GMOCK_PP_NARG0 _Args, _MethodName, GMOCK_INTERNAL_HAS_CONST(_Spec), \
GMOCK_INTERNAL_HAS_OVERRIDE(_Spec), GMOCK_INTERNAL_HAS_FINAL(_Spec), \
GMOCK_INTERNAL_GET_NOEXCEPT_SPEC(_Spec), \
- GMOCK_INTERNAL_GET_CALLTYPE(_Spec), \
+ GMOCK_INTERNAL_GET_CALLTYPE_SPEC(_Spec), \
+ GMOCK_INTERNAL_GET_REF_SPEC(_Spec), \
(GMOCK_INTERNAL_SIGNATURE(_Ret, _Args)))
#define GMOCK_INTERNAL_MOCK_METHOD_ARG_5(...) \
@@ -124,19 +165,19 @@ using internal::FunctionMocker;
::testing::tuple_size<typename ::testing::internal::Function< \
__VA_ARGS__>::ArgumentTuple>::value == _N, \
"This method does not take " GMOCK_PP_STRINGIZE( \
- _N) " arguments. Parenthesize all types with unproctected commas.")
+ _N) " arguments. Parenthesize all types with unprotected commas.")
#define GMOCK_INTERNAL_ASSERT_VALID_SPEC(_Spec) \
GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_ASSERT_VALID_SPEC_ELEMENT, ~, _Spec)
#define GMOCK_INTERNAL_MOCK_METHOD_IMPL(_N, _MethodName, _Constness, \
_Override, _Final, _NoexceptSpec, \
- _CallType, _Signature) \
+ _CallType, _RefSpec, _Signature) \
typename ::testing::internal::Function<GMOCK_PP_REMOVE_PARENS( \
_Signature)>::Result \
GMOCK_INTERNAL_EXPAND(_CallType) \
_MethodName(GMOCK_PP_REPEAT(GMOCK_INTERNAL_PARAMETER, _Signature, _N)) \
- GMOCK_PP_IF(_Constness, const, ) _NoexceptSpec \
+ GMOCK_PP_IF(_Constness, const, ) _RefSpec _NoexceptSpec \
GMOCK_PP_IF(_Override, override, ) GMOCK_PP_IF(_Final, final, ) { \
GMOCK_MOCKER_(_N, _Constness, _MethodName) \
.SetOwnerAndName(this, #_MethodName); \
@@ -145,7 +186,7 @@ using internal::FunctionMocker;
} \
::testing::MockSpec<GMOCK_PP_REMOVE_PARENS(_Signature)> gmock_##_MethodName( \
GMOCK_PP_REPEAT(GMOCK_INTERNAL_MATCHER_PARAMETER, _Signature, _N)) \
- GMOCK_PP_IF(_Constness, const, ) { \
+ GMOCK_PP_IF(_Constness, const, ) _RefSpec { \
GMOCK_MOCKER_(_N, _Constness, _MethodName).RegisterOwner(this); \
return GMOCK_MOCKER_(_N, _Constness, _MethodName) \
.With(GMOCK_PP_REPEAT(GMOCK_INTERNAL_MATCHER_ARGUMENT, , _N)); \
@@ -153,18 +194,18 @@ using internal::FunctionMocker;
::testing::MockSpec<GMOCK_PP_REMOVE_PARENS(_Signature)> gmock_##_MethodName( \
const ::testing::internal::WithoutMatchers&, \
GMOCK_PP_IF(_Constness, const, )::testing::internal::Function< \
- GMOCK_PP_REMOVE_PARENS(_Signature)>*) const _NoexceptSpec { \
- return GMOCK_PP_CAT(::testing::internal::AdjustConstness_, \
- GMOCK_PP_IF(_Constness, const, ))(this) \
- ->gmock_##_MethodName(GMOCK_PP_REPEAT( \
+ GMOCK_PP_REMOVE_PARENS(_Signature)>*) const _RefSpec _NoexceptSpec { \
+ return ::testing::internal::ThisRefAdjuster<GMOCK_PP_IF( \
+ _Constness, const, ) int _RefSpec>::Adjust(*this) \
+ .gmock_##_MethodName(GMOCK_PP_REPEAT( \
GMOCK_INTERNAL_A_MATCHER_ARGUMENT, _Signature, _N)); \
} \
mutable ::testing::FunctionMocker<GMOCK_PP_REMOVE_PARENS(_Signature)> \
- GMOCK_MOCKER_(_N, _Constness, _MethodName)
+ GMOCK_MOCKER_(_N, _Constness, _MethodName)
#define GMOCK_INTERNAL_EXPAND(...) __VA_ARGS__
-// Five Valid modifiers.
+// Valid modifiers.
#define GMOCK_INTERNAL_HAS_CONST(_Tuple) \
GMOCK_PP_HAS_COMMA(GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_DETECT_CONST, ~, _Tuple))
@@ -183,18 +224,40 @@ using internal::FunctionMocker;
GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_NOEXCEPT(_i, _, _elem)), \
_elem, )
-#define GMOCK_INTERNAL_GET_CALLTYPE(_Tuple) \
- GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_GET_CALLTYPE_IMPL, ~, _Tuple)
-
-#define GMOCK_INTERNAL_ASSERT_VALID_SPEC_ELEMENT(_i, _, _elem) \
- static_assert( \
- (GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_CONST(_i, _, _elem)) + \
- GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_OVERRIDE(_i, _, _elem)) + \
- GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_FINAL(_i, _, _elem)) + \
- GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_NOEXCEPT(_i, _, _elem)) + \
- GMOCK_INTERNAL_IS_CALLTYPE(_elem)) == 1, \
- GMOCK_PP_STRINGIZE( \
+#define GMOCK_INTERNAL_GET_CALLTYPE_SPEC(_Tuple) \
+ GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_CALLTYPE_SPEC_IF_CALLTYPE, ~, _Tuple)
+
+#define GMOCK_INTERNAL_CALLTYPE_SPEC_IF_CALLTYPE(_i, _, _elem) \
+ GMOCK_PP_IF( \
+ GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_CALLTYPE(_i, _, _elem)), \
+ GMOCK_PP_CAT(GMOCK_INTERNAL_UNPACK_, _elem), )
+
+#define GMOCK_INTERNAL_GET_REF_SPEC(_Tuple) \
+ GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_REF_SPEC_IF_REF, ~, _Tuple)
+
+#define GMOCK_INTERNAL_REF_SPEC_IF_REF(_i, _, _elem) \
+ GMOCK_PP_IF(GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_REF(_i, _, _elem)), \
+ GMOCK_PP_CAT(GMOCK_INTERNAL_UNPACK_, _elem), )
+
+#ifdef GMOCK_INTERNAL_STRICT_SPEC_ASSERT
+#define GMOCK_INTERNAL_ASSERT_VALID_SPEC_ELEMENT(_i, _, _elem) \
+ static_assert( \
+ ::testing::internal::ValidateSpec(GMOCK_PP_STRINGIZE(_elem)), \
+ "Token \'" GMOCK_PP_STRINGIZE( \
+ _elem) "\' cannot be recognized as a valid specification " \
+ "modifier. Is a ',' missing?");
+#else
+#define GMOCK_INTERNAL_ASSERT_VALID_SPEC_ELEMENT(_i, _, _elem) \
+ static_assert( \
+ (GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_CONST(_i, _, _elem)) + \
+ GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_OVERRIDE(_i, _, _elem)) + \
+ GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_FINAL(_i, _, _elem)) + \
+ GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_NOEXCEPT(_i, _, _elem)) + \
+ GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_REF(_i, _, _elem)) + \
+ GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_CALLTYPE(_i, _, _elem))) == 1, \
+ GMOCK_PP_STRINGIZE( \
_elem) " cannot be recognized as a valid specification modifier.");
+#endif // GMOCK_INTERNAL_STRICT_SPEC_ASSERT
// Modifiers implementation.
#define GMOCK_INTERNAL_DETECT_CONST(_i, _, _elem) \
@@ -217,26 +280,19 @@ using internal::FunctionMocker;
#define GMOCK_INTERNAL_DETECT_NOEXCEPT_I_noexcept ,
-#define GMOCK_INTERNAL_GET_CALLTYPE_IMPL(_i, _, _elem) \
- GMOCK_PP_IF(GMOCK_INTERNAL_IS_CALLTYPE(_elem), \
- GMOCK_INTERNAL_GET_VALUE_CALLTYPE, GMOCK_PP_EMPTY) \
- (_elem)
+#define GMOCK_INTERNAL_DETECT_REF(_i, _, _elem) \
+ GMOCK_PP_CAT(GMOCK_INTERNAL_DETECT_REF_I_, _elem)
+
+#define GMOCK_INTERNAL_DETECT_REF_I_ref ,
+
+#define GMOCK_INTERNAL_UNPACK_ref(x) x
-// TODO(iserna): GMOCK_INTERNAL_IS_CALLTYPE and
-// GMOCK_INTERNAL_GET_VALUE_CALLTYPE needed more expansions to work on windows
-// maybe they can be simplified somehow.
-#define GMOCK_INTERNAL_IS_CALLTYPE(_arg) \
- GMOCK_INTERNAL_IS_CALLTYPE_I( \
- GMOCK_PP_CAT(GMOCK_INTERNAL_IS_CALLTYPE_HELPER_, _arg))
-#define GMOCK_INTERNAL_IS_CALLTYPE_I(_arg) GMOCK_PP_IS_ENCLOSED_PARENS(_arg)
+#define GMOCK_INTERNAL_DETECT_CALLTYPE(_i, _, _elem) \
+ GMOCK_PP_CAT(GMOCK_INTERNAL_DETECT_CALLTYPE_I_, _elem)
-#define GMOCK_INTERNAL_GET_VALUE_CALLTYPE(_arg) \
- GMOCK_INTERNAL_GET_VALUE_CALLTYPE_I( \
- GMOCK_PP_CAT(GMOCK_INTERNAL_IS_CALLTYPE_HELPER_, _arg))
-#define GMOCK_INTERNAL_GET_VALUE_CALLTYPE_I(_arg) \
- GMOCK_PP_CAT(GMOCK_PP_IDENTITY, _arg)
+#define GMOCK_INTERNAL_DETECT_CALLTYPE_I_Calltype ,
-#define GMOCK_INTERNAL_IS_CALLTYPE_HELPER_Calltype
+#define GMOCK_INTERNAL_UNPACK_Calltype(...) __VA_ARGS__
// Note: The use of `identity_t` here allows _Ret to represent return types that
// would normally need to be specified in a different way. For example, a method
@@ -449,10 +505,10 @@ using internal::FunctionMocker;
GMOCK_INTERNAL_ASSERT_VALID_SIGNATURE( \
args_num, ::testing::internal::identity_t<__VA_ARGS__>); \
GMOCK_INTERNAL_MOCK_METHOD_IMPL( \
- args_num, Method, GMOCK_PP_NARG0(constness), 0, 0, , ct, \
+ args_num, Method, GMOCK_PP_NARG0(constness), 0, 0, , ct, , \
(::testing::internal::identity_t<__VA_ARGS__>))
#define GMOCK_MOCKER_(arity, constness, Method) \
GTEST_CONCAT_TOKEN_(gmock##constness##arity##_##Method##_, __LINE__)
-#endif // THIRD_PARTY_GOOGLETEST_GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_FUNCTION_MOCKER_H_
+#endif // GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_FUNCTION_MOCKER_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-generated-actions.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-generated-actions.h
deleted file mode 100644
index c78debef077..00000000000
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-generated-actions.h
+++ /dev/null
@@ -1,687 +0,0 @@
-// This file was GENERATED by command:
-// pump.py gmock-generated-actions.h.pump
-// DO NOT EDIT BY HAND!!!
-
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-// Google Mock - a framework for writing C++ mock classes.
-//
-// This file implements some commonly used variadic actions.
-
-// GOOGLETEST_CM0002 DO NOT DELETE
-
-#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_
-#define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_
-
-#include <memory>
-#include <utility>
-
-#include "gmock/gmock-actions.h"
-#include "gmock/internal/gmock-port.h"
-
-
-// Sometimes you want to give an action explicit template parameters
-// that cannot be inferred from its value parameters. ACTION() and
-// ACTION_P*() don't support that. ACTION_TEMPLATE() remedies that
-// and can be viewed as an extension to ACTION() and ACTION_P*().
-//
-// The syntax:
-//
-// ACTION_TEMPLATE(ActionName,
-// HAS_m_TEMPLATE_PARAMS(kind1, name1, ..., kind_m, name_m),
-// AND_n_VALUE_PARAMS(p1, ..., p_n)) { statements; }
-//
-// defines an action template that takes m explicit template
-// parameters and n value parameters. name_i is the name of the i-th
-// template parameter, and kind_i specifies whether it's a typename,
-// an integral constant, or a template. p_i is the name of the i-th
-// value parameter.
-//
-// Example:
-//
-// // DuplicateArg<k, T>(output) converts the k-th argument of the mock
-// // function to type T and copies it to *output.
-// ACTION_TEMPLATE(DuplicateArg,
-// HAS_2_TEMPLATE_PARAMS(int, k, typename, T),
-// AND_1_VALUE_PARAMS(output)) {
-// *output = T(::std::get<k>(args));
-// }
-// ...
-// int n;
-// EXPECT_CALL(mock, Foo(_, _))
-// .WillOnce(DuplicateArg<1, unsigned char>(&n));
-//
-// To create an instance of an action template, write:
-//
-// ActionName<t1, ..., t_m>(v1, ..., v_n)
-//
-// where the ts are the template arguments and the vs are the value
-// arguments. The value argument types are inferred by the compiler.
-// If you want to explicitly specify the value argument types, you can
-// provide additional template arguments:
-//
-// ActionName<t1, ..., t_m, u1, ..., u_k>(v1, ..., v_n)
-//
-// where u_i is the desired type of v_i.
-//
-// ACTION_TEMPLATE and ACTION/ACTION_P* can be overloaded on the
-// number of value parameters, but not on the number of template
-// parameters. Without the restriction, the meaning of the following
-// is unclear:
-//
-// OverloadedAction<int, bool>(x);
-//
-// Are we using a single-template-parameter action where 'bool' refers
-// to the type of x, or are we using a two-template-parameter action
-// where the compiler is asked to infer the type of x?
-//
-// Implementation notes:
-//
-// GMOCK_INTERNAL_*_HAS_m_TEMPLATE_PARAMS and
-// GMOCK_INTERNAL_*_AND_n_VALUE_PARAMS are internal macros for
-// implementing ACTION_TEMPLATE. The main trick we use is to create
-// new macro invocations when expanding a macro. For example, we have
-//
-// #define ACTION_TEMPLATE(name, template_params, value_params)
-// ... GMOCK_INTERNAL_DECL_##template_params ...
-//
-// which causes ACTION_TEMPLATE(..., HAS_1_TEMPLATE_PARAMS(typename, T), ...)
-// to expand to
-//
-// ... GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS(typename, T) ...
-//
-// Since GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS is a macro, the
-// preprocessor will continue to expand it to
-//
-// ... typename T ...
-//
-// This technique conforms to the C++ standard and is portable. It
-// allows us to implement action templates using O(N) code, where N is
-// the maximum number of template/value parameters supported. Without
-// using it, we'd have to devote O(N^2) amount of code to implement all
-// combinations of m and n.
-
-// Declares the template parameters.
-#define GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS(kind0, name0) kind0 name0
-#define GMOCK_INTERNAL_DECL_HAS_2_TEMPLATE_PARAMS(kind0, name0, kind1, \
- name1) kind0 name0, kind1 name1
-#define GMOCK_INTERNAL_DECL_HAS_3_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \
- kind2, name2) kind0 name0, kind1 name1, kind2 name2
-#define GMOCK_INTERNAL_DECL_HAS_4_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \
- kind2, name2, kind3, name3) kind0 name0, kind1 name1, kind2 name2, \
- kind3 name3
-#define GMOCK_INTERNAL_DECL_HAS_5_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \
- kind2, name2, kind3, name3, kind4, name4) kind0 name0, kind1 name1, \
- kind2 name2, kind3 name3, kind4 name4
-#define GMOCK_INTERNAL_DECL_HAS_6_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \
- kind2, name2, kind3, name3, kind4, name4, kind5, name5) kind0 name0, \
- kind1 name1, kind2 name2, kind3 name3, kind4 name4, kind5 name5
-#define GMOCK_INTERNAL_DECL_HAS_7_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \
- kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, \
- name6) kind0 name0, kind1 name1, kind2 name2, kind3 name3, kind4 name4, \
- kind5 name5, kind6 name6
-#define GMOCK_INTERNAL_DECL_HAS_8_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \
- kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, name6, \
- kind7, name7) kind0 name0, kind1 name1, kind2 name2, kind3 name3, \
- kind4 name4, kind5 name5, kind6 name6, kind7 name7
-#define GMOCK_INTERNAL_DECL_HAS_9_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \
- kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, name6, \
- kind7, name7, kind8, name8) kind0 name0, kind1 name1, kind2 name2, \
- kind3 name3, kind4 name4, kind5 name5, kind6 name6, kind7 name7, \
- kind8 name8
-#define GMOCK_INTERNAL_DECL_HAS_10_TEMPLATE_PARAMS(kind0, name0, kind1, \
- name1, kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, \
- name6, kind7, name7, kind8, name8, kind9, name9) kind0 name0, \
- kind1 name1, kind2 name2, kind3 name3, kind4 name4, kind5 name5, \
- kind6 name6, kind7 name7, kind8 name8, kind9 name9
-
-// Lists the template parameters.
-#define GMOCK_INTERNAL_LIST_HAS_1_TEMPLATE_PARAMS(kind0, name0) name0
-#define GMOCK_INTERNAL_LIST_HAS_2_TEMPLATE_PARAMS(kind0, name0, kind1, \
- name1) name0, name1
-#define GMOCK_INTERNAL_LIST_HAS_3_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \
- kind2, name2) name0, name1, name2
-#define GMOCK_INTERNAL_LIST_HAS_4_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \
- kind2, name2, kind3, name3) name0, name1, name2, name3
-#define GMOCK_INTERNAL_LIST_HAS_5_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \
- kind2, name2, kind3, name3, kind4, name4) name0, name1, name2, name3, \
- name4
-#define GMOCK_INTERNAL_LIST_HAS_6_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \
- kind2, name2, kind3, name3, kind4, name4, kind5, name5) name0, name1, \
- name2, name3, name4, name5
-#define GMOCK_INTERNAL_LIST_HAS_7_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \
- kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, \
- name6) name0, name1, name2, name3, name4, name5, name6
-#define GMOCK_INTERNAL_LIST_HAS_8_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \
- kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, name6, \
- kind7, name7) name0, name1, name2, name3, name4, name5, name6, name7
-#define GMOCK_INTERNAL_LIST_HAS_9_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \
- kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, name6, \
- kind7, name7, kind8, name8) name0, name1, name2, name3, name4, name5, \
- name6, name7, name8
-#define GMOCK_INTERNAL_LIST_HAS_10_TEMPLATE_PARAMS(kind0, name0, kind1, \
- name1, kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, \
- name6, kind7, name7, kind8, name8, kind9, name9) name0, name1, name2, \
- name3, name4, name5, name6, name7, name8, name9
-
-// Declares the types of value parameters.
-#define GMOCK_INTERNAL_DECL_TYPE_AND_0_VALUE_PARAMS()
-#define GMOCK_INTERNAL_DECL_TYPE_AND_1_VALUE_PARAMS(p0) , typename p0##_type
-#define GMOCK_INTERNAL_DECL_TYPE_AND_2_VALUE_PARAMS(p0, p1) , \
- typename p0##_type, typename p1##_type
-#define GMOCK_INTERNAL_DECL_TYPE_AND_3_VALUE_PARAMS(p0, p1, p2) , \
- typename p0##_type, typename p1##_type, typename p2##_type
-#define GMOCK_INTERNAL_DECL_TYPE_AND_4_VALUE_PARAMS(p0, p1, p2, p3) , \
- typename p0##_type, typename p1##_type, typename p2##_type, \
- typename p3##_type
-#define GMOCK_INTERNAL_DECL_TYPE_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) , \
- typename p0##_type, typename p1##_type, typename p2##_type, \
- typename p3##_type, typename p4##_type
-#define GMOCK_INTERNAL_DECL_TYPE_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) , \
- typename p0##_type, typename p1##_type, typename p2##_type, \
- typename p3##_type, typename p4##_type, typename p5##_type
-#define GMOCK_INTERNAL_DECL_TYPE_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \
- p6) , typename p0##_type, typename p1##_type, typename p2##_type, \
- typename p3##_type, typename p4##_type, typename p5##_type, \
- typename p6##_type
-#define GMOCK_INTERNAL_DECL_TYPE_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \
- p6, p7) , typename p0##_type, typename p1##_type, typename p2##_type, \
- typename p3##_type, typename p4##_type, typename p5##_type, \
- typename p6##_type, typename p7##_type
-#define GMOCK_INTERNAL_DECL_TYPE_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \
- p6, p7, p8) , typename p0##_type, typename p1##_type, typename p2##_type, \
- typename p3##_type, typename p4##_type, typename p5##_type, \
- typename p6##_type, typename p7##_type, typename p8##_type
-#define GMOCK_INTERNAL_DECL_TYPE_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \
- p6, p7, p8, p9) , typename p0##_type, typename p1##_type, \
- typename p2##_type, typename p3##_type, typename p4##_type, \
- typename p5##_type, typename p6##_type, typename p7##_type, \
- typename p8##_type, typename p9##_type
-
-// Initializes the value parameters.
-#define GMOCK_INTERNAL_INIT_AND_0_VALUE_PARAMS()\
- ()
-#define GMOCK_INTERNAL_INIT_AND_1_VALUE_PARAMS(p0)\
- (p0##_type gmock_p0) : p0(::std::move(gmock_p0))
-#define GMOCK_INTERNAL_INIT_AND_2_VALUE_PARAMS(p0, p1)\
- (p0##_type gmock_p0, p1##_type gmock_p1) : p0(::std::move(gmock_p0)), \
- p1(::std::move(gmock_p1))
-#define GMOCK_INTERNAL_INIT_AND_3_VALUE_PARAMS(p0, p1, p2)\
- (p0##_type gmock_p0, p1##_type gmock_p1, \
- p2##_type gmock_p2) : p0(::std::move(gmock_p0)), \
- p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2))
-#define GMOCK_INTERNAL_INIT_AND_4_VALUE_PARAMS(p0, p1, p2, p3)\
- (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \
- p3##_type gmock_p3) : p0(::std::move(gmock_p0)), \
- p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \
- p3(::std::move(gmock_p3))
-#define GMOCK_INTERNAL_INIT_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4)\
- (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \
- p3##_type gmock_p3, p4##_type gmock_p4) : p0(::std::move(gmock_p0)), \
- p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \
- p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4))
-#define GMOCK_INTERNAL_INIT_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5)\
- (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \
- p3##_type gmock_p3, p4##_type gmock_p4, \
- p5##_type gmock_p5) : p0(::std::move(gmock_p0)), \
- p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \
- p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \
- p5(::std::move(gmock_p5))
-#define GMOCK_INTERNAL_INIT_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6)\
- (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \
- p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \
- p6##_type gmock_p6) : p0(::std::move(gmock_p0)), \
- p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \
- p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \
- p5(::std::move(gmock_p5)), p6(::std::move(gmock_p6))
-#define GMOCK_INTERNAL_INIT_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7)\
- (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \
- p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \
- p6##_type gmock_p6, p7##_type gmock_p7) : p0(::std::move(gmock_p0)), \
- p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \
- p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \
- p5(::std::move(gmock_p5)), p6(::std::move(gmock_p6)), \
- p7(::std::move(gmock_p7))
-#define GMOCK_INTERNAL_INIT_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \
- p7, p8)\
- (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \
- p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \
- p6##_type gmock_p6, p7##_type gmock_p7, \
- p8##_type gmock_p8) : p0(::std::move(gmock_p0)), \
- p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \
- p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \
- p5(::std::move(gmock_p5)), p6(::std::move(gmock_p6)), \
- p7(::std::move(gmock_p7)), p8(::std::move(gmock_p8))
-#define GMOCK_INTERNAL_INIT_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \
- p7, p8, p9)\
- (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \
- p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \
- p6##_type gmock_p6, p7##_type gmock_p7, p8##_type gmock_p8, \
- p9##_type gmock_p9) : p0(::std::move(gmock_p0)), \
- p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \
- p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \
- p5(::std::move(gmock_p5)), p6(::std::move(gmock_p6)), \
- p7(::std::move(gmock_p7)), p8(::std::move(gmock_p8)), \
- p9(::std::move(gmock_p9))
-
-// Declares the fields for storing the value parameters.
-#define GMOCK_INTERNAL_DEFN_AND_0_VALUE_PARAMS()
-#define GMOCK_INTERNAL_DEFN_AND_1_VALUE_PARAMS(p0) p0##_type p0;
-#define GMOCK_INTERNAL_DEFN_AND_2_VALUE_PARAMS(p0, p1) p0##_type p0; \
- p1##_type p1;
-#define GMOCK_INTERNAL_DEFN_AND_3_VALUE_PARAMS(p0, p1, p2) p0##_type p0; \
- p1##_type p1; p2##_type p2;
-#define GMOCK_INTERNAL_DEFN_AND_4_VALUE_PARAMS(p0, p1, p2, p3) p0##_type p0; \
- p1##_type p1; p2##_type p2; p3##_type p3;
-#define GMOCK_INTERNAL_DEFN_AND_5_VALUE_PARAMS(p0, p1, p2, p3, \
- p4) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; p4##_type p4;
-#define GMOCK_INTERNAL_DEFN_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, \
- p5) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; p4##_type p4; \
- p5##_type p5;
-#define GMOCK_INTERNAL_DEFN_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \
- p6) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; p4##_type p4; \
- p5##_type p5; p6##_type p6;
-#define GMOCK_INTERNAL_DEFN_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \
- p7) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; p4##_type p4; \
- p5##_type p5; p6##_type p6; p7##_type p7;
-#define GMOCK_INTERNAL_DEFN_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \
- p7, p8) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; \
- p4##_type p4; p5##_type p5; p6##_type p6; p7##_type p7; p8##_type p8;
-#define GMOCK_INTERNAL_DEFN_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \
- p7, p8, p9) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; \
- p4##_type p4; p5##_type p5; p6##_type p6; p7##_type p7; p8##_type p8; \
- p9##_type p9;
-
-// Lists the value parameters.
-#define GMOCK_INTERNAL_LIST_AND_0_VALUE_PARAMS()
-#define GMOCK_INTERNAL_LIST_AND_1_VALUE_PARAMS(p0) p0
-#define GMOCK_INTERNAL_LIST_AND_2_VALUE_PARAMS(p0, p1) p0, p1
-#define GMOCK_INTERNAL_LIST_AND_3_VALUE_PARAMS(p0, p1, p2) p0, p1, p2
-#define GMOCK_INTERNAL_LIST_AND_4_VALUE_PARAMS(p0, p1, p2, p3) p0, p1, p2, p3
-#define GMOCK_INTERNAL_LIST_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) p0, p1, \
- p2, p3, p4
-#define GMOCK_INTERNAL_LIST_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) p0, \
- p1, p2, p3, p4, p5
-#define GMOCK_INTERNAL_LIST_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \
- p6) p0, p1, p2, p3, p4, p5, p6
-#define GMOCK_INTERNAL_LIST_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \
- p7) p0, p1, p2, p3, p4, p5, p6, p7
-#define GMOCK_INTERNAL_LIST_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \
- p7, p8) p0, p1, p2, p3, p4, p5, p6, p7, p8
-#define GMOCK_INTERNAL_LIST_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \
- p7, p8, p9) p0, p1, p2, p3, p4, p5, p6, p7, p8, p9
-
-// Lists the value parameter types.
-#define GMOCK_INTERNAL_LIST_TYPE_AND_0_VALUE_PARAMS()
-#define GMOCK_INTERNAL_LIST_TYPE_AND_1_VALUE_PARAMS(p0) , p0##_type
-#define GMOCK_INTERNAL_LIST_TYPE_AND_2_VALUE_PARAMS(p0, p1) , p0##_type, \
- p1##_type
-#define GMOCK_INTERNAL_LIST_TYPE_AND_3_VALUE_PARAMS(p0, p1, p2) , p0##_type, \
- p1##_type, p2##_type
-#define GMOCK_INTERNAL_LIST_TYPE_AND_4_VALUE_PARAMS(p0, p1, p2, p3) , \
- p0##_type, p1##_type, p2##_type, p3##_type
-#define GMOCK_INTERNAL_LIST_TYPE_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) , \
- p0##_type, p1##_type, p2##_type, p3##_type, p4##_type
-#define GMOCK_INTERNAL_LIST_TYPE_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) , \
- p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, p5##_type
-#define GMOCK_INTERNAL_LIST_TYPE_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \
- p6) , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, p5##_type, \
- p6##_type
-#define GMOCK_INTERNAL_LIST_TYPE_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \
- p6, p7) , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, \
- p5##_type, p6##_type, p7##_type
-#define GMOCK_INTERNAL_LIST_TYPE_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \
- p6, p7, p8) , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, \
- p5##_type, p6##_type, p7##_type, p8##_type
-#define GMOCK_INTERNAL_LIST_TYPE_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \
- p6, p7, p8, p9) , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, \
- p5##_type, p6##_type, p7##_type, p8##_type, p9##_type
-
-// Declares the value parameters.
-#define GMOCK_INTERNAL_DECL_AND_0_VALUE_PARAMS()
-#define GMOCK_INTERNAL_DECL_AND_1_VALUE_PARAMS(p0) p0##_type p0
-#define GMOCK_INTERNAL_DECL_AND_2_VALUE_PARAMS(p0, p1) p0##_type p0, \
- p1##_type p1
-#define GMOCK_INTERNAL_DECL_AND_3_VALUE_PARAMS(p0, p1, p2) p0##_type p0, \
- p1##_type p1, p2##_type p2
-#define GMOCK_INTERNAL_DECL_AND_4_VALUE_PARAMS(p0, p1, p2, p3) p0##_type p0, \
- p1##_type p1, p2##_type p2, p3##_type p3
-#define GMOCK_INTERNAL_DECL_AND_5_VALUE_PARAMS(p0, p1, p2, p3, \
- p4) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4
-#define GMOCK_INTERNAL_DECL_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, \
- p5) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, \
- p5##_type p5
-#define GMOCK_INTERNAL_DECL_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \
- p6) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, \
- p5##_type p5, p6##_type p6
-#define GMOCK_INTERNAL_DECL_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \
- p7) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, \
- p5##_type p5, p6##_type p6, p7##_type p7
-#define GMOCK_INTERNAL_DECL_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \
- p7, p8) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \
- p4##_type p4, p5##_type p5, p6##_type p6, p7##_type p7, p8##_type p8
-#define GMOCK_INTERNAL_DECL_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \
- p7, p8, p9) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \
- p4##_type p4, p5##_type p5, p6##_type p6, p7##_type p7, p8##_type p8, \
- p9##_type p9
-
-// The suffix of the class template implementing the action template.
-#define GMOCK_INTERNAL_COUNT_AND_0_VALUE_PARAMS()
-#define GMOCK_INTERNAL_COUNT_AND_1_VALUE_PARAMS(p0) P
-#define GMOCK_INTERNAL_COUNT_AND_2_VALUE_PARAMS(p0, p1) P2
-#define GMOCK_INTERNAL_COUNT_AND_3_VALUE_PARAMS(p0, p1, p2) P3
-#define GMOCK_INTERNAL_COUNT_AND_4_VALUE_PARAMS(p0, p1, p2, p3) P4
-#define GMOCK_INTERNAL_COUNT_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) P5
-#define GMOCK_INTERNAL_COUNT_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) P6
-#define GMOCK_INTERNAL_COUNT_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6) P7
-#define GMOCK_INTERNAL_COUNT_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \
- p7) P8
-#define GMOCK_INTERNAL_COUNT_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \
- p7, p8) P9
-#define GMOCK_INTERNAL_COUNT_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \
- p7, p8, p9) P10
-
-// The name of the class template implementing the action template.
-#define GMOCK_ACTION_CLASS_(name, value_params)\
- GTEST_CONCAT_TOKEN_(name##Action, GMOCK_INTERNAL_COUNT_##value_params)
-
-#define ACTION_TEMPLATE(name, template_params, value_params)\
- template <GMOCK_INTERNAL_DECL_##template_params\
- GMOCK_INTERNAL_DECL_TYPE_##value_params>\
- class GMOCK_ACTION_CLASS_(name, value_params) {\
- public:\
- explicit GMOCK_ACTION_CLASS_(name, value_params)\
- GMOCK_INTERNAL_INIT_##value_params {}\
- template <typename F>\
- class gmock_Impl : public ::testing::ActionInterface<F> {\
- public:\
- typedef F function_type;\
- typedef typename ::testing::internal::Function<F>::Result return_type;\
- typedef typename ::testing::internal::Function<F>::ArgumentTuple\
- args_type;\
- explicit gmock_Impl GMOCK_INTERNAL_INIT_##value_params {}\
- return_type Perform(const args_type& args) override {\
- return ::testing::internal::ActionHelper<return_type, gmock_Impl>::\
- Perform(this, args);\
- }\
- template <GMOCK_ACTION_TEMPLATE_ARGS_NAMES_>\
- return_type gmock_PerformImpl(GMOCK_ACTION_ARG_TYPES_AND_NAMES_) const;\
- GMOCK_INTERNAL_DEFN_##value_params\
- private:\
- GTEST_DISALLOW_ASSIGN_(gmock_Impl);\
- };\
- template <typename F> operator ::testing::Action<F>() const {\
- return ::testing::Action<F>(\
- new gmock_Impl<F>(GMOCK_INTERNAL_LIST_##value_params));\
- }\
- GMOCK_INTERNAL_DEFN_##value_params\
- private:\
- GTEST_DISALLOW_ASSIGN_(GMOCK_ACTION_CLASS_(name, value_params));\
- };\
- template <GMOCK_INTERNAL_DECL_##template_params\
- GMOCK_INTERNAL_DECL_TYPE_##value_params>\
- inline GMOCK_ACTION_CLASS_(name, value_params)<\
- GMOCK_INTERNAL_LIST_##template_params\
- GMOCK_INTERNAL_LIST_TYPE_##value_params> name(\
- GMOCK_INTERNAL_DECL_##value_params) {\
- return GMOCK_ACTION_CLASS_(name, value_params)<\
- GMOCK_INTERNAL_LIST_##template_params\
- GMOCK_INTERNAL_LIST_TYPE_##value_params>(\
- GMOCK_INTERNAL_LIST_##value_params);\
- }\
- template <GMOCK_INTERNAL_DECL_##template_params\
- GMOCK_INTERNAL_DECL_TYPE_##value_params>\
- template <typename F>\
- template <GMOCK_ACTION_TEMPLATE_ARGS_NAMES_>\
- typename ::testing::internal::Function<F>::Result\
- GMOCK_ACTION_CLASS_(name, value_params)<\
- GMOCK_INTERNAL_LIST_##template_params\
- GMOCK_INTERNAL_LIST_TYPE_##value_params>::gmock_Impl<F>::\
- gmock_PerformImpl(\
- GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const
-
-
-namespace testing {
-
-
-// The ACTION*() macros trigger warning C4100 (unreferenced formal
-// parameter) in MSVC with -W4. Unfortunately they cannot be fixed in
-// the macro definition, as the warnings are generated when the macro
-// is expanded and macro expansion cannot contain #pragma. Therefore
-// we suppress them here.
-#ifdef _MSC_VER
-# pragma warning(push)
-# pragma warning(disable:4100)
-#endif
-
-// Various overloads for InvokeArgument<N>().
-//
-// The InvokeArgument<N>(a1, a2, ..., a_k) action invokes the N-th
-// (0-based) argument, which must be a k-ary callable, of the mock
-// function, with arguments a1, a2, ..., a_k.
-//
-// Notes:
-//
-// 1. The arguments are passed by value by default. If you need to
-// pass an argument by reference, wrap it inside ByRef(). For
-// example,
-//
-// InvokeArgument<1>(5, string("Hello"), ByRef(foo))
-//
-// passes 5 and string("Hello") by value, and passes foo by
-// reference.
-//
-// 2. If the callable takes an argument by reference but ByRef() is
-// not used, it will receive the reference to a copy of the value,
-// instead of the original value. For example, when the 0-th
-// argument of the mock function takes a const string&, the action
-//
-// InvokeArgument<0>(string("Hello"))
-//
-// makes a copy of the temporary string("Hello") object and passes a
-// reference of the copy, instead of the original temporary object,
-// to the callable. This makes it easy for a user to define an
-// InvokeArgument action from temporary values and have it performed
-// later.
-
-ACTION_TEMPLATE(InvokeArgument,
- HAS_1_TEMPLATE_PARAMS(int, k),
- AND_0_VALUE_PARAMS()) {
- using internal::invoke_argument::InvokeArgumentAdl;
- return InvokeArgumentAdl(internal::invoke_argument::AdlTag(),
- ::std::get<k>(args));
-}
-
-ACTION_TEMPLATE(InvokeArgument,
- HAS_1_TEMPLATE_PARAMS(int, k),
- AND_1_VALUE_PARAMS(p0)) {
- using internal::invoke_argument::InvokeArgumentAdl;
- return InvokeArgumentAdl(internal::invoke_argument::AdlTag(),
- ::std::get<k>(args), p0);
-}
-
-ACTION_TEMPLATE(InvokeArgument,
- HAS_1_TEMPLATE_PARAMS(int, k),
- AND_2_VALUE_PARAMS(p0, p1)) {
- using internal::invoke_argument::InvokeArgumentAdl;
- return InvokeArgumentAdl(internal::invoke_argument::AdlTag(),
- ::std::get<k>(args), p0, p1);
-}
-
-ACTION_TEMPLATE(InvokeArgument,
- HAS_1_TEMPLATE_PARAMS(int, k),
- AND_3_VALUE_PARAMS(p0, p1, p2)) {
- using internal::invoke_argument::InvokeArgumentAdl;
- return InvokeArgumentAdl(internal::invoke_argument::AdlTag(),
- ::std::get<k>(args), p0, p1, p2);
-}
-
-ACTION_TEMPLATE(InvokeArgument,
- HAS_1_TEMPLATE_PARAMS(int, k),
- AND_4_VALUE_PARAMS(p0, p1, p2, p3)) {
- using internal::invoke_argument::InvokeArgumentAdl;
- return InvokeArgumentAdl(internal::invoke_argument::AdlTag(),
- ::std::get<k>(args), p0, p1, p2, p3);
-}
-
-ACTION_TEMPLATE(InvokeArgument,
- HAS_1_TEMPLATE_PARAMS(int, k),
- AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4)) {
- using internal::invoke_argument::InvokeArgumentAdl;
- return InvokeArgumentAdl(internal::invoke_argument::AdlTag(),
- ::std::get<k>(args), p0, p1, p2, p3, p4);
-}
-
-ACTION_TEMPLATE(InvokeArgument,
- HAS_1_TEMPLATE_PARAMS(int, k),
- AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5)) {
- using internal::invoke_argument::InvokeArgumentAdl;
- return InvokeArgumentAdl(internal::invoke_argument::AdlTag(),
- ::std::get<k>(args), p0, p1, p2, p3, p4, p5);
-}
-
-ACTION_TEMPLATE(InvokeArgument,
- HAS_1_TEMPLATE_PARAMS(int, k),
- AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6)) {
- using internal::invoke_argument::InvokeArgumentAdl;
- return InvokeArgumentAdl(internal::invoke_argument::AdlTag(),
- ::std::get<k>(args), p0, p1, p2, p3, p4, p5, p6);
-}
-
-ACTION_TEMPLATE(InvokeArgument,
- HAS_1_TEMPLATE_PARAMS(int, k),
- AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7)) {
- using internal::invoke_argument::InvokeArgumentAdl;
- return InvokeArgumentAdl(internal::invoke_argument::AdlTag(),
- ::std::get<k>(args), p0, p1, p2, p3, p4, p5, p6, p7);
-}
-
-ACTION_TEMPLATE(InvokeArgument,
- HAS_1_TEMPLATE_PARAMS(int, k),
- AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7, p8)) {
- using internal::invoke_argument::InvokeArgumentAdl;
- return InvokeArgumentAdl(internal::invoke_argument::AdlTag(),
- ::std::get<k>(args), p0, p1, p2, p3, p4, p5, p6, p7,
- p8);
-}
-
-ACTION_TEMPLATE(InvokeArgument,
- HAS_1_TEMPLATE_PARAMS(int, k),
- AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9)) {
- using internal::invoke_argument::InvokeArgumentAdl;
- return InvokeArgumentAdl(internal::invoke_argument::AdlTag(),
- ::std::get<k>(args), p0, p1, p2, p3, p4, p5, p6, p7,
- p8, p9);
-}
-
-// Various overloads for ReturnNew<T>().
-//
-// The ReturnNew<T>(a1, a2, ..., a_k) action returns a pointer to a new
-// instance of type T, constructed on the heap with constructor arguments
-// a1, a2, ..., and a_k. The caller assumes ownership of the returned value.
-ACTION_TEMPLATE(ReturnNew,
- HAS_1_TEMPLATE_PARAMS(typename, T),
- AND_0_VALUE_PARAMS()) {
- return new T();
-}
-
-ACTION_TEMPLATE(ReturnNew,
- HAS_1_TEMPLATE_PARAMS(typename, T),
- AND_1_VALUE_PARAMS(p0)) {
- return new T(p0);
-}
-
-ACTION_TEMPLATE(ReturnNew,
- HAS_1_TEMPLATE_PARAMS(typename, T),
- AND_2_VALUE_PARAMS(p0, p1)) {
- return new T(p0, p1);
-}
-
-ACTION_TEMPLATE(ReturnNew,
- HAS_1_TEMPLATE_PARAMS(typename, T),
- AND_3_VALUE_PARAMS(p0, p1, p2)) {
- return new T(p0, p1, p2);
-}
-
-ACTION_TEMPLATE(ReturnNew,
- HAS_1_TEMPLATE_PARAMS(typename, T),
- AND_4_VALUE_PARAMS(p0, p1, p2, p3)) {
- return new T(p0, p1, p2, p3);
-}
-
-ACTION_TEMPLATE(ReturnNew,
- HAS_1_TEMPLATE_PARAMS(typename, T),
- AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4)) {
- return new T(p0, p1, p2, p3, p4);
-}
-
-ACTION_TEMPLATE(ReturnNew,
- HAS_1_TEMPLATE_PARAMS(typename, T),
- AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5)) {
- return new T(p0, p1, p2, p3, p4, p5);
-}
-
-ACTION_TEMPLATE(ReturnNew,
- HAS_1_TEMPLATE_PARAMS(typename, T),
- AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6)) {
- return new T(p0, p1, p2, p3, p4, p5, p6);
-}
-
-ACTION_TEMPLATE(ReturnNew,
- HAS_1_TEMPLATE_PARAMS(typename, T),
- AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7)) {
- return new T(p0, p1, p2, p3, p4, p5, p6, p7);
-}
-
-ACTION_TEMPLATE(ReturnNew,
- HAS_1_TEMPLATE_PARAMS(typename, T),
- AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7, p8)) {
- return new T(p0, p1, p2, p3, p4, p5, p6, p7, p8);
-}
-
-ACTION_TEMPLATE(ReturnNew,
- HAS_1_TEMPLATE_PARAMS(typename, T),
- AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9)) {
- return new T(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9);
-}
-
-#ifdef _MSC_VER
-# pragma warning(pop)
-#endif
-
-} // namespace testing
-
-// Include any custom callback actions added by the local installation.
-// We must include this header at the end to make sure it can use the
-// declarations from this file.
-#include "gmock/internal/custom/gmock-generated-actions.h"
-
-#endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_
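
The deleted header above provided the fixed-arity InvokeArgument<N>() and ReturnNew<T>() overloads; the updated googletest implements the same actions variadically in gmock-actions.h, so call sites keep compiling unchanged. A minimal usage sketch of the two actions (MockParser and its methods are hypothetical names, not part of this patch):

    #include <functional>
    #include <string>

    #include "gmock/gmock.h"

    using ::testing::_;
    using ::testing::InvokeArgument;
    using ::testing::ReturnNew;

    class MockParser {
     public:
      MOCK_METHOD(void, Parse,
                  (const std::string& input, std::function<void(int)> on_done));
      MOCK_METHOD(std::string*, NewBuffer, (size_t size));
    };

    TEST(ActionSketch, InvokeArgumentAndReturnNew) {
      MockParser mock;
      // Call the 1st (0-based) argument -- the completion callback -- with 0.
      EXPECT_CALL(mock, Parse(_, _)).WillOnce(InvokeArgument<1>(0));
      // Return new std::string("abc"); ownership passes to the caller.
      EXPECT_CALL(mock, NewBuffer(_)).WillOnce(ReturnNew<std::string>("abc"));

      mock.Parse("input", [](int error_code) { ASSERT_EQ(error_code, 0); });
      delete mock.NewBuffer(16);
    }
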
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-generated-actions.h.pump b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-generated-actions.h.pump
deleted file mode 100644
index be9d99fed20..00000000000
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-generated-actions.h.pump
+++ /dev/null
@@ -1,376 +0,0 @@
-$$ -*- mode: c++; -*-
-$$ This is a Pump source file. Please use Pump to convert it to
-$$ gmock-generated-actions.h.
-$$
-$var n = 10 $$ The maximum arity we support.
-$$}} This meta comment fixes auto-indentation in editors.
-// Copyright 2007, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-// Google Mock - a framework for writing C++ mock classes.
-//
-// This file implements some commonly used variadic actions.
-
-// GOOGLETEST_CM0002 DO NOT DELETE
-
-#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_
-#define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_
-
-#include <memory>
-#include <utility>
-
-#include "gmock/gmock-actions.h"
-#include "gmock/internal/gmock-port.h"
-
-$range i 0..n
-$range k 0..n-1
-
-
-// Sometimes you want to give an action explicit template parameters
-// that cannot be inferred from its value parameters. ACTION() and
-// ACTION_P*() don't support that. ACTION_TEMPLATE() remedies that
-// and can be viewed as an extension to ACTION() and ACTION_P*().
-//
-// The syntax:
-//
-// ACTION_TEMPLATE(ActionName,
-// HAS_m_TEMPLATE_PARAMS(kind1, name1, ..., kind_m, name_m),
-// AND_n_VALUE_PARAMS(p1, ..., p_n)) { statements; }
-//
-// defines an action template that takes m explicit template
-// parameters and n value parameters. name_i is the name of the i-th
-// template parameter, and kind_i specifies whether it's a typename,
-// an integral constant, or a template. p_i is the name of the i-th
-// value parameter.
-//
-// Example:
-//
-// // DuplicateArg<k, T>(output) converts the k-th argument of the mock
-// // function to type T and copies it to *output.
-// ACTION_TEMPLATE(DuplicateArg,
-// HAS_2_TEMPLATE_PARAMS(int, k, typename, T),
-// AND_1_VALUE_PARAMS(output)) {
-// *output = T(::std::get<k>(args));
-// }
-// ...
-// int n;
-// EXPECT_CALL(mock, Foo(_, _))
-// .WillOnce(DuplicateArg<1, unsigned char>(&n));
-//
-// To create an instance of an action template, write:
-//
-// ActionName<t1, ..., t_m>(v1, ..., v_n)
-//
-// where the ts are the template arguments and the vs are the value
-// arguments. The value argument types are inferred by the compiler.
-// If you want to explicitly specify the value argument types, you can
-// provide additional template arguments:
-//
-// ActionName<t1, ..., t_m, u1, ..., u_k>(v1, ..., v_n)
-//
-// where u_i is the desired type of v_i.
-//
-// ACTION_TEMPLATE and ACTION/ACTION_P* can be overloaded on the
-// number of value parameters, but not on the number of template
-// parameters. Without the restriction, the meaning of the following
-// is unclear:
-//
-// OverloadedAction<int, bool>(x);
-//
-// Are we using a single-template-parameter action where 'bool' refers
-// to the type of x, or are we using a two-template-parameter action
-// where the compiler is asked to infer the type of x?
-//
-// Implementation notes:
-//
-// GMOCK_INTERNAL_*_HAS_m_TEMPLATE_PARAMS and
-// GMOCK_INTERNAL_*_AND_n_VALUE_PARAMS are internal macros for
-// implementing ACTION_TEMPLATE. The main trick we use is to create
-// new macro invocations when expanding a macro. For example, we have
-//
-// #define ACTION_TEMPLATE(name, template_params, value_params)
-// ... GMOCK_INTERNAL_DECL_##template_params ...
-//
-// which causes ACTION_TEMPLATE(..., HAS_1_TEMPLATE_PARAMS(typename, T), ...)
-// to expand to
-//
-// ... GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS(typename, T) ...
-//
-// Since GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS is a macro, the
-// preprocessor will continue to expand it to
-//
-// ... typename T ...
-//
-// This technique conforms to the C++ standard and is portable. It
-// allows us to implement action templates using O(N) code, where N is
-// the maximum number of template/value parameters supported. Without
-// using it, we'd have to devote O(N^2) amount of code to implement all
-// combinations of m and n.
-
-// Declares the template parameters.
-
-$range j 1..n
-$for j [[
-$range m 0..j-1
-#define GMOCK_INTERNAL_DECL_HAS_$j[[]]
-_TEMPLATE_PARAMS($for m, [[kind$m, name$m]]) $for m, [[kind$m name$m]]
-
-
-]]
-
-// Lists the template parameters.
-
-$for j [[
-$range m 0..j-1
-#define GMOCK_INTERNAL_LIST_HAS_$j[[]]
-_TEMPLATE_PARAMS($for m, [[kind$m, name$m]]) $for m, [[name$m]]
-
-
-]]
-
-// Declares the types of value parameters.
-
-$for i [[
-$range j 0..i-1
-#define GMOCK_INTERNAL_DECL_TYPE_AND_$i[[]]
-_VALUE_PARAMS($for j, [[p$j]]) $for j [[, typename p$j##_type]]
-
-
-]]
-
-// Initializes the value parameters.
-
-$for i [[
-$range j 0..i-1
-#define GMOCK_INTERNAL_INIT_AND_$i[[]]_VALUE_PARAMS($for j, [[p$j]])\
- ($for j, [[p$j##_type gmock_p$j]])$if i>0 [[ : ]]$for j, [[p$j(::std::move(gmock_p$j))]]
-
-
-]]
-
-// Declares the fields for storing the value parameters.
-
-$for i [[
-$range j 0..i-1
-#define GMOCK_INTERNAL_DEFN_AND_$i[[]]
-_VALUE_PARAMS($for j, [[p$j]]) $for j [[p$j##_type p$j; ]]
-
-
-]]
-
-// Lists the value parameters.
-
-$for i [[
-$range j 0..i-1
-#define GMOCK_INTERNAL_LIST_AND_$i[[]]
-_VALUE_PARAMS($for j, [[p$j]]) $for j, [[p$j]]
-
-
-]]
-
-// Lists the value parameter types.
-
-$for i [[
-$range j 0..i-1
-#define GMOCK_INTERNAL_LIST_TYPE_AND_$i[[]]
-_VALUE_PARAMS($for j, [[p$j]]) $for j [[, p$j##_type]]
-
-
-]]
-
-// Declares the value parameters.
-
-$for i [[
-$range j 0..i-1
-#define GMOCK_INTERNAL_DECL_AND_$i[[]]_VALUE_PARAMS($for j, [[p$j]]) [[]]
-$for j, [[p$j##_type p$j]]
-
-
-]]
-
-// The suffix of the class template implementing the action template.
-$for i [[
-
-
-$range j 0..i-1
-#define GMOCK_INTERNAL_COUNT_AND_$i[[]]_VALUE_PARAMS($for j, [[p$j]]) [[]]
-$if i==1 [[P]] $elif i>=2 [[P$i]]
-]]
-
-
-// The name of the class template implementing the action template.
-#define GMOCK_ACTION_CLASS_(name, value_params)\
- GTEST_CONCAT_TOKEN_(name##Action, GMOCK_INTERNAL_COUNT_##value_params)
-
-$range k 0..n-1
-
-#define ACTION_TEMPLATE(name, template_params, value_params)\
- template <GMOCK_INTERNAL_DECL_##template_params\
- GMOCK_INTERNAL_DECL_TYPE_##value_params>\
- class GMOCK_ACTION_CLASS_(name, value_params) {\
- public:\
- explicit GMOCK_ACTION_CLASS_(name, value_params)\
- GMOCK_INTERNAL_INIT_##value_params {}\
- template <typename F>\
- class gmock_Impl : public ::testing::ActionInterface<F> {\
- public:\
- typedef F function_type;\
- typedef typename ::testing::internal::Function<F>::Result return_type;\
- typedef typename ::testing::internal::Function<F>::ArgumentTuple\
- args_type;\
- explicit gmock_Impl GMOCK_INTERNAL_INIT_##value_params {}\
- return_type Perform(const args_type& args) override {\
- return ::testing::internal::ActionHelper<return_type, gmock_Impl>::\
- Perform(this, args);\
- }\
- template <GMOCK_ACTION_TEMPLATE_ARGS_NAMES_>\
- return_type gmock_PerformImpl(GMOCK_ACTION_ARG_TYPES_AND_NAMES_) const;\
- GMOCK_INTERNAL_DEFN_##value_params\
- private:\
- GTEST_DISALLOW_ASSIGN_(gmock_Impl);\
- };\
- template <typename F> operator ::testing::Action<F>() const {\
- return ::testing::Action<F>(\
- new gmock_Impl<F>(GMOCK_INTERNAL_LIST_##value_params));\
- }\
- GMOCK_INTERNAL_DEFN_##value_params\
- private:\
- GTEST_DISALLOW_ASSIGN_(GMOCK_ACTION_CLASS_(name, value_params));\
- };\
- template <GMOCK_INTERNAL_DECL_##template_params\
- GMOCK_INTERNAL_DECL_TYPE_##value_params>\
- inline GMOCK_ACTION_CLASS_(name, value_params)<\
- GMOCK_INTERNAL_LIST_##template_params\
- GMOCK_INTERNAL_LIST_TYPE_##value_params> name(\
- GMOCK_INTERNAL_DECL_##value_params) {\
- return GMOCK_ACTION_CLASS_(name, value_params)<\
- GMOCK_INTERNAL_LIST_##template_params\
- GMOCK_INTERNAL_LIST_TYPE_##value_params>(\
- GMOCK_INTERNAL_LIST_##value_params);\
- }\
- template <GMOCK_INTERNAL_DECL_##template_params\
- GMOCK_INTERNAL_DECL_TYPE_##value_params>\
- template <typename F>\
- template <GMOCK_ACTION_TEMPLATE_ARGS_NAMES_>\
- typename ::testing::internal::Function<F>::Result\
- GMOCK_ACTION_CLASS_(name, value_params)<\
- GMOCK_INTERNAL_LIST_##template_params\
- GMOCK_INTERNAL_LIST_TYPE_##value_params>::gmock_Impl<F>::\
- gmock_PerformImpl(\
- GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const
-
-
-namespace testing {
-
-
-// The ACTION*() macros trigger warning C4100 (unreferenced formal
-// parameter) in MSVC with -W4. Unfortunately they cannot be fixed in
-// the macro definition, as the warnings are generated when the macro
-// is expanded and macro expansion cannot contain #pragma. Therefore
-// we suppress them here.
-#ifdef _MSC_VER
-# pragma warning(push)
-# pragma warning(disable:4100)
-#endif
-
-// Various overloads for InvokeArgument<N>().
-//
-// The InvokeArgument<N>(a1, a2, ..., a_k) action invokes the N-th
-// (0-based) argument, which must be a k-ary callable, of the mock
-// function, with arguments a1, a2, ..., a_k.
-//
-// Notes:
-//
-// 1. The arguments are passed by value by default. If you need to
-// pass an argument by reference, wrap it inside ByRef(). For
-// example,
-//
-// InvokeArgument<1>(5, string("Hello"), ByRef(foo))
-//
-// passes 5 and string("Hello") by value, and passes foo by
-// reference.
-//
-// 2. If the callable takes an argument by reference but ByRef() is
-// not used, it will receive the reference to a copy of the value,
-// instead of the original value. For example, when the 0-th
-// argument of the mock function takes a const string&, the action
-//
-// InvokeArgument<0>(string("Hello"))
-//
-// makes a copy of the temporary string("Hello") object and passes a
-// reference of the copy, instead of the original temporary object,
-// to the callable. This makes it easy for a user to define an
-// InvokeArgument action from temporary values and have it performed
-// later.
-
-$range i 0..n
-$for i [[
-$range j 0..i-1
-
-ACTION_TEMPLATE(InvokeArgument,
- HAS_1_TEMPLATE_PARAMS(int, k),
- AND_$i[[]]_VALUE_PARAMS($for j, [[p$j]])) {
- using internal::invoke_argument::InvokeArgumentAdl;
- return InvokeArgumentAdl(internal::invoke_argument::AdlTag(),
- ::std::get<k>(args)$for j[[, p$j]]);
-}
-
-]]
-
-// Various overloads for ReturnNew<T>().
-//
-// The ReturnNew<T>(a1, a2, ..., a_k) action returns a pointer to a new
-// instance of type T, constructed on the heap with constructor arguments
-// a1, a2, ..., and a_k. The caller assumes ownership of the returned value.
-$range i 0..n
-$for i [[
-$range j 0..i-1
-$var ps = [[$for j, [[p$j]]]]
-
-ACTION_TEMPLATE(ReturnNew,
- HAS_1_TEMPLATE_PARAMS(typename, T),
- AND_$i[[]]_VALUE_PARAMS($ps)) {
- return new T($ps);
-}
-
-]]
-
-#ifdef _MSC_VER
-# pragma warning(pop)
-#endif
-
-} // namespace testing
-
-// Include any custom callback actions added by the local installation.
-// We must include this header at the end to make sure it can use the
-// declarations from this file.
-#include "gmock/internal/custom/gmock-generated-actions.h"
-
-#endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_
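
For readers unfamiliar with Pump: each $for loop in the deleted template above stamps out one macro per arity. As a hand-written sketch (not the generator's literal output), the j == 2 iteration of the DECL/LIST loops produces:

    // Sketch: what the DECL/LIST $for loops emit for j == 2.
    #define GMOCK_INTERNAL_DECL_HAS_2_TEMPLATE_PARAMS(kind0, name0, kind1, name1) \
      kind0 name0, kind1 name1
    #define GMOCK_INTERNAL_LIST_HAS_2_TEMPLATE_PARAMS(kind0, name0, kind1, name1) \
      name0, name1

    // Inside ACTION_TEMPLATE, "GMOCK_INTERNAL_DECL_##template_params" with
    // template_params = HAS_2_TEMPLATE_PARAMS(typename, T, int, k) pastes into
    // GMOCK_INTERNAL_DECL_HAS_2_TEMPLATE_PARAMS(typename, T, int, k)
    // and finally expands to:  typename T, int k

This is the O(N) token-pasting trick described in the file's implementation notes: one macro per arity, combined at expansion time, instead of O(N^2) hand-written combinations.
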
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-matchers.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-matchers.h
index 4b6ac5634ec..6282901145a 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-matchers.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-matchers.h
@@ -27,7 +27,6 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
// Google Mock - a framework for writing C++ mock classes.
//
// The MATCHER* family of macros can be used in a namespace scope to
@@ -241,7 +240,7 @@
//
// To learn more about using these macros, please search for 'MATCHER'
// on
-// https://github.com/google/googletest/blob/master/googlemock/docs/cook_book.md
+// https://github.com/google/googletest/blob/master/docs/gmock_cook_book.md
//
// This file also implements some commonly used argument matchers. More
// matchers can be defined by the user implementing the
@@ -250,10 +249,11 @@
// See googletest/include/gtest/gtest-matchers.h for the definition of class
// Matcher, class MatcherInterface, and others.
-// GOOGLETEST_CM0002 DO NOT DELETE
+// IWYU pragma: private, include "gmock/gmock.h"
+// IWYU pragma: friend gmock/.*
-#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_MATCHERS_H_
-#define GMOCK_INCLUDE_GMOCK_GMOCK_MATCHERS_H_
+#ifndef GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_MATCHERS_H_
+#define GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_MATCHERS_H_
#include <algorithm>
#include <cmath>
@@ -313,7 +313,9 @@ class StringMatchResultListener : public MatchResultListener {
private:
::std::stringstream ss_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(StringMatchResultListener);
+ StringMatchResultListener(const StringMatchResultListener&) = delete;
+ StringMatchResultListener& operator=(const StringMatchResultListener&) =
+ delete;
};
// Anything inside the 'internal' namespace IS INTERNAL IMPLEMENTATION
@@ -343,7 +345,7 @@ class MatcherCastImpl {
// constructor from M (this usually happens when T has an implicit
// constructor from any type).
//
- // It won't work to unconditionally implict_cast
+ // It won't work to unconditionally implicit_cast
// polymorphic_matcher_or_value to Matcher<T> because it won't trigger
// a user-defined conversion from M to T if one exists (assuming M is
// a value).
@@ -396,7 +398,7 @@ class MatcherCastImpl {
// is already a Matcher. This only compiles when type T can be
// statically converted to type U.
template <typename T, typename U>
-class MatcherCastImpl<T, Matcher<U> > {
+class MatcherCastImpl<T, Matcher<U>> {
public:
static Matcher<T> Cast(const Matcher<U>& source_matcher) {
return Matcher<T>(new Impl(source_matcher));
@@ -424,7 +426,14 @@ class MatcherCastImpl<T, Matcher<U> > {
!std::is_base_of<FromType, ToType>::value,
"Can't implicitly convert from <base> to <derived>");
- return source_matcher_.MatchAndExplain(static_cast<U>(x), listener);
+ // Do the cast to `U` explicitly if necessary.
+ // Otherwise, let implicit conversions do the trick.
+ using CastType =
+ typename std::conditional<std::is_convertible<T&, const U&>::value,
+ T&, U>::type;
+
+ return source_matcher_.MatchAndExplain(static_cast<CastType>(x),
+ listener);
}
void DescribeTo(::std::ostream* os) const override {
@@ -437,15 +446,13 @@ class MatcherCastImpl<T, Matcher<U> > {
private:
const Matcher<U> source_matcher_;
-
- GTEST_DISALLOW_ASSIGN_(Impl);
};
};
// This even more specialized version is used for efficiently casting
// a matcher to its own type.
template <typename T>
-class MatcherCastImpl<T, Matcher<T> > {
+class MatcherCastImpl<T, Matcher<T>> {
public:
static Matcher<T> Cast(const Matcher<T>& matcher) { return matcher; }
};
@@ -524,23 +531,22 @@ inline Matcher<T> SafeMatcherCast(const M& polymorphic_matcher_or_value) {
template <typename T, typename U>
inline Matcher<T> SafeMatcherCast(const Matcher<U>& matcher) {
// Enforce that T can be implicitly converted to U.
- GTEST_COMPILE_ASSERT_((std::is_convertible<T, U>::value),
- "T must be implicitly convertible to U");
+ static_assert(std::is_convertible<const T&, const U&>::value,
+ "T must be implicitly convertible to U");
// Enforce that we are not converting a non-reference type T to a reference
// type U.
- GTEST_COMPILE_ASSERT_(
- std::is_reference<T>::value || !std::is_reference<U>::value,
- cannot_convert_non_reference_arg_to_reference);
+ static_assert(std::is_reference<T>::value || !std::is_reference<U>::value,
+ "cannot convert non reference arg to reference");
// In case both T and U are arithmetic types, enforce that the
// conversion is not lossy.
typedef GTEST_REMOVE_REFERENCE_AND_CONST_(T) RawT;
typedef GTEST_REMOVE_REFERENCE_AND_CONST_(U) RawU;
constexpr bool kTIsOther = GMOCK_KIND_OF_(RawT) == internal::kOther;
constexpr bool kUIsOther = GMOCK_KIND_OF_(RawU) == internal::kOther;
- GTEST_COMPILE_ASSERT_(
+ static_assert(
kTIsOther || kUIsOther ||
- (internal::LosslessArithmeticConvertible<RawT, RawU>::value),
- conversion_of_arithmetic_types_must_be_lossless);
+ (internal::LosslessArithmeticConvertible<RawT, RawU>::value),
+ "conversion of arithmetic types must be lossless");
return MatcherCast<T>(matcher);
}
@@ -673,9 +679,9 @@ bool TupleMatches(const MatcherTuple& matcher_tuple,
const ValueTuple& value_tuple) {
// Makes sure that matcher_tuple and value_tuple have the same
// number of fields.
- GTEST_COMPILE_ASSERT_(std::tuple_size<MatcherTuple>::value ==
- std::tuple_size<ValueTuple>::value,
- matcher_and_value_have_different_numbers_of_fields);
+ static_assert(std::tuple_size<MatcherTuple>::value ==
+ std::tuple_size<ValueTuple>::value,
+ "matcher and value have different numbers of fields");
return TuplePrefix<std::tuple_size<ValueTuple>::value>::Matches(matcher_tuple,
value_tuple);
}
@@ -684,8 +690,7 @@ bool TupleMatches(const MatcherTuple& matcher_tuple,
// is no failure, nothing will be streamed to os.
template <typename MatcherTuple, typename ValueTuple>
void ExplainMatchFailureTupleTo(const MatcherTuple& matchers,
- const ValueTuple& values,
- ::std::ostream* os) {
+ const ValueTuple& values, ::std::ostream* os) {
TuplePrefix<std::tuple_size<MatcherTuple>::value>::ExplainMatchFailuresTo(
matchers, values, os);
}
@@ -709,14 +714,14 @@ class TransformTupleValuesHelper {
private:
template <typename Tup, size_t kRemainingSize>
struct IterateOverTuple {
- OutIter operator() (Func f, const Tup& t, OutIter out) const {
+ OutIter operator()(Func f, const Tup& t, OutIter out) const {
*out++ = f(::std::get<TupleSize::value - kRemainingSize>(t));
return IterateOverTuple<Tup, kRemainingSize - 1>()(f, t, out);
}
};
template <typename Tup>
struct IterateOverTuple<Tup, 0> {
- OutIter operator() (Func /* f */, const Tup& /* t */, OutIter out) const {
+ OutIter operator()(Func /* f */, const Tup& /* t */, OutIter out) const {
return out;
}
};
@@ -730,31 +735,25 @@ OutIter TransformTupleValues(Func f, const Tuple& t, OutIter out) {
return TransformTupleValuesHelper<Tuple, Func, OutIter>::Run(f, t, out);
}
-// Implements A<T>().
-template <typename T>
-class AnyMatcherImpl : public MatcherInterface<const T&> {
- public:
- bool MatchAndExplain(const T& /* x */,
- MatchResultListener* /* listener */) const override {
- return true;
- }
- void DescribeTo(::std::ostream* os) const override { *os << "is anything"; }
- void DescribeNegationTo(::std::ostream* os) const override {
- // This is mostly for completeness' safe, as it's not very useful
- // to write Not(A<bool>()). However we cannot completely rule out
- // such a possibility, and it doesn't hurt to be prepared.
- *os << "never matches";
- }
-};
-
// Implements _, a matcher that matches any value of any
// type. This is a polymorphic matcher, so we need a template type
// conversion operator to make it appearing as a Matcher<T> for any
// type T.
class AnythingMatcher {
public:
+ using is_gtest_matcher = void;
+
template <typename T>
- operator Matcher<T>() const { return A<T>(); }
+ bool MatchAndExplain(const T& /* x */, std::ostream* /* listener */) const {
+ return true;
+ }
+ void DescribeTo(std::ostream* os) const { *os << "is anything"; }
+ void DescribeNegationTo(::std::ostream* os) const {
+ // This is mostly for completeness' sake, as it's not very useful
+ // to write Not(A<bool>()). However we cannot completely rule out
+ // such a possibility, and it doesn't hurt to be prepared.
+ *os << "never matches";
+ }
};
// Implements the polymorphic IsNull() matcher, which matches any raw or smart
@@ -768,9 +767,7 @@ class IsNullMatcher {
}
void DescribeTo(::std::ostream* os) const { *os << "is NULL"; }
- void DescribeNegationTo(::std::ostream* os) const {
- *os << "isn't NULL";
- }
+ void DescribeNegationTo(::std::ostream* os) const { *os << "isn't NULL"; }
};
// Implements the polymorphic NotNull() matcher, which matches any raw or smart
@@ -784,9 +781,7 @@ class NotNullMatcher {
}
void DescribeTo(::std::ostream* os) const { *os << "isn't NULL"; }
- void DescribeNegationTo(::std::ostream* os) const {
- *os << "is NULL";
- }
+ void DescribeNegationTo(::std::ostream* os) const { *os << "is NULL"; }
};
// Ref(variable) matches any argument that is a reference to
@@ -854,13 +849,9 @@ class RefMatcher<T&> {
private:
const Super& object_;
-
- GTEST_DISALLOW_ASSIGN_(Impl);
};
T& object_;
-
- GTEST_DISALLOW_ASSIGN_(RefMatcher);
};
// Polymorphic helper functions for narrow and wide string matchers.
@@ -876,8 +867,7 @@ inline bool CaseInsensitiveCStringEquals(const wchar_t* lhs,
// String comparison for narrow or wide strings that can have embedded NUL
// characters.
template <typename StringType>
-bool CaseInsensitiveStringEquals(const StringType& s1,
- const StringType& s2) {
+bool CaseInsensitiveStringEquals(const StringType& s1, const StringType& s2) {
// Are the heads equal?
if (!CaseInsensitiveCStringEquals(s1.c_str(), s2.c_str())) {
return false;
@@ -902,9 +892,10 @@ bool CaseInsensitiveStringEquals(const StringType& s1,
template <typename StringType>
class StrEqualityMatcher {
public:
- StrEqualityMatcher(const StringType& str, bool expect_eq,
- bool case_sensitive)
- : string_(str), expect_eq_(expect_eq), case_sensitive_(case_sensitive) {}
+ StrEqualityMatcher(StringType str, bool expect_eq, bool case_sensitive)
+ : string_(std::move(str)),
+ expect_eq_(expect_eq),
+ case_sensitive_(case_sensitive) {}
#if GTEST_INTERNAL_HAS_STRING_VIEW
bool MatchAndExplain(const internal::StringView& s,
@@ -937,8 +928,8 @@ class StrEqualityMatcher {
bool MatchAndExplain(const MatcheeStringType& s,
MatchResultListener* /* listener */) const {
const StringType s2(s);
- const bool eq = case_sensitive_ ? s2 == string_ :
- CaseInsensitiveStringEquals(s2, string_);
+ const bool eq = case_sensitive_ ? s2 == string_
+ : CaseInsensitiveStringEquals(s2, string_);
return expect_eq_ == eq;
}
@@ -963,8 +954,6 @@ class StrEqualityMatcher {
const StringType string_;
const bool expect_eq_;
const bool case_sensitive_;
-
- GTEST_DISALLOW_ASSIGN_(StrEqualityMatcher);
};
// Implements the polymorphic HasSubstr(substring) matcher, which
@@ -1019,8 +1008,6 @@ class HasSubstrMatcher {
private:
const StringType substring_;
-
- GTEST_DISALLOW_ASSIGN_(HasSubstrMatcher);
};
// Implements the polymorphic StartsWith(substring) matcher, which
@@ -1029,8 +1016,7 @@ class HasSubstrMatcher {
template <typename StringType>
class StartsWithMatcher {
public:
- explicit StartsWithMatcher(const StringType& prefix) : prefix_(prefix) {
- }
+ explicit StartsWithMatcher(const StringType& prefix) : prefix_(prefix) {}
#if GTEST_INTERNAL_HAS_STRING_VIEW
bool MatchAndExplain(const internal::StringView& s,
@@ -1061,7 +1047,7 @@ class StartsWithMatcher {
MatchResultListener* /* listener */) const {
const StringType& s2(s);
return s2.length() >= prefix_.length() &&
- s2.substr(0, prefix_.length()) == prefix_;
+ s2.substr(0, prefix_.length()) == prefix_;
}
void DescribeTo(::std::ostream* os) const {
@@ -1076,8 +1062,6 @@ class StartsWithMatcher {
private:
const StringType prefix_;
-
- GTEST_DISALLOW_ASSIGN_(StartsWithMatcher);
};
// Implements the polymorphic EndsWith(substring) matcher, which
@@ -1117,7 +1101,7 @@ class EndsWithMatcher {
MatchResultListener* /* listener */) const {
const StringType& s2(s);
return s2.length() >= suffix_.length() &&
- s2.substr(s2.length() - suffix_.length()) == suffix_;
+ s2.substr(s2.length() - suffix_.length()) == suffix_;
}
void DescribeTo(::std::ostream* os) const {
@@ -1132,8 +1116,45 @@ class EndsWithMatcher {
private:
const StringType suffix_;
+};
+
+// Implements the polymorphic WhenBase64Unescaped(matcher) matcher, which can be
+// used as a Matcher<T> as long as T can be converted to a string.
+class WhenBase64UnescapedMatcher {
+ public:
+ using is_gtest_matcher = void;
+
+ explicit WhenBase64UnescapedMatcher(
+ const Matcher<const std::string&>& internal_matcher)
+ : internal_matcher_(internal_matcher) {}
- GTEST_DISALLOW_ASSIGN_(EndsWithMatcher);
+ // Matches anything that can convert to std::string.
+ template <typename MatcheeStringType>
+ bool MatchAndExplain(const MatcheeStringType& s,
+ MatchResultListener* listener) const {
+ const std::string s2(s); // NOLINT (needed for working with string_view).
+ std::string unescaped;
+ if (!internal::Base64Unescape(s2, &unescaped)) {
+ if (listener != nullptr) {
+ *listener << "is not a valid base64 escaped string";
+ }
+ return false;
+ }
+ return MatchPrintAndExplain(unescaped, internal_matcher_, listener);
+ }
+
+ void DescribeTo(::std::ostream* os) const {
+ *os << "matches after Base64Unescape ";
+ internal_matcher_.DescribeTo(os);
+ }
+
+ void DescribeNegationTo(::std::ostream* os) const {
+ *os << "does not match after Base64Unescape ";
+ internal_matcher_.DescribeTo(os);
+ }
+
+ private:
+ const Matcher<const std::string&> internal_matcher_;
};
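
The matcher added above backs the WhenBase64Unescaped() factory that this googletest version exposes in the testing namespace; a brief sketch of the intended use, assuming that public spelling:

    #include <string>

    #include "gmock/gmock.h"

    using ::testing::StartsWith;
    using ::testing::WhenBase64Unescaped;

    TEST(Base64Sketch, MatchesDecodedText) {
      const std::string encoded = "aGVsbG8gd29ybGQ=";  // base64 of "hello world"
      EXPECT_THAT(encoded, WhenBase64Unescaped(StartsWith("hello")));
    }
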
// Implements a matcher that compares the two fields of a 2-tuple
@@ -1209,8 +1230,7 @@ class Ge2Matcher : public PairMatchBase<Ge2Matcher, AnyGe> {
template <typename T>
class NotMatcherImpl : public MatcherInterface<const T&> {
public:
- explicit NotMatcherImpl(const Matcher<T>& matcher)
- : matcher_(matcher) {}
+ explicit NotMatcherImpl(const Matcher<T>& matcher) : matcher_(matcher) {}
bool MatchAndExplain(const T& x,
MatchResultListener* listener) const override {
@@ -1227,8 +1247,6 @@ class NotMatcherImpl : public MatcherInterface<const T&> {
private:
const Matcher<T> matcher_;
-
- GTEST_DISALLOW_ASSIGN_(NotMatcherImpl);
};
// Implements the Not(m) matcher, which matches a value that doesn't
@@ -1247,8 +1265,6 @@ class NotMatcher {
private:
InnerMatcher matcher_;
-
- GTEST_DISALLOW_ASSIGN_(NotMatcher);
};
// Implements the AllOf(m1, m2) matcher for a particular argument type
@@ -1258,7 +1274,7 @@ class NotMatcher {
template <typename T>
class AllOfMatcherImpl : public MatcherInterface<const T&> {
public:
- explicit AllOfMatcherImpl(std::vector<Matcher<T> > matchers)
+ explicit AllOfMatcherImpl(std::vector<Matcher<T>> matchers)
: matchers_(std::move(matchers)) {}
void DescribeTo(::std::ostream* os) const override {
@@ -1309,9 +1325,7 @@ class AllOfMatcherImpl : public MatcherInterface<const T&> {
}
private:
- const std::vector<Matcher<T> > matchers_;
-
- GTEST_DISALLOW_ASSIGN_(AllOfMatcherImpl);
+ const std::vector<Matcher<T>> matchers_;
};
// VariadicMatcher is used for the variadic implementation of
@@ -1326,19 +1340,22 @@ class VariadicMatcher {
static_assert(sizeof...(Args) > 0, "Must have at least one matcher.");
}
+ VariadicMatcher(const VariadicMatcher&) = default;
+ VariadicMatcher& operator=(const VariadicMatcher&) = delete;
+
// This template type conversion operator allows an
// VariadicMatcher<Matcher1, Matcher2...> object to match any type that
// all of the provided matchers (Matcher1, Matcher2, ...) can match.
template <typename T>
operator Matcher<T>() const {
- std::vector<Matcher<T> > values;
+ std::vector<Matcher<T>> values;
CreateVariadicMatcher<T>(&values, std::integral_constant<size_t, 0>());
return Matcher<T>(new CombiningMatcher<T>(std::move(values)));
}
private:
template <typename T, size_t I>
- void CreateVariadicMatcher(std::vector<Matcher<T> >* values,
+ void CreateVariadicMatcher(std::vector<Matcher<T>>* values,
std::integral_constant<size_t, I>) const {
values->push_back(SafeMatcherCast<T>(std::get<I>(matchers_)));
CreateVariadicMatcher<T>(values, std::integral_constant<size_t, I + 1>());
@@ -1346,12 +1363,10 @@ class VariadicMatcher {
template <typename T>
void CreateVariadicMatcher(
- std::vector<Matcher<T> >*,
+ std::vector<Matcher<T>>*,
std::integral_constant<size_t, sizeof...(Args)>) const {}
std::tuple<Args...> matchers_;
-
- GTEST_DISALLOW_ASSIGN_(VariadicMatcher);
};
template <typename... Args>
@@ -1364,7 +1379,7 @@ using AllOfMatcher = VariadicMatcher<AllOfMatcherImpl, Args...>;
template <typename T>
class AnyOfMatcherImpl : public MatcherInterface<const T&> {
public:
- explicit AnyOfMatcherImpl(std::vector<Matcher<T> > matchers)
+ explicit AnyOfMatcherImpl(std::vector<Matcher<T>> matchers)
: matchers_(std::move(matchers)) {}
void DescribeTo(::std::ostream* os) const override {
@@ -1415,15 +1430,35 @@ class AnyOfMatcherImpl : public MatcherInterface<const T&> {
}
private:
- const std::vector<Matcher<T> > matchers_;
-
- GTEST_DISALLOW_ASSIGN_(AnyOfMatcherImpl);
+ const std::vector<Matcher<T>> matchers_;
};
// AnyOfMatcher is used for the variadic implementation of AnyOf(m_1, m_2, ...).
template <typename... Args>
using AnyOfMatcher = VariadicMatcher<AnyOfMatcherImpl, Args...>;
+// ConditionalMatcher is the implementation of Conditional(cond, m1, m2)
+template <typename MatcherTrue, typename MatcherFalse>
+class ConditionalMatcher {
+ public:
+ ConditionalMatcher(bool condition, MatcherTrue matcher_true,
+ MatcherFalse matcher_false)
+ : condition_(condition),
+ matcher_true_(std::move(matcher_true)),
+ matcher_false_(std::move(matcher_false)) {}
+
+ template <typename T>
+ operator Matcher<T>() const { // NOLINT(runtime/explicit)
+ return condition_ ? SafeMatcherCast<T>(matcher_true_)
+ : SafeMatcherCast<T>(matcher_false_);
+ }
+
+ private:
+ bool condition_;
+ MatcherTrue matcher_true_;
+ MatcherFalse matcher_false_;
+};
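
Per the comment above, ConditionalMatcher implements Conditional(cond, m1, m2): the condition is evaluated once, when the matcher is converted to Matcher<T>, and selects which inner matcher gets cast and used. A sketch:

    #include "gmock/gmock.h"

    using ::testing::Conditional;
    using ::testing::Eq;
    using ::testing::Gt;

    TEST(ConditionalSketch, SelectsMatcherByCondition) {
      const bool exact = false;
      // With exact == false this is equivalent to EXPECT_THAT(5, Gt(0)).
      EXPECT_THAT(5, Conditional(exact, Eq(5), Gt(0)));
    }
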
+
// Wrapper for implementation of Any/AllOfArray().
template <template <class> class MatcherImpl, typename T>
class SomeOfArrayMatcher {
@@ -1445,8 +1480,6 @@ class SomeOfArrayMatcher {
private:
const ::std::vector<T> matchers_;
-
- GTEST_DISALLOW_ASSIGN_(SomeOfArrayMatcher);
};
template <typename T>
@@ -1468,15 +1501,15 @@ class TrulyMatcher {
// interested in the address of the argument.
template <typename T>
bool MatchAndExplain(T& x, // NOLINT
- MatchResultListener* /* listener */) const {
+ MatchResultListener* listener) const {
// Without the if-statement, MSVC sometimes warns about converting
// a value to bool (warning 4800).
//
// We cannot write 'return !!predicate_(x);' as that doesn't work
// when predicate_(x) returns a class convertible to bool but
// having no operator!().
- if (predicate_(x))
- return true;
+ if (predicate_(x)) return true;
+ *listener << "didn't satisfy the given predicate";
return false;
}
@@ -1490,8 +1523,6 @@ class TrulyMatcher {
private:
Predicate predicate_;
-
- GTEST_DISALLOW_ASSIGN_(TrulyMatcher);
};
// Used for implementing Matches(matcher), which turns a matcher into
@@ -1528,8 +1559,6 @@ class MatcherAsPredicate {
private:
M matcher_;
-
- GTEST_DISALLOW_ASSIGN_(MatcherAsPredicate);
};
// For implementing ASSERT_THAT() and EXPECT_THAT(). The template
@@ -1580,8 +1609,6 @@ class PredicateFormatterFromMatcher {
private:
const M matcher_;
-
- GTEST_DISALLOW_ASSIGN_(PredicateFormatterFromMatcher);
};
// A helper function for converting a matcher to a predicate-formatter
@@ -1589,8 +1616,8 @@ class PredicateFormatterFromMatcher {
// used for implementing ASSERT_THAT() and EXPECT_THAT().
// Implementation detail: 'matcher' is received by-value to force decaying.
template <typename M>
-inline PredicateFormatterFromMatcher<M>
-MakePredicateFormatterFromMatcher(M matcher) {
+inline PredicateFormatterFromMatcher<M> MakePredicateFormatterFromMatcher(
+ M matcher) {
return PredicateFormatterFromMatcher<M>(std::move(matcher));
}
@@ -1605,9 +1632,7 @@ class IsNanMatcher {
}
void DescribeTo(::std::ostream* os) const { *os << "is NaN"; }
- void DescribeNegationTo(::std::ostream* os) const {
- *os << "isn't NaN";
- }
+ void DescribeNegationTo(::std::ostream* os) const { *os << "isn't NaN"; }
};
// Implements the polymorphic floating point equality matcher, which matches
@@ -1623,9 +1648,8 @@ class FloatingEqMatcher {
// equality comparisons between NANs will always return false. We specify a
// negative max_abs_error_ term to indicate that ULP-based approximation will
// be used for comparison.
- FloatingEqMatcher(FloatType expected, bool nan_eq_nan) :
- expected_(expected), nan_eq_nan_(nan_eq_nan), max_abs_error_(-1) {
- }
+ FloatingEqMatcher(FloatType expected, bool nan_eq_nan)
+ : expected_(expected), nan_eq_nan_(nan_eq_nan), max_abs_error_(-1) {}
// Constructor that supports a user-specified max_abs_error that will be used
// for comparison instead of ULP-based approximation. The max absolute
@@ -1687,8 +1711,8 @@ class FloatingEqMatcher {
// os->precision() returns the previously set precision, which we
// store to restore the ostream to its original configuration
// after outputting.
- const ::std::streamsize old_precision = os->precision(
- ::std::numeric_limits<FloatType>::digits10 + 2);
+ const ::std::streamsize old_precision =
+ os->precision(::std::numeric_limits<FloatType>::digits10 + 2);
if (FloatingPoint<FloatType>(expected_).is_nan()) {
if (nan_eq_nan_) {
*os << "is NaN";
@@ -1706,8 +1730,8 @@ class FloatingEqMatcher {
void DescribeNegationTo(::std::ostream* os) const override {
// As before, get original precision.
- const ::std::streamsize old_precision = os->precision(
- ::std::numeric_limits<FloatType>::digits10 + 2);
+ const ::std::streamsize old_precision =
+ os->precision(::std::numeric_limits<FloatType>::digits10 + 2);
if (FloatingPoint<FloatType>(expected_).is_nan()) {
if (nan_eq_nan_) {
*os << "isn't NaN";
@@ -1725,24 +1749,17 @@ class FloatingEqMatcher {
}
private:
- bool HasMaxAbsError() const {
- return max_abs_error_ >= 0;
- }
+ bool HasMaxAbsError() const { return max_abs_error_ >= 0; }
const FloatType expected_;
const bool nan_eq_nan_;
// max_abs_error will be used for value comparison when >= 0.
const FloatType max_abs_error_;
-
- GTEST_DISALLOW_ASSIGN_(Impl);
};
// The following 3 type conversion operators allow FloatEq(expected) and
// NanSensitiveFloatEq(expected) to be used as a Matcher<float>, a
// Matcher<const float&>, or a Matcher<float&>, but nothing else.
- // (While Google's C++ coding style doesn't allow arguments passed
- // by non-const reference, we may see them in code not conforming to
- // the style. Therefore Google Mock needs to support them.)
operator Matcher<FloatType>() const {
return MakeMatcher(
new Impl<FloatType>(expected_, nan_eq_nan_, max_abs_error_));
@@ -1763,8 +1780,6 @@ class FloatingEqMatcher {
const bool nan_eq_nan_;
// max_abs_error will be used for value comparison when >= 0.
const FloatType max_abs_error_;
-
- GTEST_DISALLOW_ASSIGN_(FloatingEqMatcher);
};
// A 2-tuple ("binary") wrapper around FloatingEqMatcher:
@@ -1806,9 +1821,8 @@ class FloatingEq2Matcher {
template <typename Tuple>
class Impl : public MatcherInterface<Tuple> {
public:
- Impl(FloatType max_abs_error, bool nan_eq_nan) :
- max_abs_error_(max_abs_error),
- nan_eq_nan_(nan_eq_nan) {}
+ Impl(FloatType max_abs_error, bool nan_eq_nan)
+ : max_abs_error_(max_abs_error), nan_eq_nan_(nan_eq_nan) {}
bool MatchAndExplain(Tuple args,
MatchResultListener* listener) const override {
@@ -1868,8 +1882,9 @@ class PointeeMatcher {
template <typename Pointer>
class Impl : public MatcherInterface<Pointer> {
public:
- typedef typename PointeeOf<GTEST_REMOVE_REFERENCE_AND_CONST_(Pointer)>::type
- Pointee;
+ using Pointee =
+ typename std::pointer_traits<GTEST_REMOVE_REFERENCE_AND_CONST_(
+ Pointer)>::element_type;
explicit Impl(const InnerMatcher& matcher)
: matcher_(MatcherCast<const Pointee&>(matcher)) {}
@@ -1894,13 +1909,67 @@ class PointeeMatcher {
private:
const Matcher<const Pointee&> matcher_;
-
- GTEST_DISALLOW_ASSIGN_(Impl);
};
const InnerMatcher matcher_;
+};
+
+// Implements the Pointer(m) matcher for matching a pointer that matches matcher
+// m. The pointer can be either raw or smart, and will match `m` against the
+// raw pointer.
+template <typename InnerMatcher>
+class PointerMatcher {
+ public:
+ explicit PointerMatcher(const InnerMatcher& matcher) : matcher_(matcher) {}
+
+ // This type conversion operator template allows Pointer(m) to be
+ // used as a matcher for any pointer type whose pointer type is
+ // compatible with the inner matcher, where type PointerType can be
+ // either a raw pointer or a smart pointer.
+ //
+ // The reason we do this instead of relying on
+ // MakePolymorphicMatcher() is that the latter is not flexible
+ // enough for implementing the DescribeTo() method of Pointer().
+ template <typename PointerType>
+ operator Matcher<PointerType>() const { // NOLINT
+ return Matcher<PointerType>(new Impl<const PointerType&>(matcher_));
+ }
+
+ private:
+ // The monomorphic implementation that works for a particular pointer type.
+ template <typename PointerType>
+ class Impl : public MatcherInterface<PointerType> {
+ public:
+ using Pointer =
+ const typename std::pointer_traits<GTEST_REMOVE_REFERENCE_AND_CONST_(
+ PointerType)>::element_type*;
+
+ explicit Impl(const InnerMatcher& matcher)
+ : matcher_(MatcherCast<Pointer>(matcher)) {}
+
+ void DescribeTo(::std::ostream* os) const override {
+ *os << "is a pointer that ";
+ matcher_.DescribeTo(os);
+ }
+
+ void DescribeNegationTo(::std::ostream* os) const override {
+ *os << "is not a pointer that ";
+ matcher_.DescribeTo(os);
+ }
+
+ bool MatchAndExplain(PointerType pointer,
+ MatchResultListener* listener) const override {
+ *listener << "which is a pointer that ";
+ Pointer p = GetRawPointer(pointer);
+ return MatchPrintAndExplain(p, matcher_, listener);
+ }
- GTEST_DISALLOW_ASSIGN_(PointeeMatcher);
+ private:
+ Matcher<Pointer> matcher_;
+ };
+
+ const InnerMatcher matcher_;
};
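
As the class comment notes, Pointer(m) extracts the raw pointer from a raw or smart pointer and matches m against that pointer itself, unlike Pointee(m), which matches against the pointed-to value. A sketch:

    #include <memory>

    #include "gmock/gmock.h"

    using ::testing::Eq;
    using ::testing::Pointer;

    TEST(PointerSketch, MatchesRawPointerOfSmartPointer) {
      auto p = std::make_unique<int>(42);
      int* raw = p.get();
      // Pointer(m) matches m against p.get(), not against *p.
      EXPECT_THAT(p, Pointer(Eq(raw)));
    }
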
#if GTEST_HAS_RTTI
@@ -1929,16 +1998,12 @@ class WhenDynamicCastToMatcherBase {
protected:
const Matcher<To> matcher_;
- static std::string GetToName() {
- return GetTypeName<To>();
- }
+ static std::string GetToName() { return GetTypeName<To>(); }
private:
static void GetCastTypeDescription(::std::ostream* os) {
*os << "when dynamic_cast to " << GetToName() << ", ";
}
-
- GTEST_DISALLOW_ASSIGN_(WhenDynamicCastToMatcherBase);
};
// Primary template.
@@ -2036,8 +2101,6 @@ class FieldMatcher {
// Contains either "whose given field " if the name of the field is unknown
// or "whose field `name_of_field` " if the name is known.
const std::string whose_field_;
-
- GTEST_DISALLOW_ASSIGN_(FieldMatcher);
};
// Implements the Property() matcher for matching a property
@@ -2072,7 +2135,7 @@ class PropertyMatcher {
}
template <typename T>
- bool MatchAndExplain(const T&value, MatchResultListener* listener) const {
+ bool MatchAndExplain(const T& value, MatchResultListener* listener) const {
return MatchAndExplainImpl(
typename std::is_pointer<typename std::remove_const<T>::type>::type(),
value, listener);
@@ -2106,8 +2169,6 @@ class PropertyMatcher {
// Contains either "whose given property " if the name of the property is
// unknown or "whose property `name_of_property` " if the name is known.
const std::string whose_property_;
-
- GTEST_DISALLOW_ASSIGN_(PropertyMatcher);
};
// Type traits specifying various features of different functors for ResultOf.
@@ -2126,16 +2187,16 @@ struct CallableTraits {
// Specialization for function pointers.
template <typename ArgType, typename ResType>
-struct CallableTraits<ResType(*)(ArgType)> {
+struct CallableTraits<ResType (*)(ArgType)> {
typedef ResType ResultType;
- typedef ResType(*StorageType)(ArgType);
+ typedef ResType (*StorageType)(ArgType);
- static void CheckIsValid(ResType(*f)(ArgType)) {
+ static void CheckIsValid(ResType (*f)(ArgType)) {
GTEST_CHECK_(f != nullptr)
<< "NULL function pointer is passed into ResultOf().";
}
template <typename T>
- static ResType Invoke(ResType(*f)(ArgType), T arg) {
+ static ResType Invoke(ResType (*f)(ArgType), T arg) {
return (*f)(arg);
}
};
@@ -2146,13 +2207,21 @@ template <typename Callable, typename InnerMatcher>
class ResultOfMatcher {
public:
ResultOfMatcher(Callable callable, InnerMatcher matcher)
- : callable_(std::move(callable)), matcher_(std::move(matcher)) {
+ : ResultOfMatcher(/*result_description=*/"", std::move(callable),
+ std::move(matcher)) {}
+
+ ResultOfMatcher(const std::string& result_description, Callable callable,
+ InnerMatcher matcher)
+ : result_description_(result_description),
+ callable_(std::move(callable)),
+ matcher_(std::move(matcher)) {
CallableTraits<Callable>::CheckIsValid(callable_);
}
template <typename T>
operator Matcher<T>() const {
- return Matcher<T>(new Impl<const T&>(callable_, matcher_));
+ return Matcher<T>(
+ new Impl<const T&>(result_description_, callable_, matcher_));
}
private:
@@ -2165,21 +2234,36 @@ class ResultOfMatcher {
public:
template <typename M>
- Impl(const CallableStorageType& callable, const M& matcher)
- : callable_(callable), matcher_(MatcherCast<ResultType>(matcher)) {}
+ Impl(const std::string& result_description,
+ const CallableStorageType& callable, const M& matcher)
+ : result_description_(result_description),
+ callable_(callable),
+ matcher_(MatcherCast<ResultType>(matcher)) {}
void DescribeTo(::std::ostream* os) const override {
- *os << "is mapped by the given callable to a value that ";
+ if (result_description_.empty()) {
+ *os << "is mapped by the given callable to a value that ";
+ } else {
+ *os << "whose " << result_description_ << " ";
+ }
matcher_.DescribeTo(os);
}
void DescribeNegationTo(::std::ostream* os) const override {
- *os << "is mapped by the given callable to a value that ";
+ if (result_description_.empty()) {
+ *os << "is mapped by the given callable to a value that ";
+ } else {
+ *os << "whose " << result_description_ << " ";
+ }
matcher_.DescribeNegationTo(os);
}
bool MatchAndExplain(T obj, MatchResultListener* listener) const override {
- *listener << "which is mapped by the given callable to ";
+ if (result_description_.empty()) {
+ *listener << "which is mapped by the given callable to ";
+ } else {
+ *listener << "whose " << result_description_ << " is ";
+ }
// Cannot pass the return value directly to MatchPrintAndExplain, which
// takes a non-const reference as argument.
// Also, specifying template argument explicitly is needed because T could
@@ -2190,6 +2274,7 @@ class ResultOfMatcher {
}
private:
+ const std::string result_description_;
// Functors often define operator() as non-const method even though
// they are actually stateless. But we need to use them even when
// 'this' is a const pointer. It's the user's responsibility not to
@@ -2197,14 +2282,11 @@ class ResultOfMatcher {
// how many times the callable will be invoked.
mutable CallableStorageType callable_;
const Matcher<ResultType> matcher_;
-
- GTEST_DISALLOW_ASSIGN_(Impl);
}; // class Impl
+ const std::string result_description_;
const CallableStorageType callable_;
const InnerMatcher matcher_;
-
- GTEST_DISALLOW_ASSIGN_(ResultOfMatcher);
};
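
The result_description_ plumbing added above feeds the described form of ResultOf, which this googletest version exposes as ResultOf(result_description, callable, matcher) (assumed public spelling); failure output then reads "whose <description> ..." instead of the generic callable wording. A sketch:

    #include <string>

    #include "gmock/gmock.h"

    using ::testing::Gt;
    using ::testing::ResultOf;

    TEST(ResultOfSketch, DescribedCallable) {
      const std::string s = "hello";
      // On failure the message reads "whose length ..." rather than
      // "is mapped by the given callable to ...".
      EXPECT_THAT(s, ResultOf(
                         "length", [](const std::string& x) { return x.size(); },
                         Gt(0u)));
    }
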
// Implements a matcher that checks the size of an STL-style container.
@@ -2212,8 +2294,7 @@ template <typename SizeMatcher>
class SizeIsMatcher {
public:
explicit SizeIsMatcher(const SizeMatcher& size_matcher)
- : size_matcher_(size_matcher) {
- }
+ : size_matcher_(size_matcher) {}
template <typename Container>
operator Matcher<Container>() const {
@@ -2241,20 +2322,18 @@ class SizeIsMatcher {
SizeType size = container.size();
StringMatchResultListener size_listener;
const bool result = size_matcher_.MatchAndExplain(size, &size_listener);
- *listener
- << "whose size " << size << (result ? " matches" : " doesn't match");
+ *listener << "whose size " << size
+ << (result ? " matches" : " doesn't match");
PrintIfNotEmpty(size_listener.str(), listener->stream());
return result;
}
private:
const Matcher<SizeType> size_matcher_;
- GTEST_DISALLOW_ASSIGN_(Impl);
};
private:
const SizeMatcher size_matcher_;
- GTEST_DISALLOW_ASSIGN_(SizeIsMatcher);
};
// Implements a matcher that checks the begin()..end() distance of an STL-style
@@ -2273,8 +2352,9 @@ class BeginEndDistanceIsMatcher {
template <typename Container>
class Impl : public MatcherInterface<Container> {
public:
- typedef internal::StlContainerView<
- GTEST_REMOVE_REFERENCE_AND_CONST_(Container)> ContainerView;
+ typedef internal::StlContainerView<GTEST_REMOVE_REFERENCE_AND_CONST_(
+ Container)>
+ ContainerView;
typedef typename std::iterator_traits<
typename ContainerView::type::const_iterator>::difference_type
DistanceType;
@@ -2306,12 +2386,10 @@ class BeginEndDistanceIsMatcher {
private:
const Matcher<DistanceType> distance_matcher_;
- GTEST_DISALLOW_ASSIGN_(Impl);
};
private:
const DistanceMatcher distance_matcher_;
- GTEST_DISALLOW_ASSIGN_(BeginEndDistanceIsMatcher);
};
// Implements an equality matcher for any STL-style container whose elements
@@ -2356,18 +2434,15 @@ class ContainerEqMatcher {
typedef internal::StlContainerView<
typename std::remove_const<LhsContainer>::type>
LhsView;
- typedef typename LhsView::type LhsStlContainer;
StlContainerReference lhs_stl_container = LhsView::ConstReference(lhs);
- if (lhs_stl_container == expected_)
- return true;
+ if (lhs_stl_container == expected_) return true;
::std::ostream* const os = listener->stream();
if (os != nullptr) {
// Something is different. Check for extra values first.
bool printed_header = false;
- for (typename LhsStlContainer::const_iterator it =
- lhs_stl_container.begin();
- it != lhs_stl_container.end(); ++it) {
+ for (auto it = lhs_stl_container.begin(); it != lhs_stl_container.end();
+ ++it) {
if (internal::ArrayAwareFind(expected_.begin(), expected_.end(), *it) ==
expected_.end()) {
if (printed_header) {
@@ -2382,11 +2457,10 @@ class ContainerEqMatcher {
// Now check for missing values.
bool printed_header2 = false;
- for (typename StlContainer::const_iterator it = expected_.begin();
- it != expected_.end(); ++it) {
- if (internal::ArrayAwareFind(
- lhs_stl_container.begin(), lhs_stl_container.end(), *it) ==
- lhs_stl_container.end()) {
+ for (auto it = expected_.begin(); it != expected_.end(); ++it) {
+ if (internal::ArrayAwareFind(lhs_stl_container.begin(),
+ lhs_stl_container.end(),
+ *it) == lhs_stl_container.end()) {
if (printed_header2) {
*os << ", ";
} else {
@@ -2404,14 +2478,14 @@ class ContainerEqMatcher {
private:
const StlContainer expected_;
-
- GTEST_DISALLOW_ASSIGN_(ContainerEqMatcher);
};
// A comparator functor that uses the < operator to compare two values.
struct LessComparator {
template <typename T, typename U>
- bool operator()(const T& lhs, const U& rhs) const { return lhs < rhs; }
+ bool operator()(const T& lhs, const U& rhs) const {
+ return lhs < rhs;
+ }
};
// Implements WhenSortedBy(comparator, container_matcher).
@@ -2430,14 +2504,16 @@ class WhenSortedByMatcher {
template <typename LhsContainer>
class Impl : public MatcherInterface<LhsContainer> {
public:
- typedef internal::StlContainerView<
- GTEST_REMOVE_REFERENCE_AND_CONST_(LhsContainer)> LhsView;
+ typedef internal::StlContainerView<GTEST_REMOVE_REFERENCE_AND_CONST_(
+ LhsContainer)>
+ LhsView;
typedef typename LhsView::type LhsStlContainer;
typedef typename LhsView::const_reference LhsStlContainerReference;
// Transforms std::pair<const Key, Value> into std::pair<Key, Value>
// so that we can match associative containers.
- typedef typename RemoveConstFromKey<
- typename LhsStlContainer::value_type>::type LhsValue;
+ typedef
+ typename RemoveConstFromKey<typename LhsStlContainer::value_type>::type
+ LhsValue;
Impl(const Comparator& comparator, const ContainerMatcher& matcher)
: comparator_(comparator), matcher_(matcher) {}
@@ -2457,8 +2533,8 @@ class WhenSortedByMatcher {
LhsStlContainerReference lhs_stl_container = LhsView::ConstReference(lhs);
::std::vector<LhsValue> sorted_container(lhs_stl_container.begin(),
lhs_stl_container.end());
- ::std::sort(
- sorted_container.begin(), sorted_container.end(), comparator_);
+ ::std::sort(sorted_container.begin(), sorted_container.end(),
+ comparator_);
if (!listener->IsInterested()) {
// If the listener is not interested, we do not need to
@@ -2471,8 +2547,8 @@ class WhenSortedByMatcher {
*listener << " when sorted";
StringMatchResultListener inner_listener;
- const bool match = matcher_.MatchAndExplain(sorted_container,
- &inner_listener);
+ const bool match =
+ matcher_.MatchAndExplain(sorted_container, &inner_listener);
PrintIfNotEmpty(inner_listener.str(), listener->stream());
return match;
}
@@ -2481,14 +2557,13 @@ class WhenSortedByMatcher {
const Comparator comparator_;
const Matcher<const ::std::vector<LhsValue>&> matcher_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(Impl);
+ Impl(const Impl&) = delete;
+ Impl& operator=(const Impl&) = delete;
};
private:
const Comparator comparator_;
const ContainerMatcher matcher_;
-
- GTEST_DISALLOW_ASSIGN_(WhenSortedByMatcher);
};
// Implements Pointwise(tuple_matcher, rhs_container). tuple_matcher
@@ -2497,9 +2572,9 @@ class WhenSortedByMatcher {
// container and the RHS container respectively.
template <typename TupleMatcher, typename RhsContainer>
class PointwiseMatcher {
- GTEST_COMPILE_ASSERT_(
+ static_assert(
!IsHashTable<GTEST_REMOVE_REFERENCE_AND_CONST_(RhsContainer)>::value,
- use_UnorderedPointwise_with_hash_tables);
+ "use UnorderedPointwise with hash tables");
public:
typedef internal::StlContainerView<RhsContainer> RhsView;
@@ -2518,9 +2593,9 @@ class PointwiseMatcher {
template <typename LhsContainer>
operator Matcher<LhsContainer>() const {
- GTEST_COMPILE_ASSERT_(
+ static_assert(
!IsHashTable<GTEST_REMOVE_REFERENCE_AND_CONST_(LhsContainer)>::value,
- use_UnorderedPointwise_with_hash_tables);
+ "use UnorderedPointwise with hash tables");
return Matcher<LhsContainer>(
new Impl<const LhsContainer&>(tuple_matcher_, rhs_));
@@ -2529,8 +2604,9 @@ class PointwiseMatcher {
template <typename LhsContainer>
class Impl : public MatcherInterface<LhsContainer> {
public:
- typedef internal::StlContainerView<
- GTEST_REMOVE_REFERENCE_AND_CONST_(LhsContainer)> LhsView;
+ typedef internal::StlContainerView<GTEST_REMOVE_REFERENCE_AND_CONST_(
+ LhsContainer)>
+ LhsView;
typedef typename LhsView::type LhsStlContainer;
typedef typename LhsView::const_reference LhsStlContainerReference;
typedef typename LhsStlContainer::value_type LhsValue;
@@ -2570,14 +2646,14 @@ class PointwiseMatcher {
return false;
}
- typename LhsStlContainer::const_iterator left = lhs_stl_container.begin();
- typename RhsStlContainer::const_iterator right = rhs_.begin();
+ auto left = lhs_stl_container.begin();
+ auto right = rhs_.begin();
for (size_t i = 0; i != actual_size; ++i, ++left, ++right) {
if (listener->IsInterested()) {
StringMatchResultListener inner_listener;
// Create InnerMatcherArg as a temporarily object to avoid it outlives
// *left and *right. Dereference or the conversion to `const T&` may
- // return temp objects, e.g for vector<bool>.
+ // return temp objects, e.g. for vector<bool>.
if (!mono_tuple_matcher_.MatchAndExplain(
InnerMatcherArg(ImplicitCast_<const LhsValue&>(*left),
ImplicitCast_<const RhsValue&>(*right)),
@@ -2604,15 +2680,11 @@ class PointwiseMatcher {
private:
const Matcher<InnerMatcherArg> mono_tuple_matcher_;
const RhsStlContainer rhs_;
-
- GTEST_DISALLOW_ASSIGN_(Impl);
};
private:
const TupleMatcher tuple_matcher_;
const RhsStlContainer rhs_;
-
- GTEST_DISALLOW_ASSIGN_(PointwiseMatcher);
};
// Holds the logic common to ContainsMatcherImpl and EachMatcherImpl.
@@ -2628,18 +2700,17 @@ class QuantifierMatcherImpl : public MatcherInterface<Container> {
template <typename InnerMatcher>
explicit QuantifierMatcherImpl(InnerMatcher inner_matcher)
: inner_matcher_(
- testing::SafeMatcherCast<const Element&>(inner_matcher)) {}
+ testing::SafeMatcherCast<const Element&>(inner_matcher)) {}
// Checks whether:
// * All elements in the container match, if all_elements_should_match.
// * Any element in the container matches, if !all_elements_should_match.
- bool MatchAndExplainImpl(bool all_elements_should_match,
- Container container,
+ bool MatchAndExplainImpl(bool all_elements_should_match, Container container,
MatchResultListener* listener) const {
StlContainerReference stl_container = View::ConstReference(container);
size_t i = 0;
- for (typename StlContainer::const_iterator it = stl_container.begin();
- it != stl_container.end(); ++it, ++i) {
+ for (auto it = stl_container.begin(); it != stl_container.end();
+ ++it, ++i) {
StringMatchResultListener inner_listener;
const bool matches = inner_matcher_.MatchAndExplain(*it, &inner_listener);
@@ -2653,10 +2724,56 @@ class QuantifierMatcherImpl : public MatcherInterface<Container> {
return all_elements_should_match;
}
+ bool MatchAndExplainImpl(const Matcher<size_t>& count_matcher,
+ Container container,
+ MatchResultListener* listener) const {
+ StlContainerReference stl_container = View::ConstReference(container);
+ size_t i = 0;
+ std::vector<size_t> match_elements;
+ for (auto it = stl_container.begin(); it != stl_container.end();
+ ++it, ++i) {
+ StringMatchResultListener inner_listener;
+ const bool matches = inner_matcher_.MatchAndExplain(*it, &inner_listener);
+ if (matches) {
+ match_elements.push_back(i);
+ }
+ }
+ if (listener->IsInterested()) {
+ if (match_elements.empty()) {
+ *listener << "has no element that matches";
+ } else if (match_elements.size() == 1) {
+ *listener << "whose element #" << match_elements[0] << " matches";
+ } else {
+ *listener << "whose elements (";
+ std::string sep = "";
+ for (size_t e : match_elements) {
+ *listener << sep << e;
+ sep = ", ";
+ }
+ *listener << ") match";
+ }
+ }
+ StringMatchResultListener count_listener;
+ if (count_matcher.MatchAndExplain(match_elements.size(), &count_listener)) {
+ *listener << " and whose match quantity of " << match_elements.size()
+ << " matches";
+ PrintIfNotEmpty(count_listener.str(), listener->stream());
+ return true;
+ } else {
+ if (match_elements.empty()) {
+ *listener << " and";
+ } else {
+ *listener << " but";
+ }
+ *listener << " whose match quantity of " << match_elements.size()
+ << " does not match";
+ PrintIfNotEmpty(count_listener.str(), listener->stream());
+ return false;
+ }
+ }
+
protected:
const Matcher<const Element&> inner_matcher_;
-
- GTEST_DISALLOW_ASSIGN_(QuantifierMatcherImpl);
};
// Implements Contains(element_matcher) for the given argument type Container.
@@ -2683,9 +2800,6 @@ class ContainsMatcherImpl : public QuantifierMatcherImpl<Container> {
MatchResultListener* listener) const override {
return this->MatchAndExplainImpl(false, container, listener);
}
-
- private:
- GTEST_DISALLOW_ASSIGN_(ContainsMatcherImpl);
};
// Implements Each(element_matcher) for the given argument type Container.
@@ -2712,9 +2826,58 @@ class EachMatcherImpl : public QuantifierMatcherImpl<Container> {
MatchResultListener* listener) const override {
return this->MatchAndExplainImpl(true, container, listener);
}
+};
+
+// Implements Contains(element_matcher).Times(n) for the given argument type
+// Container.
+template <typename Container>
+class ContainsTimesMatcherImpl : public QuantifierMatcherImpl<Container> {
+ public:
+ template <typename InnerMatcher>
+ explicit ContainsTimesMatcherImpl(InnerMatcher inner_matcher,
+ Matcher<size_t> count_matcher)
+ : QuantifierMatcherImpl<Container>(inner_matcher),
+ count_matcher_(std::move(count_matcher)) {}
+
+ void DescribeTo(::std::ostream* os) const override {
+ *os << "quantity of elements that match ";
+ this->inner_matcher_.DescribeTo(os);
+ *os << " ";
+ count_matcher_.DescribeTo(os);
+ }
+
+ void DescribeNegationTo(::std::ostream* os) const override {
+ *os << "quantity of elements that match ";
+ this->inner_matcher_.DescribeTo(os);
+ *os << " ";
+ count_matcher_.DescribeNegationTo(os);
+ }
+
+ bool MatchAndExplain(Container container,
+ MatchResultListener* listener) const override {
+ return this->MatchAndExplainImpl(count_matcher_, container, listener);
+ }
private:
- GTEST_DISALLOW_ASSIGN_(EachMatcherImpl);
+ const Matcher<size_t> count_matcher_;
+};
+
+// Implements polymorphic Contains(element_matcher).Times(n).
+template <typename M>
+class ContainsTimesMatcher {
+ public:
+ explicit ContainsTimesMatcher(M m, Matcher<size_t> count_matcher)
+ : inner_matcher_(m), count_matcher_(std::move(count_matcher)) {}
+
+ template <typename Container>
+ operator Matcher<Container>() const { // NOLINT
+ return Matcher<Container>(new ContainsTimesMatcherImpl<const Container&>(
+ inner_matcher_, count_matcher_));
+ }
+
+ private:
+ const M inner_matcher_;
+ const Matcher<size_t> count_matcher_;
};
// Implements polymorphic Contains(element_matcher).
@@ -2724,15 +2887,17 @@ class ContainsMatcher {
explicit ContainsMatcher(M m) : inner_matcher_(m) {}
template <typename Container>
- operator Matcher<Container>() const {
+ operator Matcher<Container>() const { // NOLINT
return Matcher<Container>(
new ContainsMatcherImpl<const Container&>(inner_matcher_));
}
+ ContainsTimesMatcher<M> Times(Matcher<size_t> count_matcher) const {
+ return ContainsTimesMatcher<M>(inner_matcher_, std::move(count_matcher));
+ }
+
private:
const M inner_matcher_;
-
- GTEST_DISALLOW_ASSIGN_(ContainsMatcher);
};
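A minimal usage sketch of the Times modifier defined above (test and variable names here are illustrative, not part of the patch; assumes <gmock/gmock.h> and a linked gtest main):

#include <vector>
#include "gmock/gmock.h"

using ::testing::Contains;
using ::testing::Ge;

TEST(ContainsTimesSketch, CountsMatches) {
  std::vector<int> ids = {1, 1, 3};
  EXPECT_THAT(ids, Contains(1).Times(2));      // exactly two elements equal 1
  EXPECT_THAT(ids, Contains(2).Times(0));      // 2 is absent
  EXPECT_THAT(ids, Contains(3).Times(Ge(1)));  // 3 occurs at least once
}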
// Implements polymorphic Each(element_matcher).
@@ -2742,15 +2907,13 @@ class EachMatcher {
explicit EachMatcher(M m) : inner_matcher_(m) {}
template <typename Container>
- operator Matcher<Container>() const {
+ operator Matcher<Container>() const { // NOLINT
return Matcher<Container>(
new EachMatcherImpl<const Container&>(inner_matcher_));
}
private:
const M inner_matcher_;
-
- GTEST_DISALLOW_ASSIGN_(EachMatcher);
};
struct Rank1 {};
@@ -2790,8 +2953,7 @@ class KeyMatcherImpl : public MatcherInterface<PairType> {
template <typename InnerMatcher>
explicit KeyMatcherImpl(InnerMatcher inner_matcher)
: inner_matcher_(
- testing::SafeMatcherCast<const KeyType&>(inner_matcher)) {
- }
+ testing::SafeMatcherCast<const KeyType&>(inner_matcher)) {}
// Returns true if and only if 'key_value.first' (the key) matches the inner
// matcher.
@@ -2821,8 +2983,6 @@ class KeyMatcherImpl : public MatcherInterface<PairType> {
private:
const Matcher<const KeyType&> inner_matcher_;
-
- GTEST_DISALLOW_ASSIGN_(KeyMatcherImpl);
};
// Implements polymorphic Key(matcher_for_key).
@@ -2839,8 +2999,49 @@ class KeyMatcher {
private:
const M matcher_for_key_;
+};
+
+// Implements polymorphic Address(matcher_for_address).
+template <typename InnerMatcher>
+class AddressMatcher {
+ public:
+ explicit AddressMatcher(InnerMatcher m) : matcher_(m) {}
+
+ template <typename Type>
+ operator Matcher<Type>() const { // NOLINT
+ return Matcher<Type>(new Impl<const Type&>(matcher_));
+ }
+
+ private:
+ // The monomorphic implementation that works for a particular object type.
+ template <typename Type>
+ class Impl : public MatcherInterface<Type> {
+ public:
+ using Address = const GTEST_REMOVE_REFERENCE_AND_CONST_(Type) *;
+ explicit Impl(const InnerMatcher& matcher)
+ : matcher_(MatcherCast<Address>(matcher)) {}
+
+ void DescribeTo(::std::ostream* os) const override {
+ *os << "has address that ";
+ matcher_.DescribeTo(os);
+ }
+
+ void DescribeNegationTo(::std::ostream* os) const override {
+ *os << "does not have address that ";
+ matcher_.DescribeTo(os);
+ }
+
+ bool MatchAndExplain(Type object,
+ MatchResultListener* listener) const override {
+ *listener << "which has address ";
+ Address address = std::addressof(object);
+ return MatchPrintAndExplain(address, matcher_, listener);
+ }
- GTEST_DISALLOW_ASSIGN_(KeyMatcher);
+ private:
+ const Matcher<Address> matcher_;
+ };
+ const InnerMatcher matcher_;
};
// Implements Pair(first_matcher, second_matcher) for the given argument pair
@@ -2857,8 +3058,7 @@ class PairMatcherImpl : public MatcherInterface<PairType> {
: first_matcher_(
testing::SafeMatcherCast<const FirstType&>(first_matcher)),
second_matcher_(
- testing::SafeMatcherCast<const SecondType&>(second_matcher)) {
- }
+ testing::SafeMatcherCast<const SecondType&>(second_matcher)) {}
// Describes what this matcher does.
void DescribeTo(::std::ostream* os) const override {
@@ -2926,8 +3126,6 @@ class PairMatcherImpl : public MatcherInterface<PairType> {
const Matcher<const FirstType&> first_matcher_;
const Matcher<const SecondType&> second_matcher_;
-
- GTEST_DISALLOW_ASSIGN_(PairMatcherImpl);
};
// Implements polymorphic Pair(first_matcher, second_matcher).
@@ -2938,7 +3136,7 @@ class PairMatcher {
: first_matcher_(first_matcher), second_matcher_(second_matcher) {}
template <typename PairType>
- operator Matcher<PairType> () const {
+ operator Matcher<PairType>() const {
return Matcher<PairType>(
new PairMatcherImpl<const PairType&>(first_matcher_, second_matcher_));
}
@@ -2946,8 +3144,203 @@ class PairMatcher {
private:
const FirstMatcher first_matcher_;
const SecondMatcher second_matcher_;
+};
+
+template <typename T, size_t... I>
+auto UnpackStructImpl(const T& t, IndexSequence<I...>, int)
+ -> decltype(std::tie(get<I>(t)...)) {
+ static_assert(std::tuple_size<T>::value == sizeof...(I),
+ "Number of arguments doesn't match the number of fields.");
+ return std::tie(get<I>(t)...);
+}
+
+#if defined(__cpp_structured_bindings) && __cpp_structured_bindings >= 201606
+template <typename T>
+auto UnpackStructImpl(const T& t, MakeIndexSequence<1>, char) {
+ const auto& [a] = t;
+ return std::tie(a);
+}
+template <typename T>
+auto UnpackStructImpl(const T& t, MakeIndexSequence<2>, char) {
+ const auto& [a, b] = t;
+ return std::tie(a, b);
+}
+template <typename T>
+auto UnpackStructImpl(const T& t, MakeIndexSequence<3>, char) {
+ const auto& [a, b, c] = t;
+ return std::tie(a, b, c);
+}
+template <typename T>
+auto UnpackStructImpl(const T& t, MakeIndexSequence<4>, char) {
+ const auto& [a, b, c, d] = t;
+ return std::tie(a, b, c, d);
+}
+template <typename T>
+auto UnpackStructImpl(const T& t, MakeIndexSequence<5>, char) {
+ const auto& [a, b, c, d, e] = t;
+ return std::tie(a, b, c, d, e);
+}
+template <typename T>
+auto UnpackStructImpl(const T& t, MakeIndexSequence<6>, char) {
+ const auto& [a, b, c, d, e, f] = t;
+ return std::tie(a, b, c, d, e, f);
+}
+template <typename T>
+auto UnpackStructImpl(const T& t, MakeIndexSequence<7>, char) {
+ const auto& [a, b, c, d, e, f, g] = t;
+ return std::tie(a, b, c, d, e, f, g);
+}
+template <typename T>
+auto UnpackStructImpl(const T& t, MakeIndexSequence<8>, char) {
+ const auto& [a, b, c, d, e, f, g, h] = t;
+ return std::tie(a, b, c, d, e, f, g, h);
+}
+template <typename T>
+auto UnpackStructImpl(const T& t, MakeIndexSequence<9>, char) {
+ const auto& [a, b, c, d, e, f, g, h, i] = t;
+ return std::tie(a, b, c, d, e, f, g, h, i);
+}
+template <typename T>
+auto UnpackStructImpl(const T& t, MakeIndexSequence<10>, char) {
+ const auto& [a, b, c, d, e, f, g, h, i, j] = t;
+ return std::tie(a, b, c, d, e, f, g, h, i, j);
+}
+template <typename T>
+auto UnpackStructImpl(const T& t, MakeIndexSequence<11>, char) {
+ const auto& [a, b, c, d, e, f, g, h, i, j, k] = t;
+ return std::tie(a, b, c, d, e, f, g, h, i, j, k);
+}
+template <typename T>
+auto UnpackStructImpl(const T& t, MakeIndexSequence<12>, char) {
+ const auto& [a, b, c, d, e, f, g, h, i, j, k, l] = t;
+ return std::tie(a, b, c, d, e, f, g, h, i, j, k, l);
+}
+template <typename T>
+auto UnpackStructImpl(const T& t, MakeIndexSequence<13>, char) {
+ const auto& [a, b, c, d, e, f, g, h, i, j, k, l, m] = t;
+ return std::tie(a, b, c, d, e, f, g, h, i, j, k, l, m);
+}
+template <typename T>
+auto UnpackStructImpl(const T& t, MakeIndexSequence<14>, char) {
+ const auto& [a, b, c, d, e, f, g, h, i, j, k, l, m, n] = t;
+ return std::tie(a, b, c, d, e, f, g, h, i, j, k, l, m, n);
+}
+template <typename T>
+auto UnpackStructImpl(const T& t, MakeIndexSequence<15>, char) {
+ const auto& [a, b, c, d, e, f, g, h, i, j, k, l, m, n, o] = t;
+ return std::tie(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o);
+}
+template <typename T>
+auto UnpackStructImpl(const T& t, MakeIndexSequence<16>, char) {
+ const auto& [a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p] = t;
+ return std::tie(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p);
+}
+#endif // defined(__cpp_structured_bindings)
+
+template <size_t I, typename T>
+auto UnpackStruct(const T& t)
+ -> decltype((UnpackStructImpl)(t, MakeIndexSequence<I>{}, 0)) {
+ return (UnpackStructImpl)(t, MakeIndexSequence<I>{}, 0);
+}
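The trailing int/char parameter ranks the two overload sets: the literal 0 passed by UnpackStruct is an exact match for int, so the get<I>-based overload wins for tuple-like types; when that overload is removed by SFINAE, 0 degrades to char and a structured-bindings overload is picked instead. A reduced sketch of the same dispatch pattern, with hypothetical names:

#include <iostream>
#include <string>

// Preferred overload: viable only when T has a member size().
template <typename T>
auto DescribeImpl(const T& t, int) -> decltype(t.size(), void()) {
  std::cout << "sized container of size " << t.size() << "\n";
}

// Fallback overload: 0 also converts to char, but only as a worse match.
template <typename T>
void DescribeImpl(const T&, char) {
  std::cout << "opaque value\n";
}

template <typename T>
void Describe(const T& t) {
  DescribeImpl(t, 0);  // picks the int overload whenever it is viable
}

int main() {
  Describe(std::string("hi"));  // sized container of size 2
  Describe(3.14);               // opaque value
}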
+
+// Helper function to do comma folding in C++11.
+// The array ensures left-to-right order of evaluation.
+// Usage: VariadicExpand({expr...});
+template <typename T, size_t N>
+void VariadicExpand(const T (&)[N]) {}
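The braced array argument is what guarantees ordering: elements of a braced-init-list are evaluated left to right, unlike ordinary function arguments. A reduced sketch (names are illustrative):

#include <cstddef>
#include <iostream>

template <typename T, std::size_t N>
void VariadicExpand(const T (&)[N]) {}

template <typename... Args>
void PrintAll(const Args&... args) {
  const char* sep = "";
  // Each array element prints one argument; the trailing 0 gives every
  // comma expression a common element type for the array.
  VariadicExpand({(std::cout << sep << args, sep = ", ", 0)...});
  std::cout << "\n";
}

int main() { PrintAll(1, "two", 3.5); }  // prints: 1, two, 3.5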
+
+template <typename Struct, typename StructSize>
+class FieldsAreMatcherImpl;
+
+template <typename Struct, size_t... I>
+class FieldsAreMatcherImpl<Struct, IndexSequence<I...>>
+ : public MatcherInterface<Struct> {
+ using UnpackedType =
+ decltype(UnpackStruct<sizeof...(I)>(std::declval<const Struct&>()));
+ using MatchersType = std::tuple<
+ Matcher<const typename std::tuple_element<I, UnpackedType>::type&>...>;
+
+ public:
+ template <typename Inner>
+ explicit FieldsAreMatcherImpl(const Inner& matchers)
+ : matchers_(testing::SafeMatcherCast<
+ const typename std::tuple_element<I, UnpackedType>::type&>(
+ std::get<I>(matchers))...) {}
- GTEST_DISALLOW_ASSIGN_(PairMatcher);
+ void DescribeTo(::std::ostream* os) const override {
+ const char* separator = "";
+ VariadicExpand(
+ {(*os << separator << "has field #" << I << " that ",
+ std::get<I>(matchers_).DescribeTo(os), separator = ", and ")...});
+ }
+
+ void DescribeNegationTo(::std::ostream* os) const override {
+ const char* separator = "";
+ VariadicExpand({(*os << separator << "has field #" << I << " that ",
+ std::get<I>(matchers_).DescribeNegationTo(os),
+ separator = ", or ")...});
+ }
+
+ bool MatchAndExplain(Struct t, MatchResultListener* listener) const override {
+ return MatchInternal((UnpackStruct<sizeof...(I)>)(t), listener);
+ }
+
+ private:
+ bool MatchInternal(UnpackedType tuple, MatchResultListener* listener) const {
+ if (!listener->IsInterested()) {
+ // If the listener is not interested, we don't need to construct the
+ // explanation.
+ bool good = true;
+ VariadicExpand({good = good && std::get<I>(matchers_).Matches(
+ std::get<I>(tuple))...});
+ return good;
+ }
+
+ size_t failed_pos = ~size_t{};
+
+ std::vector<StringMatchResultListener> inner_listener(sizeof...(I));
+
+ VariadicExpand(
+ {failed_pos == ~size_t{} && !std::get<I>(matchers_).MatchAndExplain(
+ std::get<I>(tuple), &inner_listener[I])
+ ? failed_pos = I
+ : 0 ...});
+ if (failed_pos != ~size_t{}) {
+ *listener << "whose field #" << failed_pos << " does not match";
+ PrintIfNotEmpty(inner_listener[failed_pos].str(), listener->stream());
+ return false;
+ }
+
+    *listener << "all of whose fields match";
+ const char* separator = ", where";
+ for (size_t index = 0; index < sizeof...(I); ++index) {
+ const std::string str = inner_listener[index].str();
+ if (!str.empty()) {
+ *listener << separator << " field #" << index << " is a value " << str;
+ separator = ", and";
+ }
+ }
+
+ return true;
+ }
+
+ MatchersType matchers_;
+};
+
+template <typename... Inner>
+class FieldsAreMatcher {
+ public:
+ explicit FieldsAreMatcher(Inner... inner) : matchers_(std::move(inner)...) {}
+
+ template <typename Struct>
+ operator Matcher<Struct>() const { // NOLINT
+ return Matcher<Struct>(
+ new FieldsAreMatcherImpl<const Struct&, IndexSequenceFor<Inner...>>(
+ matchers_));
+ }
+
+ private:
+ std::tuple<Inner...> matchers_;
};
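A usage sketch for the resulting FieldsAre matcher against a plain aggregate; this path relies on the structured-bindings overloads above, so it assumes a C++17 build (the Widget type and test name are illustrative):

#include <string>
#include "gmock/gmock.h"

using ::testing::FieldsAre;
using ::testing::Ge;
using ::testing::HasSubstr;

struct Widget {
  std::string name;
  int weight;
};

TEST(FieldsAreSketch, MatchesAggregateFieldwise) {
  Widget w{"blue widget", 12};
  // Field #0 is checked against HasSubstr, field #1 against Ge.
  EXPECT_THAT(w, FieldsAre(HasSubstr("widget"), Ge(10)));
}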
// Implements ElementsAre() and ElementsAreArray().
@@ -3015,7 +3408,7 @@ class ElementsAreMatcherImpl : public MatcherInterface<Container> {
// explanations[i] is the explanation of the element at index i.
::std::vector<std::string> explanations(count());
StlContainerReference stl_container = View::ConstReference(container);
- typename StlContainer::const_iterator it = stl_container.begin();
+ auto it = stl_container.begin();
size_t exam_pos = 0;
bool mismatch_found = false; // Have we found a mismatched element yet?
@@ -3092,9 +3485,7 @@ class ElementsAreMatcherImpl : public MatcherInterface<Container> {
size_t count() const { return matchers_.size(); }
- ::std::vector<Matcher<const Element&> > matchers_;
-
- GTEST_DISALLOW_ASSIGN_(ElementsAreMatcherImpl);
+ ::std::vector<Matcher<const Element&>> matchers_;
};
// Connectivity matrix of (elements X matchers), in element-major order.
@@ -3106,8 +3497,7 @@ class GTEST_API_ MatchMatrix {
MatchMatrix(size_t num_elements, size_t num_matchers)
: num_elements_(num_elements),
num_matchers_(num_matchers),
- matched_(num_elements_* num_matchers_, 0) {
- }
+ matched_(num_elements_ * num_matchers_, 0) {}
size_t LhsSize() const { return num_elements_; }
size_t RhsSize() const { return num_matchers_; }
@@ -3146,8 +3536,7 @@ typedef ::std::vector<ElementMatcherPair> ElementMatcherPairs;
// Returns a maximum bipartite matching for the specified graph 'g'.
// The matching is represented as a vector of {element, matcher} pairs.
-GTEST_API_ ElementMatcherPairs
-FindMaxBipartiteMatching(const MatchMatrix& g);
+GTEST_API_ ElementMatcherPairs FindMaxBipartiteMatching(const MatchMatrix& g);
struct UnorderedMatcherRequire {
enum Flags {
@@ -3184,9 +3573,7 @@ class GTEST_API_ UnorderedElementsAreMatcherImplBase {
bool FindPairing(const MatchMatrix& matrix,
MatchResultListener* listener) const;
- MatcherDescriberVec& matcher_describers() {
- return matcher_describers_;
- }
+ MatcherDescriberVec& matcher_describers() { return matcher_describers_; }
static Message Elements(size_t n) {
return Message() << n << " element" << (n == 1 ? "" : "s");
@@ -3197,8 +3584,6 @@ class GTEST_API_ UnorderedElementsAreMatcherImplBase {
private:
UnorderedMatcherRequire::Flags match_flags_;
MatcherDescriberVec matcher_describers_;
-
- GTEST_DISALLOW_ASSIGN_(UnorderedElementsAreMatcherImplBase);
};
// Implements UnorderedElementsAre, UnorderedElementsAreArray, IsSubsetOf, and
@@ -3212,7 +3597,6 @@ class UnorderedElementsAreMatcherImpl
typedef internal::StlContainerView<RawContainer> View;
typedef typename View::type StlContainer;
typedef typename View::const_reference StlContainerReference;
- typedef typename StlContainer::const_iterator StlContainerConstIterator;
typedef typename StlContainer::value_type Element;
template <typename InputIter>
@@ -3221,7 +3605,9 @@ class UnorderedElementsAreMatcherImpl
: UnorderedElementsAreMatcherImplBase(matcher_flags) {
for (; first != last; ++first) {
matchers_.push_back(MatcherCast<const Element&>(*first));
- matcher_describers().push_back(matchers_.back().GetDescriber());
+ }
+ for (const auto& m : matchers_) {
+ matcher_describers().push_back(m.GetDescriber());
}
}
@@ -3293,9 +3679,7 @@ class UnorderedElementsAreMatcherImpl
return matrix;
}
- ::std::vector<Matcher<const Element&> > matchers_;
-
- GTEST_DISALLOW_ASSIGN_(UnorderedElementsAreMatcherImpl);
+ ::std::vector<Matcher<const Element&>> matchers_;
};
// Functor for use in TransformTuple.
@@ -3320,7 +3704,7 @@ class UnorderedElementsAreMatcher {
typedef GTEST_REMOVE_REFERENCE_AND_CONST_(Container) RawContainer;
typedef typename internal::StlContainerView<RawContainer>::type View;
typedef typename View::value_type Element;
- typedef ::std::vector<Matcher<const Element&> > MatcherVec;
+ typedef ::std::vector<Matcher<const Element&>> MatcherVec;
MatcherVec matchers;
matchers.reserve(::std::tuple_size<MatcherTuple>::value);
TransformTupleValues(CastAndAppendTransform<const Element&>(), matchers_,
@@ -3333,7 +3717,6 @@ class UnorderedElementsAreMatcher {
private:
const MatcherTuple matchers_;
- GTEST_DISALLOW_ASSIGN_(UnorderedElementsAreMatcher);
};
// Implements ElementsAre.
@@ -3344,15 +3727,15 @@ class ElementsAreMatcher {
template <typename Container>
operator Matcher<Container>() const {
- GTEST_COMPILE_ASSERT_(
+ static_assert(
!IsHashTable<GTEST_REMOVE_REFERENCE_AND_CONST_(Container)>::value ||
::std::tuple_size<MatcherTuple>::value < 2,
- use_UnorderedElementsAre_with_hash_tables);
+ "use UnorderedElementsAre with hash tables");
typedef GTEST_REMOVE_REFERENCE_AND_CONST_(Container) RawContainer;
typedef typename internal::StlContainerView<RawContainer>::type View;
typedef typename View::value_type Element;
- typedef ::std::vector<Matcher<const Element&> > MatcherVec;
+ typedef ::std::vector<Matcher<const Element&>> MatcherVec;
MatcherVec matchers;
matchers.reserve(::std::tuple_size<MatcherTuple>::value);
TransformTupleValues(CastAndAppendTransform<const Element&>(), matchers_,
@@ -3363,7 +3746,6 @@ class ElementsAreMatcher {
private:
const MatcherTuple matchers_;
- GTEST_DISALLOW_ASSIGN_(ElementsAreMatcher);
};
// Implements UnorderedElementsAreArray(), IsSubsetOf(), and IsSupersetOf().
@@ -3385,8 +3767,6 @@ class UnorderedElementsAreArrayMatcher {
private:
UnorderedMatcherRequire::Flags match_flags_;
::std::vector<T> matchers_;
-
- GTEST_DISALLOW_ASSIGN_(UnorderedElementsAreArrayMatcher);
};
// Implements ElementsAreArray().
@@ -3398,9 +3778,9 @@ class ElementsAreArrayMatcher {
template <typename Container>
operator Matcher<Container>() const {
- GTEST_COMPILE_ASSERT_(
+ static_assert(
!IsHashTable<GTEST_REMOVE_REFERENCE_AND_CONST_(Container)>::value,
- use_UnorderedElementsAreArray_with_hash_tables);
+ "use UnorderedElementsAreArray with hash tables");
return Matcher<Container>(new ElementsAreMatcherImpl<const Container&>(
matchers_.begin(), matchers_.end()));
@@ -3408,8 +3788,6 @@ class ElementsAreArrayMatcher {
private:
const ::std::vector<T> matchers_;
-
- GTEST_DISALLOW_ASSIGN_(ElementsAreArrayMatcher);
};
// Given a 2-tuple matcher tm of type Tuple2Matcher and a value second
@@ -3471,8 +3849,6 @@ class BoundSecondMatcher {
private:
const Matcher<const ArgTuple&> mono_tuple2_matcher_;
const Second second_value_;
-
- GTEST_DISALLOW_ASSIGN_(Impl);
};
const Tuple2Matcher tuple2_matcher_;
@@ -3494,9 +3870,9 @@ BoundSecondMatcher<Tuple2Matcher, Second> MatcherBindSecond(
// 'negation' is false; otherwise returns the description of the
// negation of the matcher. 'param_values' contains a list of strings
// that are the print-out of the matcher's parameters.
-GTEST_API_ std::string FormatMatcherDescription(bool negation,
- const char* matcher_name,
- const Strings& param_values);
+GTEST_API_ std::string FormatMatcherDescription(
+ bool negation, const char* matcher_name,
+ const std::vector<const char*>& param_names, const Strings& param_values);
// Implements a matcher that checks the value of a optional<> type variable.
template <typename ValueMatcher>
@@ -3545,12 +3921,10 @@ class OptionalMatcher {
private:
const Matcher<ValueType> value_matcher_;
- GTEST_DISALLOW_ASSIGN_(Impl);
};
private:
const ValueMatcher value_matcher_;
- GTEST_DISALLOW_ASSIGN_(OptionalMatcher);
};
namespace variant_matcher {
@@ -3775,26 +4149,26 @@ ElementsAreArray(Iter first, Iter last) {
}
template <typename T>
-inline internal::ElementsAreArrayMatcher<T> ElementsAreArray(
- const T* pointer, size_t count) {
+inline auto ElementsAreArray(const T* pointer, size_t count)
+ -> decltype(ElementsAreArray(pointer, pointer + count)) {
return ElementsAreArray(pointer, pointer + count);
}
template <typename T, size_t N>
-inline internal::ElementsAreArrayMatcher<T> ElementsAreArray(
- const T (&array)[N]) {
+inline auto ElementsAreArray(const T (&array)[N])
+ -> decltype(ElementsAreArray(array, N)) {
return ElementsAreArray(array, N);
}
template <typename Container>
-inline internal::ElementsAreArrayMatcher<typename Container::value_type>
-ElementsAreArray(const Container& container) {
+inline auto ElementsAreArray(const Container& container)
+ -> decltype(ElementsAreArray(container.begin(), container.end())) {
return ElementsAreArray(container.begin(), container.end());
}
template <typename T>
-inline internal::ElementsAreArrayMatcher<T>
-ElementsAreArray(::std::initializer_list<T> xs) {
+inline auto ElementsAreArray(::std::initializer_list<T> xs)
+ -> decltype(ElementsAreArray(xs.begin(), xs.end())) {
return ElementsAreArray(xs.begin(), xs.end());
}
@@ -3821,14 +4195,14 @@ UnorderedElementsAreArray(Iter first, Iter last) {
}
template <typename T>
-inline internal::UnorderedElementsAreArrayMatcher<T>
-UnorderedElementsAreArray(const T* pointer, size_t count) {
+inline internal::UnorderedElementsAreArrayMatcher<T> UnorderedElementsAreArray(
+ const T* pointer, size_t count) {
return UnorderedElementsAreArray(pointer, pointer + count);
}
template <typename T, size_t N>
-inline internal::UnorderedElementsAreArrayMatcher<T>
-UnorderedElementsAreArray(const T (&array)[N]) {
+inline internal::UnorderedElementsAreArrayMatcher<T> UnorderedElementsAreArray(
+ const T (&array)[N]) {
return UnorderedElementsAreArray(array, N);
}
@@ -3840,8 +4214,8 @@ UnorderedElementsAreArray(const Container& container) {
}
template <typename T>
-inline internal::UnorderedElementsAreArrayMatcher<T>
-UnorderedElementsAreArray(::std::initializer_list<T> xs) {
+inline internal::UnorderedElementsAreArrayMatcher<T> UnorderedElementsAreArray(
+ ::std::initializer_list<T> xs) {
return UnorderedElementsAreArray(xs.begin(), xs.end());
}
@@ -3858,12 +4232,14 @@ const internal::AnythingMatcher _ = {};
// Creates a matcher that matches any value of the given type T.
template <typename T>
inline Matcher<T> A() {
- return Matcher<T>(new internal::AnyMatcherImpl<T>());
+ return _;
}
// Creates a matcher that matches any value of the given type T.
template <typename T>
-inline Matcher<T> An() { return A<T>(); }
+inline Matcher<T> An() {
+ return _;
+}
template <typename T, typename M>
Matcher<T> internal::MatcherCastImpl<T, M>::CastImpl(
@@ -3873,14 +4249,14 @@ Matcher<T> internal::MatcherCastImpl<T, M>::CastImpl(
}
// Creates a polymorphic matcher that matches any NULL pointer.
-inline PolymorphicMatcher<internal::IsNullMatcher > IsNull() {
+inline PolymorphicMatcher<internal::IsNullMatcher> IsNull() {
return MakePolymorphicMatcher(internal::IsNullMatcher());
}
// Creates a polymorphic matcher that matches any non-NULL pointer.
// This is convenient as Not(NULL) doesn't compile (the compiler
// thinks that that expression is comparing a pointer with an integer).
-inline PolymorphicMatcher<internal::NotNullMatcher > NotNull() {
+inline PolymorphicMatcher<internal::NotNullMatcher> NotNull() {
return MakePolymorphicMatcher(internal::NotNullMatcher());
}
@@ -3911,8 +4287,8 @@ inline internal::FloatingEqMatcher<double> NanSensitiveDoubleEq(double rhs) {
// Creates a matcher that matches any double argument approximately equal to
// rhs, up to the specified max absolute error bound, where two NANs are
// considered unequal. The max absolute error bound must be non-negative.
-inline internal::FloatingEqMatcher<double> DoubleNear(
- double rhs, double max_abs_error) {
+inline internal::FloatingEqMatcher<double> DoubleNear(double rhs,
+ double max_abs_error) {
return internal::FloatingEqMatcher<double>(rhs, false, max_abs_error);
}
@@ -3939,8 +4315,8 @@ inline internal::FloatingEqMatcher<float> NanSensitiveFloatEq(float rhs) {
// Creates a matcher that matches any float argument approximately equal to
// rhs, up to the specified max absolute error bound, where two NANs are
// considered unequal. The max absolute error bound must be non-negative.
-inline internal::FloatingEqMatcher<float> FloatNear(
- float rhs, float max_abs_error) {
+inline internal::FloatingEqMatcher<float> FloatNear(float rhs,
+ float max_abs_error) {
return internal::FloatingEqMatcher<float>(rhs, false, max_abs_error);
}
@@ -3968,7 +4344,7 @@ inline internal::PointeeMatcher<InnerMatcher> Pointee(
// If To is a reference and the cast fails, this matcher returns false
// immediately.
template <typename To>
-inline PolymorphicMatcher<internal::WhenDynamicCastToMatcher<To> >
+inline PolymorphicMatcher<internal::WhenDynamicCastToMatcher<To>>
WhenDynamicCastTo(const Matcher<To>& inner_matcher) {
return MakePolymorphicMatcher(
internal::WhenDynamicCastToMatcher<To>(inner_matcher));
@@ -3980,12 +4356,10 @@ WhenDynamicCastTo(const Matcher<To>& inner_matcher) {
// Field(&Foo::number, Ge(5))
// matches a Foo object x if and only if x.number >= 5.
template <typename Class, typename FieldType, typename FieldMatcher>
-inline PolymorphicMatcher<
- internal::FieldMatcher<Class, FieldType> > Field(
+inline PolymorphicMatcher<internal::FieldMatcher<Class, FieldType>> Field(
FieldType Class::*field, const FieldMatcher& matcher) {
- return MakePolymorphicMatcher(
- internal::FieldMatcher<Class, FieldType>(
- field, MatcherCast<const FieldType&>(matcher)));
+ return MakePolymorphicMatcher(internal::FieldMatcher<Class, FieldType>(
+ field, MatcherCast<const FieldType&>(matcher)));
// The call to MatcherCast() is required for supporting inner
// matchers of compatible types. For example, it allows
// Field(&Foo::bar, m)
@@ -3995,7 +4369,7 @@ inline PolymorphicMatcher<
// Same as Field() but also takes the name of the field to provide better error
// messages.
template <typename Class, typename FieldType, typename FieldMatcher>
-inline PolymorphicMatcher<internal::FieldMatcher<Class, FieldType> > Field(
+inline PolymorphicMatcher<internal::FieldMatcher<Class, FieldType>> Field(
const std::string& field_name, FieldType Class::*field,
const FieldMatcher& matcher) {
return MakePolymorphicMatcher(internal::FieldMatcher<Class, FieldType>(
@@ -4008,7 +4382,7 @@ inline PolymorphicMatcher<internal::FieldMatcher<Class, FieldType> > Field(
// matches a Foo object x if and only if x.str() starts with "hi".
template <typename Class, typename PropertyType, typename PropertyMatcher>
inline PolymorphicMatcher<internal::PropertyMatcher<
- Class, PropertyType, PropertyType (Class::*)() const> >
+ Class, PropertyType, PropertyType (Class::*)() const>>
Property(PropertyType (Class::*property)() const,
const PropertyMatcher& matcher) {
return MakePolymorphicMatcher(
@@ -4025,7 +4399,7 @@ Property(PropertyType (Class::*property)() const,
// better error messages.
template <typename Class, typename PropertyType, typename PropertyMatcher>
inline PolymorphicMatcher<internal::PropertyMatcher<
- Class, PropertyType, PropertyType (Class::*)() const> >
+ Class, PropertyType, PropertyType (Class::*)() const>>
Property(const std::string& property_name,
PropertyType (Class::*property)() const,
const PropertyMatcher& matcher) {
@@ -4038,8 +4412,8 @@ Property(const std::string& property_name,
// The same as above but for reference-qualified member functions.
template <typename Class, typename PropertyType, typename PropertyMatcher>
inline PolymorphicMatcher<internal::PropertyMatcher<
- Class, PropertyType, PropertyType (Class::*)() const &> >
-Property(PropertyType (Class::*property)() const &,
+ Class, PropertyType, PropertyType (Class::*)() const&>>
+Property(PropertyType (Class::*property)() const&,
const PropertyMatcher& matcher) {
return MakePolymorphicMatcher(
internal::PropertyMatcher<Class, PropertyType,
@@ -4050,9 +4424,9 @@ Property(PropertyType (Class::*property)() const &,
// Three-argument form for reference-qualified member functions.
template <typename Class, typename PropertyType, typename PropertyMatcher>
inline PolymorphicMatcher<internal::PropertyMatcher<
- Class, PropertyType, PropertyType (Class::*)() const &> >
+ Class, PropertyType, PropertyType (Class::*)() const&>>
Property(const std::string& property_name,
- PropertyType (Class::*property)() const &,
+ PropertyType (Class::*property)() const&,
const PropertyMatcher& matcher) {
return MakePolymorphicMatcher(
internal::PropertyMatcher<Class, PropertyType,
@@ -4071,109 +4445,127 @@ Property(const std::string& property_name,
template <typename Callable, typename InnerMatcher>
internal::ResultOfMatcher<Callable, InnerMatcher> ResultOf(
Callable callable, InnerMatcher matcher) {
+ return internal::ResultOfMatcher<Callable, InnerMatcher>(std::move(callable),
+ std::move(matcher));
+}
+
+// Same as ResultOf() above, but also takes a description of the `callable`
+// result to provide better error messages.
+template <typename Callable, typename InnerMatcher>
+internal::ResultOfMatcher<Callable, InnerMatcher> ResultOf(
+ const std::string& result_description, Callable callable,
+ InnerMatcher matcher) {
return internal::ResultOfMatcher<Callable, InnerMatcher>(
- std::move(callable), std::move(matcher));
+ result_description, std::move(callable), std::move(matcher));
}
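A sketch of the new three-argument overload; the leading string is woven into failure messages in place of the unprintable callable (test name and lambda are illustrative):

#include <string>
#include "gmock/gmock.h"

using ::testing::Gt;
using ::testing::ResultOf;

TEST(ResultOfSketch, NamedCallable) {
  EXPECT_THAT(std::string("hello"),
              ResultOf(
                  "string length",
                  [](const std::string& s) { return s.size(); }, Gt(3u)));
}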
// String matchers.
// Matches a string equal to str.
-inline PolymorphicMatcher<internal::StrEqualityMatcher<std::string> > StrEq(
- const std::string& str) {
+template <typename T = std::string>
+PolymorphicMatcher<internal::StrEqualityMatcher<std::string>> StrEq(
+ const internal::StringLike<T>& str) {
return MakePolymorphicMatcher(
- internal::StrEqualityMatcher<std::string>(str, true, true));
+ internal::StrEqualityMatcher<std::string>(std::string(str), true, true));
}
// Matches a string not equal to str.
-inline PolymorphicMatcher<internal::StrEqualityMatcher<std::string> > StrNe(
- const std::string& str) {
+template <typename T = std::string>
+PolymorphicMatcher<internal::StrEqualityMatcher<std::string>> StrNe(
+ const internal::StringLike<T>& str) {
return MakePolymorphicMatcher(
- internal::StrEqualityMatcher<std::string>(str, false, true));
+ internal::StrEqualityMatcher<std::string>(std::string(str), false, true));
}
// Matches a string equal to str, ignoring case.
-inline PolymorphicMatcher<internal::StrEqualityMatcher<std::string> > StrCaseEq(
- const std::string& str) {
+template <typename T = std::string>
+PolymorphicMatcher<internal::StrEqualityMatcher<std::string>> StrCaseEq(
+ const internal::StringLike<T>& str) {
return MakePolymorphicMatcher(
- internal::StrEqualityMatcher<std::string>(str, true, false));
+ internal::StrEqualityMatcher<std::string>(std::string(str), true, false));
}
// Matches a string not equal to str, ignoring case.
-inline PolymorphicMatcher<internal::StrEqualityMatcher<std::string> > StrCaseNe(
- const std::string& str) {
- return MakePolymorphicMatcher(
- internal::StrEqualityMatcher<std::string>(str, false, false));
+template <typename T = std::string>
+PolymorphicMatcher<internal::StrEqualityMatcher<std::string>> StrCaseNe(
+ const internal::StringLike<T>& str) {
+ return MakePolymorphicMatcher(internal::StrEqualityMatcher<std::string>(
+ std::string(str), false, false));
}
// Creates a matcher that matches any string, std::string, or C string
// that contains the given substring.
-inline PolymorphicMatcher<internal::HasSubstrMatcher<std::string> > HasSubstr(
- const std::string& substring) {
+template <typename T = std::string>
+PolymorphicMatcher<internal::HasSubstrMatcher<std::string>> HasSubstr(
+ const internal::StringLike<T>& substring) {
return MakePolymorphicMatcher(
- internal::HasSubstrMatcher<std::string>(substring));
+ internal::HasSubstrMatcher<std::string>(std::string(substring)));
}
// Matches a string that starts with 'prefix' (case-sensitive).
-inline PolymorphicMatcher<internal::StartsWithMatcher<std::string> > StartsWith(
- const std::string& prefix) {
+template <typename T = std::string>
+PolymorphicMatcher<internal::StartsWithMatcher<std::string>> StartsWith(
+ const internal::StringLike<T>& prefix) {
return MakePolymorphicMatcher(
- internal::StartsWithMatcher<std::string>(prefix));
+ internal::StartsWithMatcher<std::string>(std::string(prefix)));
}
// Matches a string that ends with 'suffix' (case-sensitive).
-inline PolymorphicMatcher<internal::EndsWithMatcher<std::string> > EndsWith(
- const std::string& suffix) {
- return MakePolymorphicMatcher(internal::EndsWithMatcher<std::string>(suffix));
+template <typename T = std::string>
+PolymorphicMatcher<internal::EndsWithMatcher<std::string>> EndsWith(
+ const internal::StringLike<T>& suffix) {
+ return MakePolymorphicMatcher(
+ internal::EndsWithMatcher<std::string>(std::string(suffix)));
}
#if GTEST_HAS_STD_WSTRING
// Wide string matchers.
// Matches a string equal to str.
-inline PolymorphicMatcher<internal::StrEqualityMatcher<std::wstring> > StrEq(
+inline PolymorphicMatcher<internal::StrEqualityMatcher<std::wstring>> StrEq(
const std::wstring& str) {
return MakePolymorphicMatcher(
internal::StrEqualityMatcher<std::wstring>(str, true, true));
}
// Matches a string not equal to str.
-inline PolymorphicMatcher<internal::StrEqualityMatcher<std::wstring> > StrNe(
+inline PolymorphicMatcher<internal::StrEqualityMatcher<std::wstring>> StrNe(
const std::wstring& str) {
return MakePolymorphicMatcher(
internal::StrEqualityMatcher<std::wstring>(str, false, true));
}
// Matches a string equal to str, ignoring case.
-inline PolymorphicMatcher<internal::StrEqualityMatcher<std::wstring> >
-StrCaseEq(const std::wstring& str) {
+inline PolymorphicMatcher<internal::StrEqualityMatcher<std::wstring>> StrCaseEq(
+ const std::wstring& str) {
return MakePolymorphicMatcher(
internal::StrEqualityMatcher<std::wstring>(str, true, false));
}
// Matches a string not equal to str, ignoring case.
-inline PolymorphicMatcher<internal::StrEqualityMatcher<std::wstring> >
-StrCaseNe(const std::wstring& str) {
+inline PolymorphicMatcher<internal::StrEqualityMatcher<std::wstring>> StrCaseNe(
+ const std::wstring& str) {
return MakePolymorphicMatcher(
internal::StrEqualityMatcher<std::wstring>(str, false, false));
}
// Creates a matcher that matches any ::wstring, std::wstring, or C wide string
// that contains the given substring.
-inline PolymorphicMatcher<internal::HasSubstrMatcher<std::wstring> > HasSubstr(
+inline PolymorphicMatcher<internal::HasSubstrMatcher<std::wstring>> HasSubstr(
const std::wstring& substring) {
return MakePolymorphicMatcher(
internal::HasSubstrMatcher<std::wstring>(substring));
}
// Matches a string that starts with 'prefix' (case-sensitive).
-inline PolymorphicMatcher<internal::StartsWithMatcher<std::wstring> >
-StartsWith(const std::wstring& prefix) {
+inline PolymorphicMatcher<internal::StartsWithMatcher<std::wstring>> StartsWith(
+ const std::wstring& prefix) {
return MakePolymorphicMatcher(
internal::StartsWithMatcher<std::wstring>(prefix));
}
// Matches a string that ends with 'suffix' (case-sensitive).
-inline PolymorphicMatcher<internal::EndsWithMatcher<std::wstring> > EndsWith(
+inline PolymorphicMatcher<internal::EndsWithMatcher<std::wstring>> EndsWith(
const std::wstring& suffix) {
return MakePolymorphicMatcher(
internal::EndsWithMatcher<std::wstring>(suffix));
@@ -4268,8 +4660,8 @@ inline internal::NotMatcher<InnerMatcher> Not(InnerMatcher m) {
// predicate. The predicate can be any unary function or functor
// whose return type can be implicitly converted to bool.
template <typename Predicate>
-inline PolymorphicMatcher<internal::TrulyMatcher<Predicate> >
-Truly(Predicate pred) {
+inline PolymorphicMatcher<internal::TrulyMatcher<Predicate>> Truly(
+ Predicate pred) {
return MakePolymorphicMatcher(internal::TrulyMatcher<Predicate>(pred));
}
@@ -4280,8 +4672,8 @@ Truly(Predicate pred) {
// EXPECT_THAT(container, SizeIs(2)); // Checks container has 2 elements.
// EXPECT_THAT(container, SizeIs(Le(2)); // Checks container has at most 2.
template <typename SizeMatcher>
-inline internal::SizeIsMatcher<SizeMatcher>
-SizeIs(const SizeMatcher& size_matcher) {
+inline internal::SizeIsMatcher<SizeMatcher> SizeIs(
+ const SizeMatcher& size_matcher) {
return internal::SizeIsMatcher<SizeMatcher>(size_matcher);
}
@@ -4291,8 +4683,8 @@ SizeIs(const SizeMatcher& size_matcher) {
// do not implement size(). The container must provide const_iterator (with
// valid iterator_traits), begin() and end().
template <typename DistanceMatcher>
-inline internal::BeginEndDistanceIsMatcher<DistanceMatcher>
-BeginEndDistanceIs(const DistanceMatcher& distance_matcher) {
+inline internal::BeginEndDistanceIsMatcher<DistanceMatcher> BeginEndDistanceIs(
+ const DistanceMatcher& distance_matcher) {
return internal::BeginEndDistanceIsMatcher<DistanceMatcher>(distance_matcher);
}
@@ -4301,8 +4693,8 @@ BeginEndDistanceIs(const DistanceMatcher& distance_matcher) {
// values that are included in one container but not the other. (Duplicate
// values and order differences are not explained.)
template <typename Container>
-inline PolymorphicMatcher<internal::ContainerEqMatcher<
- typename std::remove_const<Container>::type>>
+inline PolymorphicMatcher<
+ internal::ContainerEqMatcher<typename std::remove_const<Container>::type>>
ContainerEq(const Container& rhs) {
return MakePolymorphicMatcher(internal::ContainerEqMatcher<Container>(rhs));
}
@@ -4310,9 +4702,8 @@ ContainerEq(const Container& rhs) {
// Returns a matcher that matches a container that, when sorted using
// the given comparator, matches container_matcher.
template <typename Comparator, typename ContainerMatcher>
-inline internal::WhenSortedByMatcher<Comparator, ContainerMatcher>
-WhenSortedBy(const Comparator& comparator,
- const ContainerMatcher& container_matcher) {
+inline internal::WhenSortedByMatcher<Comparator, ContainerMatcher> WhenSortedBy(
+ const Comparator& comparator, const ContainerMatcher& container_matcher) {
return internal::WhenSortedByMatcher<Comparator, ContainerMatcher>(
comparator, container_matcher);
}
@@ -4322,9 +4713,9 @@ WhenSortedBy(const Comparator& comparator,
template <typename ContainerMatcher>
inline internal::WhenSortedByMatcher<internal::LessComparator, ContainerMatcher>
WhenSorted(const ContainerMatcher& container_matcher) {
- return
- internal::WhenSortedByMatcher<internal::LessComparator, ContainerMatcher>(
- internal::LessComparator(), container_matcher);
+ return internal::WhenSortedByMatcher<internal::LessComparator,
+ ContainerMatcher>(
+ internal::LessComparator(), container_matcher);
}
// Matches an STL-style container or a native array that contains the
@@ -4341,15 +4732,13 @@ Pointwise(const TupleMatcher& tuple_matcher, const Container& rhs) {
rhs);
}
-
// Supports the Pointwise(m, {a, b, c}) syntax.
template <typename TupleMatcher, typename T>
-inline internal::PointwiseMatcher<TupleMatcher, std::vector<T> > Pointwise(
+inline internal::PointwiseMatcher<TupleMatcher, std::vector<T>> Pointwise(
const TupleMatcher& tuple_matcher, std::initializer_list<T> rhs) {
return Pointwise(tuple_matcher, std::vector<T>(rhs));
}
-
// UnorderedPointwise(pair_matcher, rhs) matches an STL-style
// container or a native array that contains the same number of
// elements as in rhs, where in some permutation of the container, its
@@ -4378,28 +4767,25 @@ UnorderedPointwise(const Tuple2Matcher& tuple2_matcher,
RhsView::ConstReference(rhs_container);
// Create a matcher for each element in rhs_container.
- ::std::vector<internal::BoundSecondMatcher<Tuple2Matcher, Second> > matchers;
- for (typename RhsStlContainer::const_iterator it = rhs_stl_container.begin();
- it != rhs_stl_container.end(); ++it) {
- matchers.push_back(
- internal::MatcherBindSecond(tuple2_matcher, *it));
+ ::std::vector<internal::BoundSecondMatcher<Tuple2Matcher, Second>> matchers;
+ for (auto it = rhs_stl_container.begin(); it != rhs_stl_container.end();
+ ++it) {
+ matchers.push_back(internal::MatcherBindSecond(tuple2_matcher, *it));
}
// Delegate the work to UnorderedElementsAreArray().
return UnorderedElementsAreArray(matchers);
}
-
// Supports the UnorderedPointwise(m, {a, b, c}) syntax.
template <typename Tuple2Matcher, typename T>
inline internal::UnorderedElementsAreArrayMatcher<
- typename internal::BoundSecondMatcher<Tuple2Matcher, T> >
+ typename internal::BoundSecondMatcher<Tuple2Matcher, T>>
UnorderedPointwise(const Tuple2Matcher& tuple2_matcher,
std::initializer_list<T> rhs) {
return UnorderedPointwise(tuple2_matcher, std::vector<T>(rhs));
}
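A sketch of UnorderedPointwise with the initializer-list form; the matcher succeeds if some permutation of the container satisfies the 2-tuple matcher pairwise (values are illustrative):

#include <vector>
#include "gmock/gmock.h"

using ::testing::Lt;
using ::testing::UnorderedPointwise;

TEST(UnorderedPointwiseSketch, MatchesSomePermutation) {
  std::vector<int> actual = {3, 1, 2};
  // Sorted, actual is {1, 2, 3}, which is element-wise < {2, 3, 4}.
  EXPECT_THAT(actual, UnorderedPointwise(Lt(), {2, 3, 4}));
}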
-
// Matches an STL-style container or a native array that contains at
// least one element matching the given value or matcher.
//
@@ -4409,7 +4795,7 @@ UnorderedPointwise(const Tuple2Matcher& tuple2_matcher,
// page_ids.insert(1);
// EXPECT_THAT(page_ids, Contains(1));
// EXPECT_THAT(page_ids, Contains(Gt(2)));
-// EXPECT_THAT(page_ids, Not(Contains(4)));
+// EXPECT_THAT(page_ids, Not(Contains(4))); // See below for Times(0)
//
// ::std::map<int, size_t> page_lengths;
// page_lengths[1] = 100;
@@ -4418,6 +4804,19 @@ UnorderedPointwise(const Tuple2Matcher& tuple2_matcher,
//
// const char* user_ids[] = { "joe", "mike", "tom" };
// EXPECT_THAT(user_ids, Contains(Eq(::std::string("tom"))));
+//
+// The matcher supports a modifier `Times` that allows checking for arbitrary
+// occurrences, including testing for absence with Times(0).
+//
+// Examples:
+// ::std::vector<int> ids;
+//   ids.push_back(1);
+//   ids.push_back(1);
+//   ids.push_back(3);
+// EXPECT_THAT(ids, Contains(1).Times(2)); // 1 occurs 2 times
+// EXPECT_THAT(ids, Contains(2).Times(0)); // 2 is not present
+// EXPECT_THAT(ids, Contains(3).Times(Ge(1))); // 3 occurs at least once
+
template <typename M>
inline internal::ContainsMatcher<M> Contains(M matcher) {
return internal::ContainsMatcher<M>(matcher);
@@ -4544,7 +4943,7 @@ inline internal::UnorderedElementsAreArrayMatcher<T> IsSubsetOf(
// Matches an STL-style container or a native array that contains only
// elements matching the given value or matcher.
//
-// Each(m) is semantically equivalent to Not(Contains(Not(m))). Only
+// Each(m) is semantically equivalent to `Not(Contains(Not(m)))`. Only
// the messages are different.
//
// Examples:
@@ -4587,11 +4986,60 @@ inline internal::KeyMatcher<M> Key(M inner_matcher) {
// to match a std::map<int, string> that contains exactly one element whose key
// is >= 5 and whose value equals "foo".
template <typename FirstMatcher, typename SecondMatcher>
-inline internal::PairMatcher<FirstMatcher, SecondMatcher>
-Pair(FirstMatcher first_matcher, SecondMatcher second_matcher) {
- return internal::PairMatcher<FirstMatcher, SecondMatcher>(
- first_matcher, second_matcher);
+inline internal::PairMatcher<FirstMatcher, SecondMatcher> Pair(
+ FirstMatcher first_matcher, SecondMatcher second_matcher) {
+ return internal::PairMatcher<FirstMatcher, SecondMatcher>(first_matcher,
+ second_matcher);
+}
+
+namespace no_adl {
+// Conditional() creates a matcher that conditionally uses either the first or
+// second matcher provided. For example, we could create an `equal if, and only
+// if' matcher using the Conditional wrapper as follows:
+//
+// EXPECT_THAT(result, Conditional(condition, Eq(expected), Ne(expected)));
+template <typename MatcherTrue, typename MatcherFalse>
+internal::ConditionalMatcher<MatcherTrue, MatcherFalse> Conditional(
+ bool condition, MatcherTrue matcher_true, MatcherFalse matcher_false) {
+ return internal::ConditionalMatcher<MatcherTrue, MatcherFalse>(
+ condition, std::move(matcher_true), std::move(matcher_false));
+}
+
+// FieldsAre(matchers...) matches piecewise the fields of compatible structs.
+// These include those that support `get<I>(obj)`, and when structured bindings
+// are enabled any class that supports them.
+// In particular, `std::tuple`, `std::pair`, `std::array` and aggregate types.
+template <typename... M>
+internal::FieldsAreMatcher<typename std::decay<M>::type...> FieldsAre(
+ M&&... matchers) {
+ return internal::FieldsAreMatcher<typename std::decay<M>::type...>(
+ std::forward<M>(matchers)...);
+}
+
+// Creates a matcher that matches a pointer (raw or smart) that matches
+// inner_matcher.
+template <typename InnerMatcher>
+inline internal::PointerMatcher<InnerMatcher> Pointer(
+ const InnerMatcher& inner_matcher) {
+ return internal::PointerMatcher<InnerMatcher>(inner_matcher);
+}
+
+// Creates a matcher that matches an object that has an address that matches
+// inner_matcher.
+template <typename InnerMatcher>
+inline internal::AddressMatcher<InnerMatcher> Address(
+ const InnerMatcher& inner_matcher) {
+ return internal::AddressMatcher<InnerMatcher>(inner_matcher);
+}
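A usage sketch for Address; the inner matcher receives a pointer to the argument, so any pointer matcher composes with it (names are illustrative):

#include "gmock/gmock.h"

using ::testing::Address;
using ::testing::Eq;
using ::testing::NotNull;

TEST(AddressSketch, MatchesObjectAddress) {
  int value = 42;
  EXPECT_THAT(value, Address(NotNull()));   // every lvalue has an address
  EXPECT_THAT(value, Address(Eq(&value)));  // and it compares equal to itself
}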
+
+// Matches a base64 escaped string, when the unescaped string matches the
+// internal matcher.
+template <typename MatcherType>
+internal::WhenBase64UnescapedMatcher WhenBase64Unescaped(
+ const MatcherType& internal_matcher) {
+ return internal::WhenBase64UnescapedMatcher(internal_matcher);
}
+} // namespace no_adl
// Returns a predicate that is satisfied by anything that matches the
// given matcher.
@@ -4609,8 +5057,8 @@ inline bool Value(const T& value, M matcher) {
// Matches the value against the given matcher and explains the match
// result to listener.
template <typename T, typename M>
-inline bool ExplainMatchResult(
- M matcher, const T& value, MatchResultListener* listener) {
+inline bool ExplainMatchResult(M matcher, const T& value,
+ MatchResultListener* listener) {
return SafeMatcherCast<const T&>(matcher).MatchAndExplain(value, listener);
}
@@ -4620,7 +5068,8 @@ inline bool ExplainMatchResult(
//
// MATCHER_P(XAndYThat, matcher,
// "X that " + DescribeMatcher<int>(matcher, negation) +
-// " and Y that " + DescribeMatcher<double>(matcher, negation)) {
+// (negation ? " or" : " and") + " Y that " +
+// DescribeMatcher<double>(matcher, negation)) {
// return ExplainMatchResult(matcher, arg.x(), result_listener) &&
// ExplainMatchResult(matcher, arg.y(), result_listener);
// }
@@ -4769,7 +5218,9 @@ internal::ArgsMatcher<typename std::decay<InnerMatcher>::type, k...> Args(
//
// EXPECT_CALL(foo, Bar(_, _)).With(Eq());
template <typename InnerMatcher>
-inline InnerMatcher AllArgs(const InnerMatcher& matcher) { return matcher; }
+inline InnerMatcher AllArgs(const InnerMatcher& matcher) {
+ return matcher;
+}
// Returns a matcher that matches the value of an optional<> type variable.
// The matcher implementation only uses '!arg' and requires that the optional<>
@@ -4777,7 +5228,7 @@ inline InnerMatcher AllArgs(const InnerMatcher& matcher) { return matcher; }
// and is printable using 'PrintToString'. It is compatible with
// std::optional/std::experimental::optional.
// Note that to compare an optional type variable against nullopt you should
-// use Eq(nullopt) and not Optional(Eq(nullopt)). The latter implies that the
+// use Eq(nullopt) and not Eq(Optional(nullopt)). The latter implies that the
// optional value contains an optional itself.
template <typename ValueMatcher>
inline internal::OptionalMatcher<ValueMatcher> Optional(
@@ -4787,7 +5238,7 @@ inline internal::OptionalMatcher<ValueMatcher> Optional(
// Returns a matcher that matches the value of a absl::any type variable.
template <typename T>
-PolymorphicMatcher<internal::any_cast_matcher::AnyCastMatcher<T> > AnyWith(
+PolymorphicMatcher<internal::any_cast_matcher::AnyCastMatcher<T>> AnyWith(
const Matcher<const T&>& matcher) {
return MakePolymorphicMatcher(
internal::any_cast_matcher::AnyCastMatcher<T>(matcher));
@@ -4798,22 +5249,194 @@ PolymorphicMatcher<internal::any_cast_matcher::AnyCastMatcher<T> > AnyWith(
// functions.
// It is compatible with std::variant.
template <typename T>
-PolymorphicMatcher<internal::variant_matcher::VariantMatcher<T> > VariantWith(
+PolymorphicMatcher<internal::variant_matcher::VariantMatcher<T>> VariantWith(
const Matcher<const T&>& matcher) {
return MakePolymorphicMatcher(
internal::variant_matcher::VariantMatcher<T>(matcher));
}
+#if GTEST_HAS_EXCEPTIONS
+
+// Anything inside the `internal` namespace is internal to the implementation
+// and must not be used in user code!
+namespace internal {
+
+class WithWhatMatcherImpl {
+ public:
+ WithWhatMatcherImpl(Matcher<std::string> matcher)
+ : matcher_(std::move(matcher)) {}
+
+ void DescribeTo(std::ostream* os) const {
+ *os << "contains .what() that ";
+ matcher_.DescribeTo(os);
+ }
+
+ void DescribeNegationTo(std::ostream* os) const {
+ *os << "contains .what() that does not ";
+ matcher_.DescribeTo(os);
+ }
+
+ template <typename Err>
+ bool MatchAndExplain(const Err& err, MatchResultListener* listener) const {
+ *listener << "which contains .what() (of value = " << err.what()
+ << ") that ";
+ return matcher_.MatchAndExplain(err.what(), listener);
+ }
+
+ private:
+ const Matcher<std::string> matcher_;
+};
+
+inline PolymorphicMatcher<WithWhatMatcherImpl> WithWhat(
+ Matcher<std::string> m) {
+ return MakePolymorphicMatcher(WithWhatMatcherImpl(std::move(m)));
+}
+
+template <typename Err>
+class ExceptionMatcherImpl {
+ class NeverThrown {
+ public:
+ const char* what() const noexcept {
+ return "this exception should never be thrown";
+ }
+ };
+
+ // If the matchee raises an exception of a wrong type, we'd like to
+ // catch it and print its message and type. To do that, we add an additional
+ // catch clause:
+ //
+ // try { ... }
+ // catch (const Err&) { /* an expected exception */ }
+ // catch (const std::exception&) { /* exception of a wrong type */ }
+ //
+ // However, if the `Err` itself is `std::exception`, we'd end up with two
+ // identical `catch` clauses:
+ //
+ // try { ... }
+ // catch (const std::exception&) { /* an expected exception */ }
+ // catch (const std::exception&) { /* exception of a wrong type */ }
+ //
+ // This can cause a warning or an error in some compilers. To resolve
+ // the issue, we use a fake error type whenever `Err` is `std::exception`:
+ //
+ // try { ... }
+ // catch (const std::exception&) { /* an expected exception */ }
+ // catch (const NeverThrown&) { /* exception of a wrong type */ }
+ using DefaultExceptionType = typename std::conditional<
+ std::is_same<typename std::remove_cv<
+ typename std::remove_reference<Err>::type>::type,
+ std::exception>::value,
+ const NeverThrown&, const std::exception&>::type;
+
+ public:
+ ExceptionMatcherImpl(Matcher<const Err&> matcher)
+ : matcher_(std::move(matcher)) {}
+
+ void DescribeTo(std::ostream* os) const {
+ *os << "throws an exception which is a " << GetTypeName<Err>();
+ *os << " which ";
+ matcher_.DescribeTo(os);
+ }
+
+ void DescribeNegationTo(std::ostream* os) const {
+ *os << "throws an exception which is not a " << GetTypeName<Err>();
+ *os << " which ";
+ matcher_.DescribeNegationTo(os);
+ }
+
+ template <typename T>
+ bool MatchAndExplain(T&& x, MatchResultListener* listener) const {
+ try {
+ (void)(std::forward<T>(x)());
+ } catch (const Err& err) {
+ *listener << "throws an exception which is a " << GetTypeName<Err>();
+ *listener << " ";
+ return matcher_.MatchAndExplain(err, listener);
+ } catch (DefaultExceptionType err) {
+#if GTEST_HAS_RTTI
+ *listener << "throws an exception of type " << GetTypeName(typeid(err));
+ *listener << " ";
+#else
+ *listener << "throws an std::exception-derived type ";
+#endif
+ *listener << "with description \"" << err.what() << "\"";
+ return false;
+ } catch (...) {
+ *listener << "throws an exception of an unknown type";
+ return false;
+ }
+
+ *listener << "does not throw any exception";
+ return false;
+ }
+
+ private:
+ const Matcher<const Err&> matcher_;
+};
+
+} // namespace internal
+
+// Throws()
+// Throws(exceptionMatcher)
+// ThrowsMessage(messageMatcher)
+//
+// This matcher accepts a callable and verifies that when invoked, it throws
+// an exception with the given type and properties.
+//
+// Examples:
+//
+// EXPECT_THAT(
+// []() { throw std::runtime_error("message"); },
+// Throws<std::runtime_error>());
+//
+// EXPECT_THAT(
+// []() { throw std::runtime_error("message"); },
+// ThrowsMessage<std::runtime_error>(HasSubstr("message")));
+//
+// EXPECT_THAT(
+// []() { throw std::runtime_error("message"); },
+// Throws<std::runtime_error>(
+// Property(&std::runtime_error::what, HasSubstr("message"))));
+
+template <typename Err>
+PolymorphicMatcher<internal::ExceptionMatcherImpl<Err>> Throws() {
+ return MakePolymorphicMatcher(
+ internal::ExceptionMatcherImpl<Err>(A<const Err&>()));
+}
+
+template <typename Err, typename ExceptionMatcher>
+PolymorphicMatcher<internal::ExceptionMatcherImpl<Err>> Throws(
+ const ExceptionMatcher& exception_matcher) {
+  // Using matcher cast allows users to pass a matcher of a broader type.
+ // For example user may want to pass Matcher<std::exception>
+ // to Throws<std::runtime_error>, or Matcher<int64> to Throws<int32>.
+ return MakePolymorphicMatcher(internal::ExceptionMatcherImpl<Err>(
+ SafeMatcherCast<const Err&>(exception_matcher)));
+}
+
+template <typename Err, typename MessageMatcher>
+PolymorphicMatcher<internal::ExceptionMatcherImpl<Err>> ThrowsMessage(
+ MessageMatcher&& message_matcher) {
+ static_assert(std::is_base_of<std::exception, Err>::value,
+ "expected an std::exception-derived type");
+ return Throws<Err>(internal::WithWhat(
+ MatcherCast<std::string>(std::forward<MessageMatcher>(message_matcher))));
+}
+
+#endif // GTEST_HAS_EXCEPTIONS
+
// These macros allow using matchers to check values in Google Test
// tests. ASSERT_THAT(value, matcher) and EXPECT_THAT(value, matcher)
// succeed if and only if the value matches the matcher. If the assertion
// fails, the value and the description of the matcher will be printed.
-#define ASSERT_THAT(value, matcher) ASSERT_PRED_FORMAT1(\
- ::testing::internal::MakePredicateFormatterFromMatcher(matcher), value)
-#define EXPECT_THAT(value, matcher) EXPECT_PRED_FORMAT1(\
- ::testing::internal::MakePredicateFormatterFromMatcher(matcher), value)
-
-// MATCHER* macroses itself are listed below.
+#define ASSERT_THAT(value, matcher) \
+ ASSERT_PRED_FORMAT1( \
+ ::testing::internal::MakePredicateFormatterFromMatcher(matcher), value)
+#define EXPECT_THAT(value, matcher) \
+ EXPECT_PRED_FORMAT1( \
+ ::testing::internal::MakePredicateFormatterFromMatcher(matcher), value)
+
+// The MATCHER* macros themselves are listed below.
#define MATCHER(name, description) \
class name##Matcher \
: public ::testing::internal::MatcherBaseImpl<name##Matcher> { \
@@ -4834,12 +5457,13 @@ PolymorphicMatcher<internal::variant_matcher::VariantMatcher<T> > VariantWith(
\
private: \
::std::string FormatDescription(bool negation) const { \
+ /* NOLINTNEXTLINE readability-redundant-string-init */ \
::std::string gmock_description = (description); \
if (!gmock_description.empty()) { \
return gmock_description; \
} \
return ::testing::internal::FormatMatcherDescription(negation, #name, \
- {}); \
+ {}, {}); \
} \
}; \
}; \
@@ -4851,33 +5475,41 @@ PolymorphicMatcher<internal::variant_matcher::VariantMatcher<T> > VariantWith(
const
#define MATCHER_P(name, p0, description) \
- GMOCK_INTERNAL_MATCHER(name, name##MatcherP, description, (p0))
-#define MATCHER_P2(name, p0, p1, description) \
- GMOCK_INTERNAL_MATCHER(name, name##MatcherP2, description, (p0, p1))
-#define MATCHER_P3(name, p0, p1, p2, description) \
- GMOCK_INTERNAL_MATCHER(name, name##MatcherP3, description, (p0, p1, p2))
-#define MATCHER_P4(name, p0, p1, p2, p3, description) \
- GMOCK_INTERNAL_MATCHER(name, name##MatcherP4, description, (p0, p1, p2, p3))
+ GMOCK_INTERNAL_MATCHER(name, name##MatcherP, description, (#p0), (p0))
+#define MATCHER_P2(name, p0, p1, description) \
+ GMOCK_INTERNAL_MATCHER(name, name##MatcherP2, description, (#p0, #p1), \
+ (p0, p1))
+#define MATCHER_P3(name, p0, p1, p2, description) \
+ GMOCK_INTERNAL_MATCHER(name, name##MatcherP3, description, (#p0, #p1, #p2), \
+ (p0, p1, p2))
+#define MATCHER_P4(name, p0, p1, p2, p3, description) \
+ GMOCK_INTERNAL_MATCHER(name, name##MatcherP4, description, \
+ (#p0, #p1, #p2, #p3), (p0, p1, p2, p3))
#define MATCHER_P5(name, p0, p1, p2, p3, p4, description) \
GMOCK_INTERNAL_MATCHER(name, name##MatcherP5, description, \
- (p0, p1, p2, p3, p4))
+ (#p0, #p1, #p2, #p3, #p4), (p0, p1, p2, p3, p4))
#define MATCHER_P6(name, p0, p1, p2, p3, p4, p5, description) \
GMOCK_INTERNAL_MATCHER(name, name##MatcherP6, description, \
+ (#p0, #p1, #p2, #p3, #p4, #p5), \
(p0, p1, p2, p3, p4, p5))
#define MATCHER_P7(name, p0, p1, p2, p3, p4, p5, p6, description) \
GMOCK_INTERNAL_MATCHER(name, name##MatcherP7, description, \
+ (#p0, #p1, #p2, #p3, #p4, #p5, #p6), \
(p0, p1, p2, p3, p4, p5, p6))
#define MATCHER_P8(name, p0, p1, p2, p3, p4, p5, p6, p7, description) \
GMOCK_INTERNAL_MATCHER(name, name##MatcherP8, description, \
+ (#p0, #p1, #p2, #p3, #p4, #p5, #p6, #p7), \
(p0, p1, p2, p3, p4, p5, p6, p7))
#define MATCHER_P9(name, p0, p1, p2, p3, p4, p5, p6, p7, p8, description) \
GMOCK_INTERNAL_MATCHER(name, name##MatcherP9, description, \
+ (#p0, #p1, #p2, #p3, #p4, #p5, #p6, #p7, #p8), \
(p0, p1, p2, p3, p4, p5, p6, p7, p8))
#define MATCHER_P10(name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, description) \
GMOCK_INTERNAL_MATCHER(name, name##MatcherP10, description, \
+ (#p0, #p1, #p2, #p3, #p4, #p5, #p6, #p7, #p8, #p9), \
(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9))
-#define GMOCK_INTERNAL_MATCHER(name, full_name, description, args) \
+#define GMOCK_INTERNAL_MATCHER(name, full_name, description, arg_names, args) \
template <GMOCK_INTERNAL_MATCHER_TEMPLATE_PARAMS(args)> \
class full_name : public ::testing::internal::MatcherBaseImpl< \
full_name<GMOCK_INTERNAL_MATCHER_TYPE_PARAMS(args)>> { \
@@ -4906,7 +5538,7 @@ PolymorphicMatcher<internal::variant_matcher::VariantMatcher<T> > VariantWith(
return gmock_description; \
} \
return ::testing::internal::FormatMatcherDescription( \
- negation, #name, \
+ negation, #name, {GMOCK_PP_REMOVE_PARENS(arg_names)}, \
::testing::internal::UniversalTersePrintTupleFieldsToStrings( \
::std::tuple<GMOCK_INTERNAL_MATCHER_TYPE_PARAMS(args)>( \
GMOCK_INTERNAL_MATCHER_MEMBERS_USAGE(args)))); \
@@ -4963,6 +5595,9 @@ PolymorphicMatcher<internal::variant_matcher::VariantMatcher<T> > VariantWith(
#define GMOCK_INTERNAL_MATCHER_ARG_USAGE(i, data_unused, arg_unused) \
, gmock_p##i
+// To prevent ADL on certain functions, we put them in a separate namespace.
+using namespace no_adl; // NOLINT
+
} // namespace testing
GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 5046
@@ -4972,4 +5607,4 @@ GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 5046
// declarations from this file.
#include "gmock/internal/custom/gmock-matchers.h"
-#endif // GMOCK_INCLUDE_GMOCK_GMOCK_MATCHERS_H_
+#endif // GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_MATCHERS_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-more-actions.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-more-actions.h
index d42484aef2b..148ac017210 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-more-actions.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-more-actions.h
@@ -27,41 +27,555 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
// Google Mock - a framework for writing C++ mock classes.
//
-// This file implements some actions that depend on gmock-generated-actions.h.
+// This file implements some commonly used variadic actions.
-// GOOGLETEST_CM0002 DO NOT DELETE
+// IWYU pragma: private, include "gmock/gmock.h"
+// IWYU pragma: friend gmock/.*
-#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_MORE_ACTIONS_H_
-#define GMOCK_INCLUDE_GMOCK_GMOCK_MORE_ACTIONS_H_
+#ifndef GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_MORE_ACTIONS_H_
+#define GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_MORE_ACTIONS_H_
-#include <algorithm>
-#include <type_traits>
+#include <memory>
+#include <utility>
-#include "gmock/gmock-generated-actions.h"
+#include "gmock/gmock-actions.h"
+#include "gmock/internal/gmock-port.h"
-namespace testing {
-namespace internal {
+// Include any custom callback actions added by the local installation.
+#include "gmock/internal/custom/gmock-generated-actions.h"
-// An internal replacement for std::copy which mimics its behavior. This is
-// necessary because Visual Studio deprecates ::std::copy, issuing warning 4996.
-// However Visual Studio 2010 and later do not honor #pragmas which disable that
-// warning.
-template<typename InputIterator, typename OutputIterator>
-inline OutputIterator CopyElements(InputIterator first,
- InputIterator last,
- OutputIterator output) {
- for (; first != last; ++first, ++output) {
- *output = *first;
- }
- return output;
-}
+// Sometimes you want to give an action explicit template parameters
+// that cannot be inferred from its value parameters. ACTION() and
+// ACTION_P*() don't support that. ACTION_TEMPLATE() remedies that
+// and can be viewed as an extension to ACTION() and ACTION_P*().
+//
+// The syntax:
+//
+// ACTION_TEMPLATE(ActionName,
+// HAS_m_TEMPLATE_PARAMS(kind1, name1, ..., kind_m, name_m),
+// AND_n_VALUE_PARAMS(p1, ..., p_n)) { statements; }
+//
+// defines an action template that takes m explicit template
+// parameters and n value parameters. name_i is the name of the i-th
+// template parameter, and kind_i specifies whether it's a typename,
+// an integral constant, or a template. p_i is the name of the i-th
+// value parameter.
+//
+// Example:
+//
+// // DuplicateArg<k, T>(output) converts the k-th argument of the mock
+// // function to type T and copies it to *output.
+// ACTION_TEMPLATE(DuplicateArg,
+// HAS_2_TEMPLATE_PARAMS(int, k, typename, T),
+// AND_1_VALUE_PARAMS(output)) {
+// *output = T(::std::get<k>(args));
+// }
+// ...
+// int n;
+// EXPECT_CALL(mock, Foo(_, _))
+// .WillOnce(DuplicateArg<1, unsigned char>(&n));
+//
+// To create an instance of an action template, write:
+//
+// ActionName<t1, ..., t_m>(v1, ..., v_n)
+//
+// where the ts are the template arguments and the vs are the value
+// arguments. The value argument types are inferred by the compiler.
+// If you want to explicitly specify the value argument types, you can
+// provide additional template arguments:
+//
+// ActionName<t1, ..., t_m, u1, ..., u_k>(v1, ..., v_n)
+//
+// where u_i is the desired type of v_i.
+//
+// ACTION_TEMPLATE and ACTION/ACTION_P* can be overloaded on the
+// number of value parameters, but not on the number of template
+// parameters. Without the restriction, the meaning of the following
+// is unclear:
+//
+// OverloadedAction<int, bool>(x);
+//
+// Are we using a single-template-parameter action where 'bool' refers
+// to the type of x, or are we using a two-template-parameter action
+// where the compiler is asked to infer the type of x?
+//
+// Implementation notes:
+//
+// GMOCK_INTERNAL_*_HAS_m_TEMPLATE_PARAMS and
+// GMOCK_INTERNAL_*_AND_n_VALUE_PARAMS are internal macros for
+// implementing ACTION_TEMPLATE. The main trick we use is to create
+// new macro invocations when expanding a macro. For example, we have
+//
+// #define ACTION_TEMPLATE(name, template_params, value_params)
+// ... GMOCK_INTERNAL_DECL_##template_params ...
+//
+// which causes ACTION_TEMPLATE(..., HAS_1_TEMPLATE_PARAMS(typename, T), ...)
+// to expand to
+//
+// ... GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS(typename, T) ...
+//
+// Since GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS is a macro, the
+// preprocessor will continue to expand it to
+//
+// ... typename T ...
+//
+// This technique conforms to the C++ standard and is portable. It
+// allows us to implement action templates using O(N) code, where N is
+// the maximum number of template/value parameters supported. Without
+// using it, we'd have to write O(N^2) code to implement all
+// combinations of m and n.
-} // namespace internal
+// Declares the template parameters.
+#define GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS(kind0, name0) kind0 name0
+#define GMOCK_INTERNAL_DECL_HAS_2_TEMPLATE_PARAMS(kind0, name0, kind1, name1) \
+ kind0 name0, kind1 name1
+#define GMOCK_INTERNAL_DECL_HAS_3_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \
+ kind2, name2) \
+ kind0 name0, kind1 name1, kind2 name2
+#define GMOCK_INTERNAL_DECL_HAS_4_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \
+ kind2, name2, kind3, name3) \
+ kind0 name0, kind1 name1, kind2 name2, kind3 name3
+#define GMOCK_INTERNAL_DECL_HAS_5_TEMPLATE_PARAMS( \
+ kind0, name0, kind1, name1, kind2, name2, kind3, name3, kind4, name4) \
+ kind0 name0, kind1 name1, kind2 name2, kind3 name3, kind4 name4
+#define GMOCK_INTERNAL_DECL_HAS_6_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \
+ kind2, name2, kind3, name3, \
+ kind4, name4, kind5, name5) \
+ kind0 name0, kind1 name1, kind2 name2, kind3 name3, kind4 name4, kind5 name5
+#define GMOCK_INTERNAL_DECL_HAS_7_TEMPLATE_PARAMS( \
+ kind0, name0, kind1, name1, kind2, name2, kind3, name3, kind4, name4, \
+ kind5, name5, kind6, name6) \
+ kind0 name0, kind1 name1, kind2 name2, kind3 name3, kind4 name4, \
+ kind5 name5, kind6 name6
+#define GMOCK_INTERNAL_DECL_HAS_8_TEMPLATE_PARAMS( \
+ kind0, name0, kind1, name1, kind2, name2, kind3, name3, kind4, name4, \
+ kind5, name5, kind6, name6, kind7, name7) \
+ kind0 name0, kind1 name1, kind2 name2, kind3 name3, kind4 name4, \
+ kind5 name5, kind6 name6, kind7 name7
+#define GMOCK_INTERNAL_DECL_HAS_9_TEMPLATE_PARAMS( \
+ kind0, name0, kind1, name1, kind2, name2, kind3, name3, kind4, name4, \
+ kind5, name5, kind6, name6, kind7, name7, kind8, name8) \
+ kind0 name0, kind1 name1, kind2 name2, kind3 name3, kind4 name4, \
+ kind5 name5, kind6 name6, kind7 name7, kind8 name8
+#define GMOCK_INTERNAL_DECL_HAS_10_TEMPLATE_PARAMS( \
+ kind0, name0, kind1, name1, kind2, name2, kind3, name3, kind4, name4, \
+ kind5, name5, kind6, name6, kind7, name7, kind8, name8, kind9, name9) \
+ kind0 name0, kind1 name1, kind2 name2, kind3 name3, kind4 name4, \
+ kind5 name5, kind6 name6, kind7 name7, kind8 name8, kind9 name9
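+//
+// For example, GMOCK_INTERNAL_DECL_HAS_2_TEMPLATE_PARAMS(typename, T, int, k)
+// expands to "typename T, int k".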
+
+// Lists the template parameters.
+#define GMOCK_INTERNAL_LIST_HAS_1_TEMPLATE_PARAMS(kind0, name0) name0
+#define GMOCK_INTERNAL_LIST_HAS_2_TEMPLATE_PARAMS(kind0, name0, kind1, name1) \
+ name0, name1
+#define GMOCK_INTERNAL_LIST_HAS_3_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \
+ kind2, name2) \
+ name0, name1, name2
+#define GMOCK_INTERNAL_LIST_HAS_4_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \
+ kind2, name2, kind3, name3) \
+ name0, name1, name2, name3
+#define GMOCK_INTERNAL_LIST_HAS_5_TEMPLATE_PARAMS( \
+ kind0, name0, kind1, name1, kind2, name2, kind3, name3, kind4, name4) \
+ name0, name1, name2, name3, name4
+#define GMOCK_INTERNAL_LIST_HAS_6_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \
+ kind2, name2, kind3, name3, \
+ kind4, name4, kind5, name5) \
+ name0, name1, name2, name3, name4, name5
+#define GMOCK_INTERNAL_LIST_HAS_7_TEMPLATE_PARAMS( \
+ kind0, name0, kind1, name1, kind2, name2, kind3, name3, kind4, name4, \
+ kind5, name5, kind6, name6) \
+ name0, name1, name2, name3, name4, name5, name6
+#define GMOCK_INTERNAL_LIST_HAS_8_TEMPLATE_PARAMS( \
+ kind0, name0, kind1, name1, kind2, name2, kind3, name3, kind4, name4, \
+ kind5, name5, kind6, name6, kind7, name7) \
+ name0, name1, name2, name3, name4, name5, name6, name7
+#define GMOCK_INTERNAL_LIST_HAS_9_TEMPLATE_PARAMS( \
+ kind0, name0, kind1, name1, kind2, name2, kind3, name3, kind4, name4, \
+ kind5, name5, kind6, name6, kind7, name7, kind8, name8) \
+ name0, name1, name2, name3, name4, name5, name6, name7, name8
+#define GMOCK_INTERNAL_LIST_HAS_10_TEMPLATE_PARAMS( \
+ kind0, name0, kind1, name1, kind2, name2, kind3, name3, kind4, name4, \
+ kind5, name5, kind6, name6, kind7, name7, kind8, name8, kind9, name9) \
+ name0, name1, name2, name3, name4, name5, name6, name7, name8, name9
+
+// Declares the types of value parameters.
+#define GMOCK_INTERNAL_DECL_TYPE_AND_0_VALUE_PARAMS()
+#define GMOCK_INTERNAL_DECL_TYPE_AND_1_VALUE_PARAMS(p0) , typename p0##_type
+#define GMOCK_INTERNAL_DECL_TYPE_AND_2_VALUE_PARAMS(p0, p1) \
+ , typename p0##_type, typename p1##_type
+#define GMOCK_INTERNAL_DECL_TYPE_AND_3_VALUE_PARAMS(p0, p1, p2) \
+ , typename p0##_type, typename p1##_type, typename p2##_type
+#define GMOCK_INTERNAL_DECL_TYPE_AND_4_VALUE_PARAMS(p0, p1, p2, p3) \
+ , typename p0##_type, typename p1##_type, typename p2##_type, \
+ typename p3##_type
+#define GMOCK_INTERNAL_DECL_TYPE_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) \
+ , typename p0##_type, typename p1##_type, typename p2##_type, \
+ typename p3##_type, typename p4##_type
+#define GMOCK_INTERNAL_DECL_TYPE_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) \
+ , typename p0##_type, typename p1##_type, typename p2##_type, \
+ typename p3##_type, typename p4##_type, typename p5##_type
+#define GMOCK_INTERNAL_DECL_TYPE_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \
+ p6) \
+ , typename p0##_type, typename p1##_type, typename p2##_type, \
+ typename p3##_type, typename p4##_type, typename p5##_type, \
+ typename p6##_type
+#define GMOCK_INTERNAL_DECL_TYPE_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \
+ p6, p7) \
+ , typename p0##_type, typename p1##_type, typename p2##_type, \
+ typename p3##_type, typename p4##_type, typename p5##_type, \
+ typename p6##_type, typename p7##_type
+#define GMOCK_INTERNAL_DECL_TYPE_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \
+ p6, p7, p8) \
+ , typename p0##_type, typename p1##_type, typename p2##_type, \
+ typename p3##_type, typename p4##_type, typename p5##_type, \
+ typename p6##_type, typename p7##_type, typename p8##_type
+#define GMOCK_INTERNAL_DECL_TYPE_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \
+ p6, p7, p8, p9) \
+ , typename p0##_type, typename p1##_type, typename p2##_type, \
+ typename p3##_type, typename p4##_type, typename p5##_type, \
+ typename p6##_type, typename p7##_type, typename p8##_type, \
+ typename p9##_type
+
+// Initializes the value parameters.
+#define GMOCK_INTERNAL_INIT_AND_0_VALUE_PARAMS() ()
+#define GMOCK_INTERNAL_INIT_AND_1_VALUE_PARAMS(p0) \
+ (p0##_type gmock_p0) : p0(::std::move(gmock_p0))
+#define GMOCK_INTERNAL_INIT_AND_2_VALUE_PARAMS(p0, p1) \
+ (p0##_type gmock_p0, p1##_type gmock_p1) \
+ : p0(::std::move(gmock_p0)), p1(::std::move(gmock_p1))
+#define GMOCK_INTERNAL_INIT_AND_3_VALUE_PARAMS(p0, p1, p2) \
+ (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2) \
+ : p0(::std::move(gmock_p0)), \
+ p1(::std::move(gmock_p1)), \
+ p2(::std::move(gmock_p2))
+#define GMOCK_INTERNAL_INIT_AND_4_VALUE_PARAMS(p0, p1, p2, p3) \
+ (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \
+ p3##_type gmock_p3) \
+ : p0(::std::move(gmock_p0)), \
+ p1(::std::move(gmock_p1)), \
+ p2(::std::move(gmock_p2)), \
+ p3(::std::move(gmock_p3))
+#define GMOCK_INTERNAL_INIT_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) \
+ (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \
+ p3##_type gmock_p3, p4##_type gmock_p4) \
+ : p0(::std::move(gmock_p0)), \
+ p1(::std::move(gmock_p1)), \
+ p2(::std::move(gmock_p2)), \
+ p3(::std::move(gmock_p3)), \
+ p4(::std::move(gmock_p4))
+#define GMOCK_INTERNAL_INIT_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) \
+ (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \
+ p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5) \
+ : p0(::std::move(gmock_p0)), \
+ p1(::std::move(gmock_p1)), \
+ p2(::std::move(gmock_p2)), \
+ p3(::std::move(gmock_p3)), \
+ p4(::std::move(gmock_p4)), \
+ p5(::std::move(gmock_p5))
+#define GMOCK_INTERNAL_INIT_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6) \
+ (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \
+ p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \
+ p6##_type gmock_p6) \
+ : p0(::std::move(gmock_p0)), \
+ p1(::std::move(gmock_p1)), \
+ p2(::std::move(gmock_p2)), \
+ p3(::std::move(gmock_p3)), \
+ p4(::std::move(gmock_p4)), \
+ p5(::std::move(gmock_p5)), \
+ p6(::std::move(gmock_p6))
+#define GMOCK_INTERNAL_INIT_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7) \
+ (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \
+ p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \
+ p6##_type gmock_p6, p7##_type gmock_p7) \
+ : p0(::std::move(gmock_p0)), \
+ p1(::std::move(gmock_p1)), \
+ p2(::std::move(gmock_p2)), \
+ p3(::std::move(gmock_p3)), \
+ p4(::std::move(gmock_p4)), \
+ p5(::std::move(gmock_p5)), \
+ p6(::std::move(gmock_p6)), \
+ p7(::std::move(gmock_p7))
+#define GMOCK_INTERNAL_INIT_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7, \
+ p8) \
+ (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \
+ p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \
+ p6##_type gmock_p6, p7##_type gmock_p7, p8##_type gmock_p8) \
+ : p0(::std::move(gmock_p0)), \
+ p1(::std::move(gmock_p1)), \
+ p2(::std::move(gmock_p2)), \
+ p3(::std::move(gmock_p3)), \
+ p4(::std::move(gmock_p4)), \
+ p5(::std::move(gmock_p5)), \
+ p6(::std::move(gmock_p6)), \
+ p7(::std::move(gmock_p7)), \
+ p8(::std::move(gmock_p8))
+#define GMOCK_INTERNAL_INIT_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \
+ p7, p8, p9) \
+ (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \
+ p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \
+ p6##_type gmock_p6, p7##_type gmock_p7, p8##_type gmock_p8, \
+ p9##_type gmock_p9) \
+ : p0(::std::move(gmock_p0)), \
+ p1(::std::move(gmock_p1)), \
+ p2(::std::move(gmock_p2)), \
+ p3(::std::move(gmock_p3)), \
+ p4(::std::move(gmock_p4)), \
+ p5(::std::move(gmock_p5)), \
+ p6(::std::move(gmock_p6)), \
+ p7(::std::move(gmock_p7)), \
+ p8(::std::move(gmock_p8)), \
+ p9(::std::move(gmock_p9))
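+//
+// For example, GMOCK_INTERNAL_INIT_AND_1_VALUE_PARAMS(x) expands to
+// "(x_type gmock_p0) : x(::std::move(gmock_p0))", i.e. a constructor
+// parameter list followed by its member initializer.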
-// Various overloads for Invoke().
+// Defines the copy constructor.
+#define GMOCK_INTERNAL_DEFN_COPY_AND_0_VALUE_PARAMS() \
+ {} // Avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82134
+#define GMOCK_INTERNAL_DEFN_COPY_AND_1_VALUE_PARAMS(...) = default;
+#define GMOCK_INTERNAL_DEFN_COPY_AND_2_VALUE_PARAMS(...) = default;
+#define GMOCK_INTERNAL_DEFN_COPY_AND_3_VALUE_PARAMS(...) = default;
+#define GMOCK_INTERNAL_DEFN_COPY_AND_4_VALUE_PARAMS(...) = default;
+#define GMOCK_INTERNAL_DEFN_COPY_AND_5_VALUE_PARAMS(...) = default;
+#define GMOCK_INTERNAL_DEFN_COPY_AND_6_VALUE_PARAMS(...) = default;
+#define GMOCK_INTERNAL_DEFN_COPY_AND_7_VALUE_PARAMS(...) = default;
+#define GMOCK_INTERNAL_DEFN_COPY_AND_8_VALUE_PARAMS(...) = default;
+#define GMOCK_INTERNAL_DEFN_COPY_AND_9_VALUE_PARAMS(...) = default;
+#define GMOCK_INTERNAL_DEFN_COPY_AND_10_VALUE_PARAMS(...) = default;
+
+// Declares the fields for storing the value parameters.
+#define GMOCK_INTERNAL_DEFN_AND_0_VALUE_PARAMS()
+#define GMOCK_INTERNAL_DEFN_AND_1_VALUE_PARAMS(p0) p0##_type p0;
+#define GMOCK_INTERNAL_DEFN_AND_2_VALUE_PARAMS(p0, p1) \
+ p0##_type p0; \
+ p1##_type p1;
+#define GMOCK_INTERNAL_DEFN_AND_3_VALUE_PARAMS(p0, p1, p2) \
+ p0##_type p0; \
+ p1##_type p1; \
+ p2##_type p2;
+#define GMOCK_INTERNAL_DEFN_AND_4_VALUE_PARAMS(p0, p1, p2, p3) \
+ p0##_type p0; \
+ p1##_type p1; \
+ p2##_type p2; \
+ p3##_type p3;
+#define GMOCK_INTERNAL_DEFN_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) \
+ p0##_type p0; \
+ p1##_type p1; \
+ p2##_type p2; \
+ p3##_type p3; \
+ p4##_type p4;
+#define GMOCK_INTERNAL_DEFN_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) \
+ p0##_type p0; \
+ p1##_type p1; \
+ p2##_type p2; \
+ p3##_type p3; \
+ p4##_type p4; \
+ p5##_type p5;
+#define GMOCK_INTERNAL_DEFN_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6) \
+ p0##_type p0; \
+ p1##_type p1; \
+ p2##_type p2; \
+ p3##_type p3; \
+ p4##_type p4; \
+ p5##_type p5; \
+ p6##_type p6;
+#define GMOCK_INTERNAL_DEFN_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7) \
+ p0##_type p0; \
+ p1##_type p1; \
+ p2##_type p2; \
+ p3##_type p3; \
+ p4##_type p4; \
+ p5##_type p5; \
+ p6##_type p6; \
+ p7##_type p7;
+#define GMOCK_INTERNAL_DEFN_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7, \
+ p8) \
+ p0##_type p0; \
+ p1##_type p1; \
+ p2##_type p2; \
+ p3##_type p3; \
+ p4##_type p4; \
+ p5##_type p5; \
+ p6##_type p6; \
+ p7##_type p7; \
+ p8##_type p8;
+#define GMOCK_INTERNAL_DEFN_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \
+ p7, p8, p9) \
+ p0##_type p0; \
+ p1##_type p1; \
+ p2##_type p2; \
+ p3##_type p3; \
+ p4##_type p4; \
+ p5##_type p5; \
+ p6##_type p6; \
+ p7##_type p7; \
+ p8##_type p8; \
+ p9##_type p9;
+
+// Lists the value parameters.
+#define GMOCK_INTERNAL_LIST_AND_0_VALUE_PARAMS()
+#define GMOCK_INTERNAL_LIST_AND_1_VALUE_PARAMS(p0) p0
+#define GMOCK_INTERNAL_LIST_AND_2_VALUE_PARAMS(p0, p1) p0, p1
+#define GMOCK_INTERNAL_LIST_AND_3_VALUE_PARAMS(p0, p1, p2) p0, p1, p2
+#define GMOCK_INTERNAL_LIST_AND_4_VALUE_PARAMS(p0, p1, p2, p3) p0, p1, p2, p3
+#define GMOCK_INTERNAL_LIST_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) \
+ p0, p1, p2, p3, p4
+#define GMOCK_INTERNAL_LIST_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) \
+ p0, p1, p2, p3, p4, p5
+#define GMOCK_INTERNAL_LIST_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6) \
+ p0, p1, p2, p3, p4, p5, p6
+#define GMOCK_INTERNAL_LIST_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7) \
+ p0, p1, p2, p3, p4, p5, p6, p7
+#define GMOCK_INTERNAL_LIST_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7, \
+ p8) \
+ p0, p1, p2, p3, p4, p5, p6, p7, p8
+#define GMOCK_INTERNAL_LIST_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \
+ p7, p8, p9) \
+ p0, p1, p2, p3, p4, p5, p6, p7, p8, p9
+
+// Lists the value parameter types.
+#define GMOCK_INTERNAL_LIST_TYPE_AND_0_VALUE_PARAMS()
+#define GMOCK_INTERNAL_LIST_TYPE_AND_1_VALUE_PARAMS(p0) , p0##_type
+#define GMOCK_INTERNAL_LIST_TYPE_AND_2_VALUE_PARAMS(p0, p1) \
+ , p0##_type, p1##_type
+#define GMOCK_INTERNAL_LIST_TYPE_AND_3_VALUE_PARAMS(p0, p1, p2) \
+ , p0##_type, p1##_type, p2##_type
+#define GMOCK_INTERNAL_LIST_TYPE_AND_4_VALUE_PARAMS(p0, p1, p2, p3) \
+ , p0##_type, p1##_type, p2##_type, p3##_type
+#define GMOCK_INTERNAL_LIST_TYPE_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) \
+ , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type
+#define GMOCK_INTERNAL_LIST_TYPE_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) \
+ , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, p5##_type
+#define GMOCK_INTERNAL_LIST_TYPE_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \
+ p6) \
+ , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, p5##_type, p6##_type
+#define GMOCK_INTERNAL_LIST_TYPE_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \
+ p6, p7) \
+ , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, p5##_type, \
+ p6##_type, p7##_type
+#define GMOCK_INTERNAL_LIST_TYPE_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \
+ p6, p7, p8) \
+ , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, p5##_type, \
+ p6##_type, p7##_type, p8##_type
+#define GMOCK_INTERNAL_LIST_TYPE_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \
+ p6, p7, p8, p9) \
+ , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, p5##_type, \
+ p6##_type, p7##_type, p8##_type, p9##_type
+
+// Declares the value parameters.
+#define GMOCK_INTERNAL_DECL_AND_0_VALUE_PARAMS()
+#define GMOCK_INTERNAL_DECL_AND_1_VALUE_PARAMS(p0) p0##_type p0
+#define GMOCK_INTERNAL_DECL_AND_2_VALUE_PARAMS(p0, p1) \
+ p0##_type p0, p1##_type p1
+#define GMOCK_INTERNAL_DECL_AND_3_VALUE_PARAMS(p0, p1, p2) \
+ p0##_type p0, p1##_type p1, p2##_type p2
+#define GMOCK_INTERNAL_DECL_AND_4_VALUE_PARAMS(p0, p1, p2, p3) \
+ p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3
+#define GMOCK_INTERNAL_DECL_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) \
+ p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4
+#define GMOCK_INTERNAL_DECL_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) \
+ p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, \
+ p5##_type p5
+#define GMOCK_INTERNAL_DECL_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6) \
+ p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, \
+ p5##_type p5, p6##_type p6
+#define GMOCK_INTERNAL_DECL_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7) \
+ p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, \
+ p5##_type p5, p6##_type p6, p7##_type p7
+#define GMOCK_INTERNAL_DECL_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7, \
+ p8) \
+ p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, \
+ p5##_type p5, p6##_type p6, p7##_type p7, p8##_type p8
+#define GMOCK_INTERNAL_DECL_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \
+ p7, p8, p9) \
+ p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, \
+ p5##_type p5, p6##_type p6, p7##_type p7, p8##_type p8, p9##_type p9
+
+// The suffix of the class template implementing the action template.
+#define GMOCK_INTERNAL_COUNT_AND_0_VALUE_PARAMS()
+#define GMOCK_INTERNAL_COUNT_AND_1_VALUE_PARAMS(p0) P
+#define GMOCK_INTERNAL_COUNT_AND_2_VALUE_PARAMS(p0, p1) P2
+#define GMOCK_INTERNAL_COUNT_AND_3_VALUE_PARAMS(p0, p1, p2) P3
+#define GMOCK_INTERNAL_COUNT_AND_4_VALUE_PARAMS(p0, p1, p2, p3) P4
+#define GMOCK_INTERNAL_COUNT_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) P5
+#define GMOCK_INTERNAL_COUNT_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) P6
+#define GMOCK_INTERNAL_COUNT_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6) P7
+#define GMOCK_INTERNAL_COUNT_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \
+ p7) \
+ P8
+#define GMOCK_INTERNAL_COUNT_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \
+ p7, p8) \
+ P9
+#define GMOCK_INTERNAL_COUNT_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \
+ p7, p8, p9) \
+ P10
+
+// The name of the class template implementing the action template.
+#define GMOCK_ACTION_CLASS_(name, value_params) \
+ GTEST_CONCAT_TOKEN_(name##Action, GMOCK_INTERNAL_COUNT_##value_params)
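+//
+// For example, GMOCK_ACTION_CLASS_(Foo, AND_2_VALUE_PARAMS(a, b)) expands
+// to FooActionP2.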
+
+#define ACTION_TEMPLATE(name, template_params, value_params) \
+ template <GMOCK_INTERNAL_DECL_##template_params \
+ GMOCK_INTERNAL_DECL_TYPE_##value_params> \
+ class GMOCK_ACTION_CLASS_(name, value_params) { \
+ public: \
+ explicit GMOCK_ACTION_CLASS_(name, value_params)( \
+ GMOCK_INTERNAL_DECL_##value_params) \
+ GMOCK_PP_IF(GMOCK_PP_IS_EMPTY(GMOCK_INTERNAL_COUNT_##value_params), \
+ = default; \
+ , \
+ : impl_(std::make_shared<gmock_Impl>( \
+ GMOCK_INTERNAL_LIST_##value_params)){}) \
+ GMOCK_ACTION_CLASS_(name, value_params)(const GMOCK_ACTION_CLASS_( \
+ name, value_params) &) noexcept GMOCK_INTERNAL_DEFN_COPY_ \
+ ##value_params GMOCK_ACTION_CLASS_(name, value_params)( \
+ GMOCK_ACTION_CLASS_(name, value_params) &&) noexcept \
+ GMOCK_INTERNAL_DEFN_COPY_##value_params template <typename F> \
+ operator ::testing::Action<F>() const { \
+ return GMOCK_PP_IF( \
+ GMOCK_PP_IS_EMPTY(GMOCK_INTERNAL_COUNT_##value_params), \
+ (::testing::internal::MakeAction<F, gmock_Impl>()), \
+ (::testing::internal::MakeAction<F>(impl_))); \
+ } \
+ \
+ private: \
+ class gmock_Impl { \
+ public: \
+ explicit gmock_Impl GMOCK_INTERNAL_INIT_##value_params {} \
+ template <typename function_type, typename return_type, \
+ typename args_type, GMOCK_ACTION_TEMPLATE_ARGS_NAMES_> \
+ return_type gmock_PerformImpl(GMOCK_ACTION_ARG_TYPES_AND_NAMES_) const; \
+ GMOCK_INTERNAL_DEFN_##value_params \
+ }; \
+ GMOCK_PP_IF(GMOCK_PP_IS_EMPTY(GMOCK_INTERNAL_COUNT_##value_params), , \
+ std::shared_ptr<const gmock_Impl> impl_;) \
+ }; \
+ template <GMOCK_INTERNAL_DECL_##template_params \
+ GMOCK_INTERNAL_DECL_TYPE_##value_params> \
+ GMOCK_ACTION_CLASS_( \
+ name, value_params)<GMOCK_INTERNAL_LIST_##template_params \
+ GMOCK_INTERNAL_LIST_TYPE_##value_params> \
+ name(GMOCK_INTERNAL_DECL_##value_params) GTEST_MUST_USE_RESULT_; \
+ template <GMOCK_INTERNAL_DECL_##template_params \
+ GMOCK_INTERNAL_DECL_TYPE_##value_params> \
+ inline GMOCK_ACTION_CLASS_( \
+ name, value_params)<GMOCK_INTERNAL_LIST_##template_params \
+ GMOCK_INTERNAL_LIST_TYPE_##value_params> \
+ name(GMOCK_INTERNAL_DECL_##value_params) { \
+ return GMOCK_ACTION_CLASS_( \
+ name, value_params)<GMOCK_INTERNAL_LIST_##template_params \
+ GMOCK_INTERNAL_LIST_TYPE_##value_params>( \
+ GMOCK_INTERNAL_LIST_##value_params); \
+ } \
+ template <GMOCK_INTERNAL_DECL_##template_params \
+ GMOCK_INTERNAL_DECL_TYPE_##value_params> \
+ template <typename function_type, typename return_type, typename args_type, \
+ GMOCK_ACTION_TEMPLATE_ARGS_NAMES_> \
+ return_type GMOCK_ACTION_CLASS_( \
+ name, value_params)<GMOCK_INTERNAL_LIST_##template_params \
+ GMOCK_INTERNAL_LIST_TYPE_##value_params>:: \
+ gmock_Impl::gmock_PerformImpl(GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) \
+ const
+
+namespace testing {
// The ACTION*() macros trigger warning C4100 (unreferenced formal
// parameter) in MSVC with -W4. Unfortunately they cannot be fixed in
@@ -69,94 +583,80 @@ inline OutputIterator CopyElements(InputIterator first,
// is expanded and macro expansion cannot contain #pragma. Therefore
// we suppress them here.
#ifdef _MSC_VER
-# pragma warning(push)
-# pragma warning(disable:4100)
+#pragma warning(push)
+#pragma warning(disable : 4100)
#endif
-// Action ReturnArg<k>() returns the k-th argument of the mock function.
-ACTION_TEMPLATE(ReturnArg,
- HAS_1_TEMPLATE_PARAMS(int, k),
- AND_0_VALUE_PARAMS()) {
- return ::std::get<k>(args);
-}
+namespace internal {
-// Action SaveArg<k>(pointer) saves the k-th (0-based) argument of the
-// mock function to *pointer.
-ACTION_TEMPLATE(SaveArg,
- HAS_1_TEMPLATE_PARAMS(int, k),
- AND_1_VALUE_PARAMS(pointer)) {
- *pointer = ::std::get<k>(args);
+// internal::InvokeArgument - a helper for InvokeArgument action.
+// The basic overloads are provided here for generic functors.
+// Overloads for other custom-callables are provided in the
+// internal/custom/gmock-generated-actions.h header.
+template <typename F, typename... Args>
+auto InvokeArgument(F f, Args... args) -> decltype(f(args...)) {
+ return f(args...);
}
-// Action SaveArgPointee<k>(pointer) saves the value pointed to
-// by the k-th (0-based) argument of the mock function to *pointer.
-ACTION_TEMPLATE(SaveArgPointee,
- HAS_1_TEMPLATE_PARAMS(int, k),
- AND_1_VALUE_PARAMS(pointer)) {
- *pointer = *::std::get<k>(args);
-}
+template <std::size_t index, typename... Params>
+struct InvokeArgumentAction {
+ template <typename... Args,
+ typename = typename std::enable_if<(index < sizeof...(Args))>::type>
+ auto operator()(Args&&... args) const -> decltype(internal::InvokeArgument(
+ std::get<index>(std::forward_as_tuple(std::forward<Args>(args)...)),
+ std::declval<const Params&>()...)) {
+ internal::FlatTuple<Args&&...> args_tuple(FlatTupleConstructTag{},
+ std::forward<Args>(args)...);
+ return params.Apply([&](const Params&... unpacked_params) {
+ auto&& callable = args_tuple.template Get<index>();
+ return internal::InvokeArgument(
+ std::forward<decltype(callable)>(callable), unpacked_params...);
+ });
+ }
-// Action SetArgReferee<k>(value) assigns 'value' to the variable
-// referenced by the k-th (0-based) argument of the mock function.
-ACTION_TEMPLATE(SetArgReferee,
- HAS_1_TEMPLATE_PARAMS(int, k),
- AND_1_VALUE_PARAMS(value)) {
- typedef typename ::std::tuple_element<k, args_type>::type argk_type;
- // Ensures that argument #k is a reference. If you get a compiler
- // error on the next line, you are using SetArgReferee<k>(value) in
- // a mock function whose k-th (0-based) argument is not a reference.
- GTEST_COMPILE_ASSERT_(std::is_reference<argk_type>::value,
- SetArgReferee_must_be_used_with_a_reference_argument);
- ::std::get<k>(args) = value;
-}
+ internal::FlatTuple<Params...> params;
+};
-// Action SetArrayArgument<k>(first, last) copies the elements in
-// source range [first, last) to the array pointed to by the k-th
-// (0-based) argument, which can be either a pointer or an
-// iterator. The action does not take ownership of the elements in the
-// source range.
-ACTION_TEMPLATE(SetArrayArgument,
- HAS_1_TEMPLATE_PARAMS(int, k),
- AND_2_VALUE_PARAMS(first, last)) {
- // Visual Studio deprecates ::std::copy, so we use our own copy in that case.
-#ifdef _MSC_VER
- internal::CopyElements(first, last, ::std::get<k>(args));
-#else
- ::std::copy(first, last, ::std::get<k>(args));
-#endif
-}
+} // namespace internal
-// Action DeleteArg<k>() deletes the k-th (0-based) argument of the mock
-// function.
-ACTION_TEMPLATE(DeleteArg,
- HAS_1_TEMPLATE_PARAMS(int, k),
- AND_0_VALUE_PARAMS()) {
- delete ::std::get<k>(args);
+// The InvokeArgument<N>(a1, a2, ..., a_k) action invokes the N-th
+// (0-based) argument, which must be a k-ary callable, of the mock
+// function, with arguments a1, a2, ..., a_k.
+//
+// Notes:
+//
+// 1. The arguments are passed by value by default. If you need to
+// pass an argument by reference, wrap it inside std::ref(). For
+// example,
+//
+// InvokeArgument<1>(5, string("Hello"), std::ref(foo))
+//
+// passes 5 and string("Hello") by value, and passes foo by
+// reference.
+//
+// 2. If the callable takes an argument by reference but std::ref() is
+// not used, it will receive the reference to a copy of the value,
+// instead of the original value. For example, when the 0-th
+// argument of the mock function takes a const string&, the action
+//
+// InvokeArgument<0>(string("Hello"))
+//
+// makes a copy of the temporary string("Hello") object and passes a
+// reference to the copy, instead of the original temporary object,
+// to the callable. This makes it easy for a user to define an
+// InvokeArgument action from temporary values and have it performed
+// later.
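+//
+// For illustration (a usage sketch; mock_foo and DoThis are hypothetical):
+//
+//   // void DoThis(bool (*callback)(int), int data);
+//   EXPECT_CALL(mock_foo, DoThis(_, _))
+//       .WillOnce(InvokeArgument<0>(5));
+//
+// invokes the callback passed as the 0-th argument with the value 5.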
+template <std::size_t index, typename... Params>
+internal::InvokeArgumentAction<index, typename std::decay<Params>::type...>
+InvokeArgument(Params&&... params) {
+ return {internal::FlatTuple<typename std::decay<Params>::type...>(
+ internal::FlatTupleConstructTag{}, std::forward<Params>(params)...)};
}
-// This action returns the value pointed to by 'pointer'.
-ACTION_P(ReturnPointee, pointer) { return *pointer; }
-
-// Action Throw(exception) can be used in a mock function of any type
-// to throw the given exception. Any copyable value can be thrown.
-#if GTEST_HAS_EXCEPTIONS
-
-// Suppresses the 'unreachable code' warning that VC generates in opt modes.
-# ifdef _MSC_VER
-# pragma warning(push) // Saves the current warning state.
-# pragma warning(disable:4702) // Temporarily disables warning 4702.
-# endif
-ACTION_P(Throw, exception) { throw exception; }
-# ifdef _MSC_VER
-# pragma warning(pop) // Restores the warning state.
-# endif
-
-#endif // GTEST_HAS_EXCEPTIONS
-
#ifdef _MSC_VER
-# pragma warning(pop)
+#pragma warning(pop)
#endif
} // namespace testing
-#endif // GMOCK_INCLUDE_GMOCK_GMOCK_MORE_ACTIONS_H_
+#endif // GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_MORE_ACTIONS_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-more-matchers.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-more-matchers.h
index b306dd6037c..47aaf98461e 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-more-matchers.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-more-matchers.h
@@ -27,7 +27,6 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
// Google Mock - a framework for writing C++ mock classes.
//
// This file implements some matchers that depend on gmock-matchers.h.
@@ -35,10 +34,11 @@
// Note that tests are implemented in gmock-matchers_test.cc rather than
// gmock-more-matchers-test.cc.
-// GOOGLETEST_CM0002 DO NOT DELETE
+// IWYU pragma: private, include "gmock/gmock.h"
+// IWYU pragma: friend gmock/.*
-#ifndef GMOCK_INCLUDE_GMOCK_MORE_MATCHERS_H_
-#define GMOCK_INCLUDE_GMOCK_MORE_MATCHERS_H_
+#ifndef GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_MORE_MATCHERS_H_
+#define GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_MORE_MATCHERS_H_
#include "gmock/gmock-matchers.h"
@@ -47,13 +47,13 @@ namespace testing {
// Silence C4100 (unreferenced formal
// parameter) for MSVC
#ifdef _MSC_VER
-# pragma warning(push)
-# pragma warning(disable:4100)
+#pragma warning(push)
+#pragma warning(disable : 4100)
#if (_MSC_VER == 1900)
// and silence C4800 (C4800: 'int *const ': forcing value
// to bool 'true' or 'false') for MSVC 14
-# pragma warning(disable:4800)
- #endif
+#pragma warning(disable : 4800)
+#endif
#endif
// Defines a matcher that matches an empty container. The container must
@@ -83,10 +83,9 @@ MATCHER(IsFalse, negation ? "is true" : "is false") {
}
#ifdef _MSC_VER
-# pragma warning(pop)
+#pragma warning(pop)
#endif
-
} // namespace testing
-#endif // GMOCK_INCLUDE_GMOCK_MORE_MATCHERS_H_
+#endif // GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_MORE_MATCHERS_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-nice-strict.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-nice-strict.h
index 5495a9805b5..4f0eb35db72 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-nice-strict.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-nice-strict.h
@@ -27,7 +27,6 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
// Implements class templates NiceMock, NaggyMock, and StrictMock.
//
// Given a mock class MockFoo that is created using Google Mock,
@@ -58,22 +57,107 @@
// In particular, nesting NiceMock, NaggyMock, and StrictMock is NOT
// supported.
-// GOOGLETEST_CM0002 DO NOT DELETE
+// IWYU pragma: private, include "gmock/gmock.h"
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_NICE_STRICT_H_
+#define GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_NICE_STRICT_H_
-#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_NICE_STRICT_H_
-#define GMOCK_INCLUDE_GMOCK_GMOCK_NICE_STRICT_H_
+#include <cstdint>
+#include <type_traits>
#include "gmock/gmock-spec-builders.h"
#include "gmock/internal/gmock-port.h"
namespace testing {
+template <class MockClass>
+class NiceMock;
+template <class MockClass>
+class NaggyMock;
+template <class MockClass>
+class StrictMock;
+
+namespace internal {
+template <typename T>
+std::true_type StrictnessModifierProbe(const NiceMock<T>&);
+template <typename T>
+std::true_type StrictnessModifierProbe(const NaggyMock<T>&);
+template <typename T>
+std::true_type StrictnessModifierProbe(const StrictMock<T>&);
+std::false_type StrictnessModifierProbe(...);
+
+template <typename T>
+constexpr bool HasStrictnessModifier() {
+ return decltype(StrictnessModifierProbe(std::declval<const T&>()))::value;
+}
+
+// Base classes that register and deregister with testing::Mock to alter the
+// default behavior around uninteresting calls. Inheriting from one of these
+// classes first and then MockClass ensures the MockClass constructor is run
+// after registration, and that the MockClass destructor runs before
+// deregistration. This guarantees that MockClass's constructor and destructor
+// run with the same level of strictness as its instance methods.
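+//
+// For illustration (a usage sketch; MockFoo is a hypothetical mock class):
+//   NiceMock<MockFoo> nice;     // uninteresting calls are silently allowed
+//   NaggyMock<MockFoo> naggy;   // uninteresting calls produce warnings
+//   StrictMock<MockFoo> strict; // uninteresting calls fail the test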
+
+#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MINGW && \
+ (defined(_MSC_VER) || defined(__clang__))
+// We need to mark these classes with this declspec to ensure that
+// the empty base class optimization is performed.
+#define GTEST_INTERNAL_EMPTY_BASE_CLASS __declspec(empty_bases)
+#else
+#define GTEST_INTERNAL_EMPTY_BASE_CLASS
+#endif
+
+template <typename Base>
+class NiceMockImpl {
+ public:
+ NiceMockImpl() {
+ ::testing::Mock::AllowUninterestingCalls(reinterpret_cast<uintptr_t>(this));
+ }
+
+ ~NiceMockImpl() {
+ ::testing::Mock::UnregisterCallReaction(reinterpret_cast<uintptr_t>(this));
+ }
+};
+
+template <typename Base>
+class NaggyMockImpl {
+ public:
+ NaggyMockImpl() {
+ ::testing::Mock::WarnUninterestingCalls(reinterpret_cast<uintptr_t>(this));
+ }
+
+ ~NaggyMockImpl() {
+ ::testing::Mock::UnregisterCallReaction(reinterpret_cast<uintptr_t>(this));
+ }
+};
+
+template <typename Base>
+class StrictMockImpl {
+ public:
+ StrictMockImpl() {
+ ::testing::Mock::FailUninterestingCalls(reinterpret_cast<uintptr_t>(this));
+ }
+
+ ~StrictMockImpl() {
+ ::testing::Mock::UnregisterCallReaction(reinterpret_cast<uintptr_t>(this));
+ }
+};
+
+} // namespace internal
template <class MockClass>
-class NiceMock : public MockClass {
+class GTEST_INTERNAL_EMPTY_BASE_CLASS NiceMock
+ : private internal::NiceMockImpl<MockClass>,
+ public MockClass {
public:
+ static_assert(!internal::HasStrictnessModifier<MockClass>(),
+ "Can't apply NiceMock to a class hierarchy that already has a "
+ "strictness modifier. See "
+ "https://google.github.io/googletest/"
+ "gmock_cook_book.html#NiceStrictNaggy");
NiceMock() : MockClass() {
- ::testing::Mock::AllowUninterestingCalls(
- internal::ImplicitCast_<MockClass*>(this));
+ static_assert(sizeof(*this) == sizeof(MockClass),
+ "The impl subclass shouldn't introduce any padding");
}
// Ideally, we would inherit base class's constructors through a using
@@ -85,33 +169,37 @@ class NiceMock : public MockClass {
// made explicit.
template <typename A>
explicit NiceMock(A&& arg) : MockClass(std::forward<A>(arg)) {
- ::testing::Mock::AllowUninterestingCalls(
- internal::ImplicitCast_<MockClass*>(this));
+ static_assert(sizeof(*this) == sizeof(MockClass),
+ "The impl subclass shouldn't introduce any padding");
}
- template <typename A1, typename A2, typename... An>
- NiceMock(A1&& arg1, A2&& arg2, An&&... args)
- : MockClass(std::forward<A1>(arg1), std::forward<A2>(arg2),
+ template <typename TArg1, typename TArg2, typename... An>
+ NiceMock(TArg1&& arg1, TArg2&& arg2, An&&... args)
+ : MockClass(std::forward<TArg1>(arg1), std::forward<TArg2>(arg2),
std::forward<An>(args)...) {
- ::testing::Mock::AllowUninterestingCalls(
- internal::ImplicitCast_<MockClass*>(this));
- }
-
- ~NiceMock() { // NOLINT
- ::testing::Mock::UnregisterCallReaction(
- internal::ImplicitCast_<MockClass*>(this));
+ static_assert(sizeof(*this) == sizeof(MockClass),
+ "The impl subclass shouldn't introduce any padding");
}
private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(NiceMock);
+ NiceMock(const NiceMock&) = delete;
+ NiceMock& operator=(const NiceMock&) = delete;
};
template <class MockClass>
-class NaggyMock : public MockClass {
+class GTEST_INTERNAL_EMPTY_BASE_CLASS NaggyMock
+ : private internal::NaggyMockImpl<MockClass>,
+ public MockClass {
+ static_assert(!internal::HasStrictnessModifier<MockClass>(),
+ "Can't apply NaggyMock to a class hierarchy that already has a "
+ "strictness modifier. See "
+ "https://google.github.io/googletest/"
+ "gmock_cook_book.html#NiceStrictNaggy");
+
public:
NaggyMock() : MockClass() {
- ::testing::Mock::WarnUninterestingCalls(
- internal::ImplicitCast_<MockClass*>(this));
+ static_assert(sizeof(*this) == sizeof(MockClass),
+ "The impl subclass shouldn't introduce any padding");
}
// Ideally, we would inherit base class's constructors through a using
@@ -123,33 +211,37 @@ class NaggyMock : public MockClass {
// made explicit.
template <typename A>
explicit NaggyMock(A&& arg) : MockClass(std::forward<A>(arg)) {
- ::testing::Mock::WarnUninterestingCalls(
- internal::ImplicitCast_<MockClass*>(this));
+ static_assert(sizeof(*this) == sizeof(MockClass),
+ "The impl subclass shouldn't introduce any padding");
}
- template <typename A1, typename A2, typename... An>
- NaggyMock(A1&& arg1, A2&& arg2, An&&... args)
- : MockClass(std::forward<A1>(arg1), std::forward<A2>(arg2),
+ template <typename TArg1, typename TArg2, typename... An>
+ NaggyMock(TArg1&& arg1, TArg2&& arg2, An&&... args)
+ : MockClass(std::forward<TArg1>(arg1), std::forward<TArg2>(arg2),
std::forward<An>(args)...) {
- ::testing::Mock::WarnUninterestingCalls(
- internal::ImplicitCast_<MockClass*>(this));
- }
-
- ~NaggyMock() { // NOLINT
- ::testing::Mock::UnregisterCallReaction(
- internal::ImplicitCast_<MockClass*>(this));
+ static_assert(sizeof(*this) == sizeof(MockClass),
+ "The impl subclass shouldn't introduce any padding");
}
private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(NaggyMock);
+ NaggyMock(const NaggyMock&) = delete;
+ NaggyMock& operator=(const NaggyMock&) = delete;
};
template <class MockClass>
-class StrictMock : public MockClass {
+class GTEST_INTERNAL_EMPTY_BASE_CLASS StrictMock
+ : private internal::StrictMockImpl<MockClass>,
+ public MockClass {
public:
+ static_assert(
+ !internal::HasStrictnessModifier<MockClass>(),
+ "Can't apply StrictMock to a class hierarchy that already has a "
+ "strictness modifier. See "
+ "https://google.github.io/googletest/"
+ "gmock_cook_book.html#NiceStrictNaggy");
StrictMock() : MockClass() {
- ::testing::Mock::FailUninterestingCalls(
- internal::ImplicitCast_<MockClass*>(this));
+ static_assert(sizeof(*this) == sizeof(MockClass),
+ "The impl subclass shouldn't introduce any padding");
}
// Ideally, we would inherit base class's constructors through a using
@@ -161,55 +253,25 @@ class StrictMock : public MockClass {
// made explicit.
template <typename A>
explicit StrictMock(A&& arg) : MockClass(std::forward<A>(arg)) {
- ::testing::Mock::FailUninterestingCalls(
- internal::ImplicitCast_<MockClass*>(this));
+ static_assert(sizeof(*this) == sizeof(MockClass),
+ "The impl subclass shouldn't introduce any padding");
}
- template <typename A1, typename A2, typename... An>
- StrictMock(A1&& arg1, A2&& arg2, An&&... args)
- : MockClass(std::forward<A1>(arg1), std::forward<A2>(arg2),
+ template <typename TArg1, typename TArg2, typename... An>
+ StrictMock(TArg1&& arg1, TArg2&& arg2, An&&... args)
+ : MockClass(std::forward<TArg1>(arg1), std::forward<TArg2>(arg2),
std::forward<An>(args)...) {
- ::testing::Mock::FailUninterestingCalls(
- internal::ImplicitCast_<MockClass*>(this));
- }
-
- ~StrictMock() { // NOLINT
- ::testing::Mock::UnregisterCallReaction(
- internal::ImplicitCast_<MockClass*>(this));
+ static_assert(sizeof(*this) == sizeof(MockClass),
+ "The impl subclass shouldn't introduce any padding");
}
private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(StrictMock);
+ StrictMock(const StrictMock&) = delete;
+ StrictMock& operator=(const StrictMock&) = delete;
};
-// The following specializations catch some (relatively more common)
-// user errors of nesting nice and strict mocks. They do NOT catch
-// all possible errors.
-
-// These specializations are declared but not defined, as NiceMock,
-// NaggyMock, and StrictMock cannot be nested.
-
-template <typename MockClass>
-class NiceMock<NiceMock<MockClass> >;
-template <typename MockClass>
-class NiceMock<NaggyMock<MockClass> >;
-template <typename MockClass>
-class NiceMock<StrictMock<MockClass> >;
-
-template <typename MockClass>
-class NaggyMock<NiceMock<MockClass> >;
-template <typename MockClass>
-class NaggyMock<NaggyMock<MockClass> >;
-template <typename MockClass>
-class NaggyMock<StrictMock<MockClass> >;
-
-template <typename MockClass>
-class StrictMock<NiceMock<MockClass> >;
-template <typename MockClass>
-class StrictMock<NaggyMock<MockClass> >;
-template <typename MockClass>
-class StrictMock<StrictMock<MockClass> >;
+#undef GTEST_INTERNAL_EMPTY_BASE_CLASS
} // namespace testing
-#endif // GMOCK_INCLUDE_GMOCK_GMOCK_NICE_STRICT_H_
+#endif // GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_NICE_STRICT_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-spec-builders.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-spec-builders.h
index 718c9484abc..45cc605183c 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-spec-builders.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock-spec-builders.h
@@ -27,7 +27,6 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
// Google Mock - a framework for writing C++ mock classes.
//
// This file implements the ON_CALL() and EXPECT_CALL() macros.
@@ -56,11 +55,13 @@
// where all clauses are optional, and .InSequence()/.After()/
// .WillOnce() can appear any number of times.
-// GOOGLETEST_CM0002 DO NOT DELETE
+// IWYU pragma: private, include "gmock/gmock.h"
+// IWYU pragma: friend gmock/.*
-#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_SPEC_BUILDERS_H_
-#define GMOCK_INCLUDE_GMOCK_GMOCK_SPEC_BUILDERS_H_
+#ifndef GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_SPEC_BUILDERS_H_
+#define GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_SPEC_BUILDERS_H_
+#include <cstdint>
#include <functional>
#include <map>
#include <memory>
@@ -70,6 +71,7 @@
#include <type_traits>
#include <utility>
#include <vector>
+
#include "gmock/gmock-actions.h"
#include "gmock/gmock-cardinalities.h"
#include "gmock/gmock-matchers.h"
@@ -78,7 +80,7 @@
#include "gtest/gtest.h"
#if GTEST_HAS_EXCEPTIONS
-# include <stdexcept> // NOLINT
+#include <stdexcept> // NOLINT
#endif
GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
@@ -97,17 +99,27 @@ class ExpectationSet;
namespace internal {
// Implements a mock function.
-template <typename F> class FunctionMocker;
+template <typename F>
+class FunctionMocker;
// Base class for expectations.
class ExpectationBase;
// Implements an expectation.
-template <typename F> class TypedExpectation;
+template <typename F>
+class TypedExpectation;
// Helper class for testing the Expectation class template.
class ExpectationTester;
+// Helper classes for implementing NiceMock, StrictMock, and NaggyMock.
+template <typename MockClass>
+class NiceMockImpl;
+template <typename MockClass>
+class StrictMockImpl;
+template <typename MockClass>
+class NaggyMockImpl;
+
// Protects the mock object registry (in class Mock), all function
// mockers, and all expectations.
//
@@ -121,9 +133,6 @@ class ExpectationTester;
// calls to ensure the integrity of the mock objects' states.
GTEST_API_ GTEST_DECLARE_STATIC_MUTEX_(g_gmock_mutex);
-// Untyped base class for ActionResultHolder<R>.
-class UntypedActionResultHolderBase;
-
// Abstract base class of FunctionMocker. This is the
// type-agnostic part of the function mocker interface. Its pure
// virtual methods are implemented by FunctionMocker.
@@ -146,27 +155,12 @@ class GTEST_API_ UntypedFunctionMockerBase {
// responsibility to guarantee the correctness of the arguments'
// types.
- // Performs the default action with the given arguments and returns
- // the action's result. The call description string will be used in
- // the error message to describe the call in the case the default
- // action fails.
- // L = *
- virtual UntypedActionResultHolderBase* UntypedPerformDefaultAction(
- void* untyped_args, const std::string& call_description) const = 0;
-
- // Performs the given action with the given arguments and returns
- // the action's result.
- // L = *
- virtual UntypedActionResultHolderBase* UntypedPerformAction(
- const void* untyped_action, void* untyped_args) const = 0;
-
// Writes a message that the call is uninteresting (i.e. neither
// explicitly expected nor explicitly unexpected) to the given
// ostream.
- virtual void UntypedDescribeUninterestingCall(
- const void* untyped_args,
- ::std::ostream* os) const
- GTEST_LOCK_EXCLUDED_(g_gmock_mutex) = 0;
+ virtual void UntypedDescribeUninterestingCall(const void* untyped_args,
+ ::std::ostream* os) const
+ GTEST_LOCK_EXCLUDED_(g_gmock_mutex) = 0;
// Returns the expectation that matches the given function arguments
// (or NULL if there's no match); when a match is found,
@@ -175,10 +169,9 @@ class GTEST_API_ UntypedFunctionMockerBase {
// is_excessive is modified to indicate whether the call exceeds the
// expected number.
virtual const ExpectationBase* UntypedFindMatchingExpectation(
- const void* untyped_args,
- const void** untyped_action, bool* is_excessive,
+ const void* untyped_args, const void** untyped_action, bool* is_excessive,
::std::ostream* what, ::std::ostream* why)
- GTEST_LOCK_EXCLUDED_(g_gmock_mutex) = 0;
+ GTEST_LOCK_EXCLUDED_(g_gmock_mutex) = 0;
// Prints the given function arguments to the ostream.
virtual void UntypedPrintArgs(const void* untyped_args,
@@ -188,8 +181,7 @@ class GTEST_API_ UntypedFunctionMockerBase {
// this information in the global mock registry. Will be called
// whenever an EXPECT_CALL() or ON_CALL() is executed on this mock
// method.
- void RegisterOwner(const void* mock_obj)
- GTEST_LOCK_EXCLUDED_(g_gmock_mutex);
+ void RegisterOwner(const void* mock_obj) GTEST_LOCK_EXCLUDED_(g_gmock_mutex);
// Sets the mock object this mock method belongs to, and sets the
// name of the mock function. Will be called upon each invocation
@@ -200,20 +192,11 @@ class GTEST_API_ UntypedFunctionMockerBase {
// Returns the mock object this mock method belongs to. Must be
// called after RegisterOwner() or SetOwnerAndName() has been
// called.
- const void* MockObject() const
- GTEST_LOCK_EXCLUDED_(g_gmock_mutex);
+ const void* MockObject() const GTEST_LOCK_EXCLUDED_(g_gmock_mutex);
// Returns the name of this mock method. Must be called after
// SetOwnerAndName() has been called.
- const char* Name() const
- GTEST_LOCK_EXCLUDED_(g_gmock_mutex);
-
- // Returns the result of invoking this mock function with the given
- // arguments. This function can be safely called from multiple
- // threads concurrently. The caller is responsible for deleting the
- // result.
- UntypedActionResultHolderBase* UntypedInvokeWith(void* untyped_args)
- GTEST_LOCK_EXCLUDED_(g_gmock_mutex);
+ const char* Name() const GTEST_LOCK_EXCLUDED_(g_gmock_mutex);
protected:
typedef std::vector<const void*> UntypedOnCallSpecs;
@@ -413,40 +396,37 @@ class GTEST_API_ Mock {
template <typename F>
friend class internal::FunctionMocker;
- template <typename M>
- friend class NiceMock;
-
- template <typename M>
- friend class NaggyMock;
-
- template <typename M>
- friend class StrictMock;
+ template <typename MockClass>
+ friend class internal::NiceMockImpl;
+ template <typename MockClass>
+ friend class internal::NaggyMockImpl;
+ template <typename MockClass>
+ friend class internal::StrictMockImpl;
// Tells Google Mock to allow uninteresting calls on the given mock
// object.
- static void AllowUninterestingCalls(const void* mock_obj)
+ static void AllowUninterestingCalls(uintptr_t mock_obj)
GTEST_LOCK_EXCLUDED_(internal::g_gmock_mutex);
// Tells Google Mock to warn the user about uninteresting calls on
// the given mock object.
- static void WarnUninterestingCalls(const void* mock_obj)
+ static void WarnUninterestingCalls(uintptr_t mock_obj)
GTEST_LOCK_EXCLUDED_(internal::g_gmock_mutex);
// Tells Google Mock to fail uninteresting calls on the given mock
// object.
- static void FailUninterestingCalls(const void* mock_obj)
+ static void FailUninterestingCalls(uintptr_t mock_obj)
GTEST_LOCK_EXCLUDED_(internal::g_gmock_mutex);
// Tells Google Mock the given mock object is being destroyed and
// its entry in the call-reaction table should be removed.
- static void UnregisterCallReaction(const void* mock_obj)
+ static void UnregisterCallReaction(uintptr_t mock_obj)
GTEST_LOCK_EXCLUDED_(internal::g_gmock_mutex);
// Returns the reaction Google Mock will have on uninteresting calls
// made on the given mock object.
static internal::CallReaction GetReactionOnUninterestingCalls(
- const void* mock_obj)
- GTEST_LOCK_EXCLUDED_(internal::g_gmock_mutex);
+ const void* mock_obj) GTEST_LOCK_EXCLUDED_(internal::g_gmock_mutex);
// Verifies that all expectations on the given mock object have been
// satisfied. Reports one or more Google Test non-fatal failures
@@ -459,17 +439,16 @@ class GTEST_API_ Mock {
GTEST_EXCLUSIVE_LOCK_REQUIRED_(internal::g_gmock_mutex);
// Registers a mock object and a mock method it owns.
- static void Register(
- const void* mock_obj,
- internal::UntypedFunctionMockerBase* mocker)
- GTEST_LOCK_EXCLUDED_(internal::g_gmock_mutex);
+ static void Register(const void* mock_obj,
+ internal::UntypedFunctionMockerBase* mocker)
+ GTEST_LOCK_EXCLUDED_(internal::g_gmock_mutex);
// Tells Google Mock where in the source code mock_obj is used in an
// ON_CALL or EXPECT_CALL. In case mock_obj is leaked, this
// information helps the user identify which object it is.
- static void RegisterUseByOnCallOrExpectCall(
- const void* mock_obj, const char* file, int line)
- GTEST_LOCK_EXCLUDED_(internal::g_gmock_mutex);
+ static void RegisterUseByOnCallOrExpectCall(const void* mock_obj,
+ const char* file, int line)
+ GTEST_LOCK_EXCLUDED_(internal::g_gmock_mutex);
// Unregisters a mock method; removes the owning mock object from
// the registry when the last mock method associated with it has
@@ -499,7 +478,10 @@ class GTEST_API_ Expectation {
public:
// Constructs a null object that doesn't reference any expectation.
Expectation();
-
+ Expectation(Expectation&&) = default;
+ Expectation(const Expectation&) = default;
+ Expectation& operator=(Expectation&&) = default;
+ Expectation& operator=(const Expectation&) = default;
~Expectation();
// This single-argument ctor must not be explicit, in order to support the
@@ -623,7 +605,6 @@ class ExpectationSet {
Expectation::Set expectations_;
};
-
// Sequence objects are used by a user to specify the relative order
// in which the expectations should match. They are copyable (we rely
// on the compiler-defined copy constructor and assignment operator).
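//
// For illustration (a usage sketch; foo is a hypothetical mock object):
//   Sequence s;
//   EXPECT_CALL(foo, Reset()).InSequence(s);
//   EXPECT_CALL(foo, Describe(_)).InSequence(s);  // must match after Reset()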
@@ -669,10 +650,12 @@ class GTEST_API_ InSequence {
public:
InSequence();
~InSequence();
+
private:
bool sequence_created_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(InSequence); // NOLINT
+ InSequence(const InSequence&) = delete;
+ InSequence& operator=(const InSequence&) = delete;
} GTEST_ATTRIBUTE_UNUSED_;
namespace internal {
@@ -775,40 +758,34 @@ class GTEST_API_ ExpectationBase {
// the current thread.
// Retires all pre-requisites of this expectation.
- void RetireAllPreRequisites()
- GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex);
+ void RetireAllPreRequisites() GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex);
// Returns true if and only if this expectation is retired.
- bool is_retired() const
- GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
+ bool is_retired() const GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
g_gmock_mutex.AssertHeld();
return retired_;
}
// Retires this expectation.
- void Retire()
- GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
+ void Retire() GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
g_gmock_mutex.AssertHeld();
retired_ = true;
}
// Returns true if and only if this expectation is satisfied.
- bool IsSatisfied() const
- GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
+ bool IsSatisfied() const GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
g_gmock_mutex.AssertHeld();
return cardinality().IsSatisfiedByCallCount(call_count_);
}
// Returns true if and only if this expectation is saturated.
- bool IsSaturated() const
- GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
+ bool IsSaturated() const GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
g_gmock_mutex.AssertHeld();
return cardinality().IsSaturatedByCallCount(call_count_);
}
// Returns true if and only if this expectation is over-saturated.
- bool IsOverSaturated() const
- GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
+ bool IsOverSaturated() const GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
g_gmock_mutex.AssertHeld();
return cardinality().IsOverSaturatedByCallCount(call_count_);
}
@@ -823,15 +800,13 @@ class GTEST_API_ ExpectationBase {
GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex);
  // Returns the number of times this expectation has been invoked.
- int call_count() const
- GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
+ int call_count() const GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
g_gmock_mutex.AssertHeld();
return call_count_;
}
  // Increments the number of times this expectation has been invoked.
- void IncrementCallCount()
- GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
+ void IncrementCallCount() GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
g_gmock_mutex.AssertHeld();
call_count_++;
}
@@ -840,8 +815,7 @@ class GTEST_API_ ExpectationBase {
// WillRepeatedly() clauses) against the cardinality if this hasn't
// been done before. Prints a warning if there are too many or too
// few actions.
- void CheckActionCountIfNotDone() const
- GTEST_LOCK_EXCLUDED_(mutex_);
+ void CheckActionCountIfNotDone() const GTEST_LOCK_EXCLUDED_(mutex_);
friend class ::testing::Sequence;
friend class ::testing::internal::ExpectationTester;
@@ -854,12 +828,12 @@ class GTEST_API_ ExpectationBase {
// This group of fields are part of the spec and won't change after
// an EXPECT_CALL() statement finishes.
- const char* file_; // The file that contains the expectation.
- int line_; // The line number of the expectation.
+ const char* file_; // The file that contains the expectation.
+ int line_; // The line number of the expectation.
const std::string source_text_; // The EXPECT_CALL(...) source text.
// True if and only if the cardinality is specified explicitly.
bool cardinality_specified_;
- Cardinality cardinality_; // The cardinality of the expectation.
+ Cardinality cardinality_; // The cardinality of the expectation.
// The immediate pre-requisites (i.e. expectations that must be
// satisfied before this expectation can be matched) of this
// expectation. We use std::shared_ptr in the set because we want an
@@ -878,14 +852,18 @@ class GTEST_API_ ExpectationBase {
bool retires_on_saturation_;
Clause last_clause_;
mutable bool action_count_checked_; // Under mutex_.
- mutable Mutex mutex_; // Protects action_count_checked_.
-
- GTEST_DISALLOW_ASSIGN_(ExpectationBase);
-}; // class ExpectationBase
+ mutable Mutex mutex_; // Protects action_count_checked_.
+}; // class ExpectationBase
-// Impements an expectation for the given function type.
template <typename F>
-class TypedExpectation : public ExpectationBase {
+class TypedExpectation;
+
+// Implements an expectation for the given function type.
+template <typename R, typename... Args>
+class TypedExpectation<R(Args...)> : public ExpectationBase {
+ private:
+ using F = R(Args...);
+
public:
typedef typename Function<F>::ArgumentTuple ArgumentTuple;
typedef typename Function<F>::ArgumentMatcherTuple ArgumentMatcherTuple;
@@ -938,9 +916,7 @@ class TypedExpectation : public ExpectationBase {
}
// Implements the .Times() clause.
- TypedExpectation& Times(int n) {
- return Times(Exactly(n));
- }
+ TypedExpectation& Times(int n) { return Times(Exactly(n)); }
// Implements the .InSequence() clause.
TypedExpectation& InSequence(const Sequence& s) {
@@ -1000,14 +976,31 @@ class TypedExpectation : public ExpectationBase {
return After(s1, s2, s3, s4).After(s5);
}
- // Implements the .WillOnce() clause.
- TypedExpectation& WillOnce(const Action<F>& action) {
+ // Preferred, type-safe overload: consume anything that can be directly
+ // converted to a OnceAction, except for Action<F> objects themselves.
+ TypedExpectation& WillOnce(OnceAction<F> once_action) {
+ // Call the overload below, smuggling the OnceAction as a copyable callable.
+ // We know this is safe because a WillOnce action will not be called more
+ // than once.
+ return WillOnce(Action<F>(ActionAdaptor{
+ std::make_shared<OnceAction<F>>(std::move(once_action)),
+ }));
+ }
+
+ // Fallback overload: accept Action<F> objects and those actions that define
+ // `operator Action<F>` but not `operator OnceAction<F>`.
+ //
+ // This is templated in order to cause the overload above to be preferred
+ // when the input is convertible to either type.
+ template <int&... ExplicitArgumentBarrier, typename = void>
+ TypedExpectation& WillOnce(Action<F> action) {
ExpectSpecProperty(last_clause_ <= kWillOnce,
".WillOnce() cannot appear after "
".WillRepeatedly() or .RetiresOnSaturation().");
last_clause_ = kWillOnce;
- untyped_actions_.push_back(new Action<F>(action));
+ untyped_actions_.push_back(new Action<F>(std::move(action)));
+
if (!cardinality_specified()) {
set_cardinality(Exactly(static_cast<int>(untyped_actions_.size())));
}
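The new OnceAction overload accepts callables that are themselves move-only or may only run once, which the copyable Action<F> cannot hold. A sketch of what this enables (not part of the patch; factory, MakeWidget, and Widget are hypothetical):

    // The callable init-captures a move-only resource, so it is itself
    // move-only and can run at most once; OnceAction<F> accepts it.
    EXPECT_CALL(factory, MakeWidget())
        .WillOnce([p = std::make_unique<Widget>()]() mutable {
          return std::move(p);
        });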
@@ -1055,9 +1048,7 @@ class TypedExpectation : public ExpectationBase {
// Returns the matchers for the arguments as specified inside the
// EXPECT_CALL() macro.
- const ArgumentMatcherTuple& matchers() const {
- return matchers_;
- }
+ const ArgumentMatcherTuple& matchers() const { return matchers_; }
// Returns the matcher specified by the .With() clause.
const Matcher<const ArgumentTuple&>& extra_matcher() const {
@@ -1081,6 +1072,16 @@ class TypedExpectation : public ExpectationBase {
template <typename Function>
friend class FunctionMocker;
+  // An adaptor that turns a OnceAction<F> into something compatible with
+ // Action<F>. Must be called at most once.
+ struct ActionAdaptor {
+ std::shared_ptr<OnceAction<R(Args...)>> once_action;
+
+ R operator()(Args&&... args) const {
+ return std::move(*once_action).Call(std::forward<Args>(args)...);
+ }
+ };
+
// Returns an Expectation object that references and co-owns this
// expectation.
Expectation GetHandle() override { return owner_->GetHandleOf(this); }
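The adaptor relies on a general trick: Action<F> needs a copyable callable, so the move-only OnceAction is parked behind a std::shared_ptr whose copies all refer to the single underlying action. The same idea in isolation, as a self-contained sketch (illustrative names, not part of the patch):

    #include <functional>
    #include <memory>
    #include <utility>

    // Wraps a move-only callable so it can be stored in std::function,
    // which requires copyability. All copies share one underlying object.
    template <typename F>
    std::function<void()> MakeCopyable(F f) {
      auto shared = std::make_shared<F>(std::move(f));
      return [shared] { (*shared)(); };
    }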
@@ -1112,10 +1113,8 @@ class TypedExpectation : public ExpectationBase {
// Describes the result of matching the arguments against this
// expectation to the given ostream.
- void ExplainMatchResultTo(
- const ArgumentTuple& args,
- ::std::ostream* os) const
- GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
+ void ExplainMatchResultTo(const ArgumentTuple& args, ::std::ostream* os) const
+ GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
g_gmock_mutex.AssertHeld();
if (is_retired()) {
@@ -1174,9 +1173,9 @@ class TypedExpectation : public ExpectationBase {
::std::stringstream ss;
DescribeLocationTo(&ss);
ss << "Actions ran out in " << source_text() << "...\n"
- << "Called " << count << " times, but only "
- << action_count << " WillOnce()"
- << (action_count == 1 ? " is" : "s are") << " specified - ";
+ << "Called " << count << " times, but only " << action_count
+ << " WillOnce()" << (action_count == 1 ? " is" : "s are")
+ << " specified - ";
mocker->DescribeDefaultActionTo(args, &ss);
Log(kWarning, ss.str(), 1);
}
@@ -1218,7 +1217,7 @@ class TypedExpectation : public ExpectationBase {
}
// Must be done after IncrementCount()!
- *what << "Mock function call matches " << source_text() <<"...\n";
+ *what << "Mock function call matches " << source_text() << "...\n";
return &(GetCurrentAction(mocker, args));
}
@@ -1229,7 +1228,8 @@ class TypedExpectation : public ExpectationBase {
Matcher<const ArgumentTuple&> extra_matcher_;
Action<F> repeated_action_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(TypedExpectation);
+ TypedExpectation(const TypedExpectation&) = delete;
+ TypedExpectation& operator=(const TypedExpectation&) = delete;
}; // class TypedExpectation
// A MockSpec object is used by ON_CALL() or EXPECT_CALL() for
@@ -1251,8 +1251,8 @@ template <typename F>
class MockSpec {
public:
typedef typename internal::Function<F>::ArgumentTuple ArgumentTuple;
- typedef typename internal::Function<F>::ArgumentMatcherTuple
- ArgumentMatcherTuple;
+ typedef
+ typename internal::Function<F>::ArgumentMatcherTuple ArgumentMatcherTuple;
// Constructs a MockSpec object, given the function mocker object
// that the spec is associated with.
@@ -1262,8 +1262,9 @@ class MockSpec {
// Adds a new default action spec to the function mocker and returns
// the newly created spec.
- internal::OnCallSpec<F>& InternalDefaultActionSetAt(
- const char* file, int line, const char* obj, const char* call) {
+ internal::OnCallSpec<F>& InternalDefaultActionSetAt(const char* file,
+ int line, const char* obj,
+ const char* call) {
LogWithLocation(internal::kInfo, file, line,
std::string("ON_CALL(") + obj + ", " + call + ") invoked");
return function_mocker_->AddNewOnCallSpec(file, line, matchers_);
@@ -1271,13 +1272,14 @@ class MockSpec {
// Adds a new expectation spec to the function mocker and returns
// the newly created spec.
- internal::TypedExpectation<F>& InternalExpectedAt(
- const char* file, int line, const char* obj, const char* call) {
+ internal::TypedExpectation<F>& InternalExpectedAt(const char* file, int line,
+ const char* obj,
+ const char* call) {
const std::string source_text(std::string("EXPECT_CALL(") + obj + ", " +
call + ")");
LogWithLocation(internal::kInfo, file, line, source_text + " invoked");
- return function_mocker_->AddNewExpectation(
- file, line, source_text, matchers_);
+ return function_mocker_->AddNewExpectation(file, line, source_text,
+ matchers_);
}
// This operator overload is used to swallow the superfluous parameter list
@@ -1295,8 +1297,6 @@ class MockSpec {
internal::FunctionMocker<F>* const function_mocker_;
// The argument matchers specified in the spec.
ArgumentMatcherTuple matchers_;
-
- GTEST_DISALLOW_ASSIGN_(MockSpec);
}; // class MockSpec
// Wrapper type for generically holding an ordinary value or lvalue reference.
@@ -1312,9 +1312,7 @@ template <typename T>
class ReferenceOrValueWrapper {
public:
// Constructs a wrapper from the given value/reference.
- explicit ReferenceOrValueWrapper(T value)
- : value_(std::move(value)) {
- }
+ explicit ReferenceOrValueWrapper(T value) : value_(std::move(value)) {}
// Unwraps and returns the underlying value/reference, exactly as
// originally passed. The behavior of calling this more than once on
@@ -1325,9 +1323,7 @@ class ReferenceOrValueWrapper {
// Always returns a const reference (more precisely,
// const std::add_lvalue_reference<T>::type). The behavior of calling this
// after calling Unwrap on the same object is unspecified.
- const T& Peek() const {
- return value_;
- }
+ const T& Peek() const { return value_; }
private:
T value_;
@@ -1341,8 +1337,7 @@ class ReferenceOrValueWrapper<T&> {
// Workaround for debatable pass-by-reference lint warning (c-library-team
// policy precludes NOLINT in this context)
typedef T& reference;
- explicit ReferenceOrValueWrapper(reference ref)
- : value_ptr_(&ref) {}
+ explicit ReferenceOrValueWrapper(reference ref) : value_ptr_(&ref) {}
T& Unwrap() { return *value_ptr_; }
const T& Peek() const { return *value_ptr_; }
@@ -1350,102 +1345,27 @@ class ReferenceOrValueWrapper<T&> {
T* value_ptr_;
};
-// C++ treats the void type specially. For example, you cannot define
-// a void-typed variable or pass a void value to a function.
-// ActionResultHolder<T> holds a value of type T, where T must be a
-// copyable type or void (T doesn't need to be default-constructable).
-// It hides the syntactic difference between void and other types, and
-// is used to unify the code for invoking both void-returning and
-// non-void-returning mock functions.
-
-// Untyped base class for ActionResultHolder<T>.
-class UntypedActionResultHolderBase {
- public:
- virtual ~UntypedActionResultHolderBase() {}
-
- // Prints the held value as an action's result to os.
- virtual void PrintAsActionResult(::std::ostream* os) const = 0;
-};
-
-// This generic definition is used when T is not void.
+// Prints the held value as an action's result to os.
template <typename T>
-class ActionResultHolder : public UntypedActionResultHolderBase {
- public:
- // Returns the held value. Must not be called more than once.
- T Unwrap() {
- return result_.Unwrap();
- }
-
- // Prints the held value as an action's result to os.
- void PrintAsActionResult(::std::ostream* os) const override {
- *os << "\n Returns: ";
- // T may be a reference type, so we don't use UniversalPrint().
- UniversalPrinter<T>::Print(result_.Peek(), os);
- }
-
- // Performs the given mock function's default action and returns the
- // result in a new-ed ActionResultHolder.
- template <typename F>
- static ActionResultHolder* PerformDefaultAction(
- const FunctionMocker<F>* func_mocker,
- typename Function<F>::ArgumentTuple&& args,
- const std::string& call_description) {
- return new ActionResultHolder(Wrapper(func_mocker->PerformDefaultAction(
- std::move(args), call_description)));
- }
-
- // Performs the given action and returns the result in a new-ed
- // ActionResultHolder.
- template <typename F>
- static ActionResultHolder* PerformAction(
- const Action<F>& action, typename Function<F>::ArgumentTuple&& args) {
- return new ActionResultHolder(
- Wrapper(action.Perform(std::move(args))));
- }
+void PrintAsActionResult(const T& result, std::ostream& os) {
+ os << "\n Returns: ";
+ // T may be a reference type, so we don't use UniversalPrint().
+ UniversalPrinter<T>::Print(result, &os);
+}
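With the holder hierarchy gone, printing a result is a direct call on a concrete value. A trivial sketch of the new free function (fragment; assumes <sstream> and the surrounding testing::internal namespace):

    std::ostringstream os;
    const int result = 42;
    PrintAsActionResult(result, os);
    // os now ends with "Returns: 42", rendered via UniversalPrinter<int>.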
- private:
- typedef ReferenceOrValueWrapper<T> Wrapper;
-
- explicit ActionResultHolder(Wrapper result)
- : result_(std::move(result)) {
- }
-
- Wrapper result_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ActionResultHolder);
-};
+// Reports an uninteresting call (whose description is in msg) in the
+// manner specified by 'reaction'.
+GTEST_API_ void ReportUninterestingCall(CallReaction reaction,
+ const std::string& msg);
-// Specialization for T = void.
-template <>
-class ActionResultHolder<void> : public UntypedActionResultHolderBase {
+// A generic RAII type that runs a user-provided function in its destructor.
+class Cleanup final {
public:
- void Unwrap() { }
-
- void PrintAsActionResult(::std::ostream* /* os */) const override {}
-
- // Performs the given mock function's default action and returns ownership
- // of an empty ActionResultHolder*.
- template <typename F>
- static ActionResultHolder* PerformDefaultAction(
- const FunctionMocker<F>* func_mocker,
- typename Function<F>::ArgumentTuple&& args,
- const std::string& call_description) {
- func_mocker->PerformDefaultAction(std::move(args), call_description);
- return new ActionResultHolder;
- }
-
- // Performs the given action and returns ownership of an empty
- // ActionResultHolder*.
- template <typename F>
- static ActionResultHolder* PerformAction(
- const Action<F>& action, typename Function<F>::ArgumentTuple&& args) {
- action.Perform(std::move(args));
- return new ActionResultHolder;
- }
+ explicit Cleanup(std::function<void()> f) : f_(std::move(f)) {}
+ ~Cleanup() { f_(); }
private:
- ActionResultHolder() {}
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ActionResultHolder);
+ std::function<void()> f_;
};
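Cleanup is a bare-bones scope guard: the callback runs in the destructor, so it fires on every exit path, including early returns. Usage sketch (not part of the patch; assumes <cstdio>):

    int Guarded() {
      const Cleanup log_exit([] { std::puts("cleanup ran"); });
      std::puts("body ran");
      return 0;  // "cleanup ran" prints after "body ran"
    }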
template <typename F>
@@ -1490,14 +1410,12 @@ class FunctionMocker<R(Args...)> final : public UntypedFunctionMockerBase {
// Returns the ON_CALL spec that matches this mock function with the
// given arguments; returns NULL if no matching ON_CALL is found.
// L = *
- const OnCallSpec<F>* FindOnCallSpec(
- const ArgumentTuple& args) const {
- for (UntypedOnCallSpecs::const_reverse_iterator it
- = untyped_on_call_specs_.rbegin();
+ const OnCallSpec<F>* FindOnCallSpec(const ArgumentTuple& args) const {
+ for (UntypedOnCallSpecs::const_reverse_iterator it =
+ untyped_on_call_specs_.rbegin();
it != untyped_on_call_specs_.rend(); ++it) {
const OnCallSpec<F>* spec = static_cast<const OnCallSpec<F>*>(*it);
- if (spec->Matches(args))
- return spec;
+ if (spec->Matches(args)) return spec;
}
return nullptr;
@@ -1505,15 +1423,14 @@ class FunctionMocker<R(Args...)> final : public UntypedFunctionMockerBase {
// Performs the default action of this mock function on the given
// arguments and returns the result. Asserts (or throws if
- // exceptions are enabled) with a helpful call descrption if there
+ // exceptions are enabled) with a helpful call description if there
// is no valid return value. This method doesn't depend on the
// mutable state of this object, and thus can be called concurrently
// without locking.
// L = *
Result PerformDefaultAction(ArgumentTuple&& args,
const std::string& call_description) const {
- const OnCallSpec<F>* const spec =
- this->FindOnCallSpec(args);
+ const OnCallSpec<F>* const spec = this->FindOnCallSpec(args);
if (spec != nullptr) {
return spec->GetAction().Perform(std::move(args));
}
@@ -1531,32 +1448,6 @@ class FunctionMocker<R(Args...)> final : public UntypedFunctionMockerBase {
return DefaultValue<Result>::Get();
}
- // Performs the default action with the given arguments and returns
- // the action's result. The call description string will be used in
- // the error message to describe the call in the case the default
- // action fails. The caller is responsible for deleting the result.
- // L = *
- UntypedActionResultHolderBase* UntypedPerformDefaultAction(
- void* untyped_args, // must point to an ArgumentTuple
- const std::string& call_description) const override {
- ArgumentTuple* args = static_cast<ArgumentTuple*>(untyped_args);
- return ResultHolder::PerformDefaultAction(this, std::move(*args),
- call_description);
- }
-
- // Performs the given action with the given arguments and returns
- // the action's result. The caller is responsible for deleting the
- // result.
- // L = *
- UntypedActionResultHolderBase* UntypedPerformAction(
- const void* untyped_action, void* untyped_args) const override {
- // Make a copy of the action before performing it, in case the
- // action deletes the mock object (and thus deletes itself).
- const Action<F> action = *static_cast<const Action<F>*>(untyped_action);
- ArgumentTuple* args = static_cast<ArgumentTuple*>(untyped_args);
- return ResultHolder::PerformAction(action, std::move(*args));
- }
-
// Implements UntypedFunctionMockerBase::ClearDefaultActionsLocked():
// clears the ON_CALL()s set on this mock function.
void ClearDefaultActionsLocked() override
@@ -1574,8 +1465,7 @@ class FunctionMocker<R(Args...)> final : public UntypedFunctionMockerBase {
untyped_on_call_specs_.swap(specs_to_delete);
g_gmock_mutex.Unlock();
- for (UntypedOnCallSpecs::const_iterator it =
- specs_to_delete.begin();
+ for (UntypedOnCallSpecs::const_iterator it = specs_to_delete.begin();
it != specs_to_delete.end(); ++it) {
delete static_cast<const OnCallSpec<F>*>(*it);
}
@@ -1589,10 +1479,7 @@ class FunctionMocker<R(Args...)> final : public UntypedFunctionMockerBase {
// arguments. This function can be safely called from multiple
// threads concurrently.
Result Invoke(Args... args) GTEST_LOCK_EXCLUDED_(g_gmock_mutex) {
- ArgumentTuple tuple(std::forward<Args>(args)...);
- std::unique_ptr<ResultHolder> holder(DownCast_<ResultHolder*>(
- this->UntypedInvokeWith(static_cast<void*>(&tuple))));
- return holder->Unwrap();
+ return InvokeWith(ArgumentTuple(std::forward<Args>(args)...));
}
MockSpec<F> With(Matcher<Args>... m) {
@@ -1603,13 +1490,10 @@ class FunctionMocker<R(Args...)> final : public UntypedFunctionMockerBase {
template <typename Function>
friend class MockSpec;
- typedef ActionResultHolder<Result> ResultHolder;
-
// Adds and returns a default action spec for this mock function.
- OnCallSpec<F>& AddNewOnCallSpec(
- const char* file, int line,
- const ArgumentMatcherTuple& m)
- GTEST_LOCK_EXCLUDED_(g_gmock_mutex) {
+ OnCallSpec<F>& AddNewOnCallSpec(const char* file, int line,
+ const ArgumentMatcherTuple& m)
+ GTEST_LOCK_EXCLUDED_(g_gmock_mutex) {
Mock::RegisterUseByOnCallOrExpectCall(MockObject(), file, line);
OnCallSpec<F>* const on_call_spec = new OnCallSpec<F>(file, line, m);
untyped_on_call_specs_.push_back(on_call_spec);
@@ -1639,7 +1523,8 @@ class FunctionMocker<R(Args...)> final : public UntypedFunctionMockerBase {
}
private:
- template <typename Func> friend class TypedExpectation;
+ template <typename Func>
+ friend class TypedExpectation;
// Some utilities needed for implementing UntypedInvokeWith().
@@ -1723,9 +1608,8 @@ class FunctionMocker<R(Args...)> final : public UntypedFunctionMockerBase {
// Returns the expectation that matches the arguments, or NULL if no
// expectation matches them.
- TypedExpectation<F>* FindMatchingExpectationLocked(
- const ArgumentTuple& args) const
- GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
+ TypedExpectation<F>* FindMatchingExpectationLocked(const ArgumentTuple& args)
+ const GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
g_gmock_mutex.AssertHeld();
// See the definition of untyped_expectations_ for why access to
// it is unprotected here.
@@ -1742,11 +1626,10 @@ class FunctionMocker<R(Args...)> final : public UntypedFunctionMockerBase {
}
// Returns a message that the arguments don't match any expectation.
- void FormatUnexpectedCallMessageLocked(
- const ArgumentTuple& args,
- ::std::ostream* os,
- ::std::ostream* why) const
- GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
+ void FormatUnexpectedCallMessageLocked(const ArgumentTuple& args,
+ ::std::ostream* os,
+ ::std::ostream* why) const
+ GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
g_gmock_mutex.AssertHeld();
*os << "\nUnexpected mock function call - ";
DescribeDefaultActionTo(args, os);
@@ -1755,15 +1638,14 @@ class FunctionMocker<R(Args...)> final : public UntypedFunctionMockerBase {
// Prints a list of expectations that have been tried against the
// current mock function call.
- void PrintTriedExpectationsLocked(
- const ArgumentTuple& args,
- ::std::ostream* why) const
- GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
+ void PrintTriedExpectationsLocked(const ArgumentTuple& args,
+ ::std::ostream* why) const
+ GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
g_gmock_mutex.AssertHeld();
const size_t count = untyped_expectations_.size();
*why << "Google Mock tried the following " << count << " "
- << (count == 1 ? "expectation, but it didn't match" :
- "expectations, but none matched")
+ << (count == 1 ? "expectation, but it didn't match"
+ : "expectations, but none matched")
<< ":\n";
for (size_t i = 0; i < count; i++) {
TypedExpectation<F>* const expectation =
@@ -1778,18 +1660,255 @@ class FunctionMocker<R(Args...)> final : public UntypedFunctionMockerBase {
expectation->DescribeCallCountTo(why);
}
}
+
+ // Performs the given action (or the default if it's null) with the given
+ // arguments and returns the action's result.
+ // L = *
+ R PerformAction(const void* untyped_action, ArgumentTuple&& args,
+ const std::string& call_description) const {
+ if (untyped_action == nullptr) {
+ return PerformDefaultAction(std::move(args), call_description);
+ }
+
+ // Make a copy of the action before performing it, in case the
+ // action deletes the mock object (and thus deletes itself).
+ const Action<F> action = *static_cast<const Action<F>*>(untyped_action);
+ return action.Perform(std::move(args));
+ }
+
+ // Is it possible to store an object of the supplied type in a local variable
+ // for the sake of printing it, then return it on to the caller?
+ template <typename T>
+ using can_print_result = internal::conjunction<
+ // void can't be stored as an object (and we also don't need to print it).
+ internal::negation<std::is_void<T>>,
+ // Non-moveable types can't be returned on to the user, so there's no way
+ // for us to intercept and print them.
+ std::is_move_constructible<T>>;
+
+ // Perform the supplied action, printing the result to os.
+ template <typename T = R,
+ typename std::enable_if<can_print_result<T>::value, int>::type = 0>
+ R PerformActionAndPrintResult(const void* const untyped_action,
+ ArgumentTuple&& args,
+ const std::string& call_description,
+ std::ostream& os) {
+ R result = PerformAction(untyped_action, std::move(args), call_description);
+
+ PrintAsActionResult(result, os);
+ return std::forward<R>(result);
+ }
+
+ // An overload for when it's not possible to print the result. In this case we
+ // simply perform the action.
+ template <typename T = R,
+ typename std::enable_if<
+ internal::negation<can_print_result<T>>::value, int>::type = 0>
+ R PerformActionAndPrintResult(const void* const untyped_action,
+ ArgumentTuple&& args,
+ const std::string& call_description,
+ std::ostream&) {
+ return PerformAction(untyped_action, std::move(args), call_description);
+ }
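Exactly one of the two PerformActionAndPrintResult overloads is viable for any R, selected by can_print_result. The same enable_if dispatch pattern in isolation (illustrative names, not part of the patch):

    #include <iostream>
    #include <type_traits>

    // Chosen when the result can be stored and returned (move-constructible).
    template <typename T, typename std::enable_if<
                              std::is_move_constructible<T>::value, int>::type = 0>
    T RunAndTrace(T (*f)()) {
      T result = f();
      std::cout << "traced a result\n";
      return result;
    }

    // Chosen otherwise, e.g. T = void: just forward the call untouched.
    template <typename T, typename std::enable_if<
                              !std::is_move_constructible<T>::value, int>::type = 0>
    T RunAndTrace(T (*f)()) {
      return f();
    }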
+
+ // Returns the result of invoking this mock function with the given
+ // arguments. This function can be safely called from multiple
+ // threads concurrently.
+ R InvokeWith(ArgumentTuple&& args) GTEST_LOCK_EXCLUDED_(g_gmock_mutex);
}; // class FunctionMocker
-// Reports an uninteresting call (whose description is in msg) in the
-// manner specified by 'reaction'.
-void ReportUninterestingCall(CallReaction reaction, const std::string& msg);
+// Calculates the result of invoking this mock function with the given
+// arguments, prints it, and returns it.
+template <typename R, typename... Args>
+R FunctionMocker<R(Args...)>::InvokeWith(ArgumentTuple&& args)
+ GTEST_LOCK_EXCLUDED_(g_gmock_mutex) {
+ // See the definition of untyped_expectations_ for why access to it
+ // is unprotected here.
+ if (untyped_expectations_.size() == 0) {
+ // No expectation is set on this mock method - we have an
+ // uninteresting call.
+
+ // We must get Google Mock's reaction on uninteresting calls
+ // made on this mock object BEFORE performing the action,
+ // because the action may DELETE the mock object and make the
+ // following expression meaningless.
+ const CallReaction reaction =
+ Mock::GetReactionOnUninterestingCalls(MockObject());
+
+ // True if and only if we need to print this call's arguments and return
+ // value. This definition must be kept in sync with
+ // the behavior of ReportUninterestingCall().
+ const bool need_to_report_uninteresting_call =
+ // If the user allows this uninteresting call, we print it
+ // only when they want informational messages.
+ reaction == kAllow ? LogIsVisible(kInfo) :
+ // If the user wants this to be a warning, we print
+ // it only when they want to see warnings.
+ reaction == kWarn
+ ? LogIsVisible(kWarning)
+ :
+ // Otherwise, the user wants this to be an error, and we
+ // should always print detailed information in the error.
+ true;
+
+ if (!need_to_report_uninteresting_call) {
+ // Perform the action without printing the call information.
+ return this->PerformDefaultAction(
+ std::move(args), "Function call: " + std::string(Name()));
+ }
+
+ // Warns about the uninteresting call.
+ ::std::stringstream ss;
+ this->UntypedDescribeUninterestingCall(&args, &ss);
+
+ // Perform the action, print the result, and then report the uninteresting
+ // call.
+ //
+ // We use RAII to do the latter in case R is void or a non-moveable type. In
+ // either case we can't assign it to a local variable.
+ const Cleanup report_uninteresting_call(
+ [&] { ReportUninterestingCall(reaction, ss.str()); });
+
+ return PerformActionAndPrintResult(nullptr, std::move(args), ss.str(), ss);
+ }
+
+ bool is_excessive = false;
+ ::std::stringstream ss;
+ ::std::stringstream why;
+ ::std::stringstream loc;
+ const void* untyped_action = nullptr;
+
+ // The UntypedFindMatchingExpectation() function acquires and
+ // releases g_gmock_mutex.
+
+ const ExpectationBase* const untyped_expectation =
+ this->UntypedFindMatchingExpectation(&args, &untyped_action,
+ &is_excessive, &ss, &why);
+ const bool found = untyped_expectation != nullptr;
+
+ // True if and only if we need to print the call's arguments
+ // and return value.
+ // This definition must be kept in sync with the uses of Expect()
+ // and Log() in this function.
+ const bool need_to_report_call =
+ !found || is_excessive || LogIsVisible(kInfo);
+ if (!need_to_report_call) {
+ // Perform the action without printing the call information.
+ return PerformAction(untyped_action, std::move(args), "");
+ }
+
+ ss << " Function call: " << Name();
+ this->UntypedPrintArgs(&args, &ss);
+
+ // In case the action deletes a piece of the expectation, we
+ // generate the message beforehand.
+ if (found && !is_excessive) {
+ untyped_expectation->DescribeLocationTo(&loc);
+ }
+
+ // Perform the action, print the result, and then fail or log in whatever way
+ // is appropriate.
+ //
+ // We use RAII to do the latter in case R is void or a non-moveable type. In
+ // either case we can't assign it to a local variable.
+ const Cleanup handle_failures([&] {
+ ss << "\n" << why.str();
+
+ if (!found) {
+ // No expectation matches this call - reports a failure.
+ Expect(false, nullptr, -1, ss.str());
+ } else if (is_excessive) {
+ // We had an upper-bound violation and the failure message is in ss.
+ Expect(false, untyped_expectation->file(), untyped_expectation->line(),
+ ss.str());
+ } else {
+ // We had an expected call and the matching expectation is
+ // described in ss.
+ Log(kInfo, loc.str() + ss.str(), 2);
+ }
+ });
+
+ return PerformActionAndPrintResult(untyped_action, std::move(args), ss.str(),
+ ss);
+}
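For readability: the nested conditional that computes need_to_report_uninteresting_call above is equivalent to this flattened sketch (same CallReaction enumerators and LogIsVisible as in the surrounding code; not part of the patch):

    bool NeedToReportUninteresting(CallReaction reaction) {
      switch (reaction) {
        case kAllow: return LogIsVisible(kInfo);     // print only as info
        case kWarn:  return LogIsVisible(kWarning);  // print only as warning
        default:     return true;  // an error: always print full details
      }
    }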
} // namespace internal
-// A MockFunction<F> class has one mock method whose type is F. It is
-// useful when you just want your test code to emit some messages and
-// have Google Mock verify the right messages are sent (and perhaps at
-// the right times). For example, if you are exercising code:
+namespace internal {
+
+template <typename F>
+class MockFunction;
+
+template <typename R, typename... Args>
+class MockFunction<R(Args...)> {
+ public:
+ MockFunction(const MockFunction&) = delete;
+ MockFunction& operator=(const MockFunction&) = delete;
+
+ std::function<R(Args...)> AsStdFunction() {
+ return [this](Args... args) -> R {
+ return this->Call(std::forward<Args>(args)...);
+ };
+ }
+
+ // Implementation detail: the expansion of the MOCK_METHOD macro.
+ R Call(Args... args) {
+ mock_.SetOwnerAndName(this, "Call");
+ return mock_.Invoke(std::forward<Args>(args)...);
+ }
+
+ MockSpec<R(Args...)> gmock_Call(Matcher<Args>... m) {
+ mock_.RegisterOwner(this);
+ return mock_.With(std::move(m)...);
+ }
+
+ MockSpec<R(Args...)> gmock_Call(const WithoutMatchers&, R (*)(Args...)) {
+ return this->gmock_Call(::testing::A<Args>()...);
+ }
+
+ protected:
+ MockFunction() = default;
+ ~MockFunction() = default;
+
+ private:
+ FunctionMocker<R(Args...)> mock_;
+};
+
+/*
+The SignatureOf<F> struct is a meta-function that returns the function
+signature corresponding to the provided F argument.
+
+It makes MockFunction easier to use by allowing it to accept more F arguments
+than just function signatures.
+
+Specializations provided here cover a signature type itself and any template
+that can be parameterized with a signature, including std::function and
+boost::function.
+*/
+
+template <typename F, typename = void>
+struct SignatureOf;
+
+template <typename R, typename... Args>
+struct SignatureOf<R(Args...)> {
+ using type = R(Args...);
+};
+
+template <template <typename> class C, typename F>
+struct SignatureOf<C<F>,
+ typename std::enable_if<std::is_function<F>::value>::type>
+ : SignatureOf<F> {};
+
+template <typename F>
+using SignatureOfT = typename SignatureOf<F>::type;
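Concretely, the primary specialization passes a plain signature through, and the partial specialization unwraps any single-parameter template instantiated on a signature, such as std::function. A couple of static_asserts capture the intent (sketch, not part of the patch; assumes testing::internal is visible):

    #include <functional>
    #include <type_traits>

    static_assert(std::is_same<testing::internal::SignatureOfT<int(char)>,
                               int(char)>::value,
                  "plain signatures pass through");
    static_assert(
        std::is_same<testing::internal::SignatureOfT<std::function<int(char)>>,
                     int(char)>::value,
        "std::function<F> unwraps to F");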
+
+} // namespace internal
+
+// A MockFunction<F> type has one mock method whose type is
+// internal::SignatureOfT<F>. It is useful when you just want your
+// test code to emit some messages and have Google Mock verify the
+// right messages are sent (and perhaps at the right times). For
+// example, if you are exercising code:
//
// Foo(1);
// Foo(2);
@@ -1823,49 +1942,34 @@ void ReportUninterestingCall(CallReaction reaction, const std::string& msg);
// Bar("a") is called by which call to Foo().
//
// MockFunction<F> can also be used to exercise code that accepts
-// std::function<F> callbacks. To do so, use AsStdFunction() method
-// to create std::function proxy forwarding to original object's Call.
-// Example:
+// std::function<internal::SignatureOfT<F>> callbacks. To do so, use
+// AsStdFunction() method to create std::function proxy forwarding to
+// original object's Call. Example:
//
// TEST(FooTest, RunsCallbackWithBarArgument) {
// MockFunction<int(string)> callback;
// EXPECT_CALL(callback, Call("bar")).WillOnce(Return(1));
// Foo(callback.AsStdFunction());
// }
+//
+// The internal::SignatureOfT<F> indirection allows types other than plain
+// function signatures to be used. This is typically useful when providing a
+// mock for a predefined std::function type. Example:
+//
+// using FilterPredicate = std::function<bool(string)>;
+// void MyFilterAlgorithm(FilterPredicate predicate);
+//
+// TEST(FooTest, FilterPredicateAlwaysAccepts) {
+// MockFunction<FilterPredicate> predicateMock;
+// EXPECT_CALL(predicateMock, Call(_)).WillRepeatedly(Return(true));
+// MyFilterAlgorithm(predicateMock.AsStdFunction());
+// }
template <typename F>
-class MockFunction;
+class MockFunction : public internal::MockFunction<internal::SignatureOfT<F>> {
+ using Base = internal::MockFunction<internal::SignatureOfT<F>>;
-template <typename R, typename... Args>
-class MockFunction<R(Args...)> {
public:
- MockFunction() {}
- MockFunction(const MockFunction&) = delete;
- MockFunction& operator=(const MockFunction&) = delete;
-
- std::function<R(Args...)> AsStdFunction() {
- return [this](Args... args) -> R {
- return this->Call(std::forward<Args>(args)...);
- };
- }
-
- // Implementation detail: the expansion of the MOCK_METHOD macro.
- R Call(Args... args) {
- mock_.SetOwnerAndName(this, "Call");
- return mock_.Invoke(std::forward<Args>(args)...);
- }
-
- internal::MockSpec<R(Args...)> gmock_Call(Matcher<Args>... m) {
- mock_.RegisterOwner(this);
- return mock_.With(std::move(m)...);
- }
-
- internal::MockSpec<R(Args...)> gmock_Call(const internal::WithoutMatchers&,
- R (*)(Args...)) {
- return this->gmock_Call(::testing::A<Args>()...);
- }
-
- private:
- internal::FunctionMocker<R(Args...)> mock_;
+ using Base::Base;
};
// The style guide prohibits "using" statements in a namespace scope
@@ -1891,7 +1995,9 @@ using internal::MockSpec;
// // Expects a call to const MockFoo::Bar().
// EXPECT_CALL(Const(foo), Bar());
template <typename T>
-inline const T& Const(const T& x) { return x; }
+inline const T& Const(const T& x) {
+ return x;
+}
// Constructs an Expectation object that references and co-owns exp.
inline Expectation::Expectation(internal::ExpectationBase& exp) // NOLINT
@@ -1974,4 +2080,4 @@ GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
#define EXPECT_CALL(obj, call) \
GMOCK_ON_CALL_IMPL_(obj, InternalExpectedAt, call)
-#endif // GMOCK_INCLUDE_GMOCK_GMOCK_SPEC_BUILDERS_H_
+#endif // GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_SPEC_BUILDERS_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock.h
index 3c317b6d47f..568c8c71d78 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/gmock.h
@@ -27,15 +27,12 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
// Google Mock - a framework for writing C++ mock classes.
//
// This is the main header file a user should include.
-// GOOGLETEST_CM0002 DO NOT DELETE
-
-#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_H_
-#define GMOCK_INCLUDE_GMOCK_GMOCK_H_
+#ifndef GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_H_
+#define GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_H_
// This file implements the following syntax:
//
@@ -59,20 +56,20 @@
#include "gmock/gmock-actions.h"
#include "gmock/gmock-cardinalities.h"
#include "gmock/gmock-function-mocker.h"
-#include "gmock/gmock-generated-actions.h"
#include "gmock/gmock-matchers.h"
#include "gmock/gmock-more-actions.h"
#include "gmock/gmock-more-matchers.h"
#include "gmock/gmock-nice-strict.h"
#include "gmock/internal/gmock-internal-utils.h"
-
-namespace testing {
+#include "gmock/internal/gmock-port.h"
// Declares Google Mock flags that we want a user to use programmatically.
GMOCK_DECLARE_bool_(catch_leaked_mocks);
GMOCK_DECLARE_string_(verbose);
GMOCK_DECLARE_int32_(default_mock_behavior);
+namespace testing {
+
// Initializes Google Mock. This must be called before running the
// tests. In particular, it parses the command line for the flags
// that Google Mock recognizes. Whenever a Google Mock flag is seen,
@@ -96,4 +93,4 @@ GTEST_API_ void InitGoogleMock();
} // namespace testing
-#endif // GMOCK_INCLUDE_GMOCK_GMOCK_H_
+#endif // GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/README.md b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/README.md
index f6c93f616d6..9c4874fd0cb 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/README.md
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/README.md
@@ -14,3 +14,5 @@ The following macros can be defined:
* `GMOCK_DEFINE_bool_(name, default_val, doc)`
* `GMOCK_DEFINE_int32_(name, default_val, doc)`
* `GMOCK_DEFINE_string_(name, default_val, doc)`
+* `GMOCK_FLAG_GET(flag_name)`
+* `GMOCK_FLAG_SET(flag_name, value)`
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/gmock-generated-actions.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/gmock-generated-actions.h
index 92d910cf06d..bbcad31c76e 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/gmock-generated-actions.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/gmock-generated-actions.h
@@ -1,10 +1,7 @@
-// This file was GENERATED by command:
-// pump.py gmock-generated-actions.h.pump
-// DO NOT EDIT BY HAND!!!
+// IWYU pragma: private, include "gmock/gmock.h"
+// IWYU pragma: friend gmock/.*
-// GOOGLETEST_CM0002 DO NOT DELETE
+#ifndef GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_CUSTOM_GMOCK_GENERATED_ACTIONS_H_
+#define GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_CUSTOM_GMOCK_GENERATED_ACTIONS_H_
-#ifndef GMOCK_INCLUDE_GMOCK_INTERNAL_CUSTOM_GMOCK_GENERATED_ACTIONS_H_
-#define GMOCK_INCLUDE_GMOCK_INTERNAL_CUSTOM_GMOCK_GENERATED_ACTIONS_H_
-
-#endif // GMOCK_INCLUDE_GMOCK_INTERNAL_CUSTOM_GMOCK_GENERATED_ACTIONS_H_
+#endif // GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_CUSTOM_GMOCK_GENERATED_ACTIONS_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/gmock-generated-actions.h.pump b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/gmock-generated-actions.h.pump
deleted file mode 100644
index 67c221f14c7..00000000000
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/gmock-generated-actions.h.pump
+++ /dev/null
@@ -1,12 +0,0 @@
-$$ -*- mode: c++; -*-
-$$ This is a Pump source file. Please use Pump to convert
-$$ it to callback-actions.h.
-$$
-$var max_callback_arity = 5
-$$}} This meta comment fixes auto-indentation in editors.
-
-// GOOGLETEST_CM0002 DO NOT DELETE
-#ifndef GMOCK_INCLUDE_GMOCK_INTERNAL_CUSTOM_GMOCK_GENERATED_ACTIONS_H_
-#define GMOCK_INCLUDE_GMOCK_INTERNAL_CUSTOM_GMOCK_GENERATED_ACTIONS_H_
-
-#endif // GMOCK_INCLUDE_GMOCK_INTERNAL_CUSTOM_GMOCK_GENERATED_ACTIONS_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/gmock-matchers.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/gmock-matchers.h
index 14aafaabe6b..bb7dcbaa4cb 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/gmock-matchers.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/gmock-matchers.h
@@ -26,11 +26,12 @@
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+
// Injection point for custom user configurations. See README for details
-//
-// GOOGLETEST_CM0002 DO NOT DELETE
-#ifndef GMOCK_INCLUDE_GMOCK_INTERNAL_CUSTOM_GMOCK_MATCHERS_H_
-#define GMOCK_INCLUDE_GMOCK_INTERNAL_CUSTOM_GMOCK_MATCHERS_H_
-#endif // GMOCK_INCLUDE_GMOCK_INTERNAL_CUSTOM_GMOCK_MATCHERS_H_
+// IWYU pragma: private, include "gmock/gmock.h"
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_CUSTOM_GMOCK_MATCHERS_H_
+#define GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_CUSTOM_GMOCK_MATCHERS_H_
+#endif // GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_CUSTOM_GMOCK_MATCHERS_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/gmock-port.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/gmock-port.h
index 0030fe91118..f055f7506b8 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/gmock-port.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/custom/gmock-port.h
@@ -26,14 +26,15 @@
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+
// Injection point for custom user configurations. See README for details
//
// ** Custom implementation starts here **
-// GOOGLETEST_CM0002 DO NOT DELETE
+// IWYU pragma: private, include "gmock/gmock.h"
+// IWYU pragma: friend gmock/.*
-#ifndef GMOCK_INCLUDE_GMOCK_INTERNAL_CUSTOM_GMOCK_PORT_H_
-#define GMOCK_INCLUDE_GMOCK_INTERNAL_CUSTOM_GMOCK_PORT_H_
+#ifndef GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_CUSTOM_GMOCK_PORT_H_
+#define GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_CUSTOM_GMOCK_PORT_H_
-#endif // GMOCK_INCLUDE_GMOCK_INTERNAL_CUSTOM_GMOCK_PORT_H_
+#endif // GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_CUSTOM_GMOCK_PORT_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/gmock-internal-utils.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/gmock-internal-utils.h
index 66cf857b7bf..b1343fdc82d 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/gmock-internal-utils.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/gmock-internal-utils.h
@@ -27,22 +27,25 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
// Google Mock - a framework for writing C++ mock classes.
//
// This file defines some utilities useful for implementing Google
// Mock. They are subject to change without notice, so please DO NOT
// USE THEM IN USER CODE.
-// GOOGLETEST_CM0002 DO NOT DELETE
+// IWYU pragma: private, include "gmock/gmock.h"
+// IWYU pragma: friend gmock/.*
-#ifndef GMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_INTERNAL_UTILS_H_
-#define GMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_INTERNAL_UTILS_H_
+#ifndef GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_INTERNAL_UTILS_H_
+#define GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_INTERNAL_UTILS_H_
#include <stdio.h>
+
#include <ostream> // NOLINT
#include <string>
#include <type_traits>
+#include <vector>
+
#include "gmock/internal/gmock-port.h"
#include "gtest/gtest.h"
@@ -56,14 +59,15 @@ namespace internal {
// Silence MSVC C4100 (unreferenced formal parameter) and
// C4805('==': unsafe mix of type 'const int' and type 'const bool')
#ifdef _MSC_VER
-# pragma warning(push)
-# pragma warning(disable:4100)
-# pragma warning(disable:4805)
+#pragma warning(push)
+#pragma warning(disable : 4100)
+#pragma warning(disable : 4805)
#endif
// Joins a vector of strings as if they are fields of a tuple; returns
// the joined string.
-GTEST_API_ std::string JoinAsTuple(const Strings& fields);
+GTEST_API_ std::string JoinAsKeyValueTuple(
+ const std::vector<const char*>& names, const Strings& values);
// Converts an identifier name to a space-separated list of lower-case
// words. Each maximum substring of the form [A-Za-z][a-z]*|\d+ is
@@ -71,20 +75,6 @@ GTEST_API_ std::string JoinAsTuple(const Strings& fields);
// "foo_bar_123" are converted to "foo bar 123".
GTEST_API_ std::string ConvertIdentifierNameToWords(const char* id_name);
-// PointeeOf<Pointer>::type is the type of a value pointed to by a
-// Pointer, which can be either a smart pointer or a raw pointer. The
-// following default implementation is for the case where Pointer is a
-// smart pointer.
-template <typename Pointer>
-struct PointeeOf {
- // Smart pointer classes define type element_type as the type of
- // their pointees.
- typedef typename Pointer::element_type type;
-};
-// This specialization is for the raw pointer case.
-template <typename T>
-struct PointeeOf<T*> { typedef T type; }; // NOLINT
-
// GetRawPointer(p) returns the raw pointer underlying p when p is a
// smart pointer, or returns p itself when p is already a raw pointer.
// The following default implementation is for the smart pointer case.
@@ -92,9 +82,18 @@ template <typename Pointer>
inline const typename Pointer::element_type* GetRawPointer(const Pointer& p) {
return p.get();
}
+// This overload version is for std::reference_wrapper, which does not work with
+// the overload above, as it does not have an `element_type`.
+template <typename Element>
+inline const Element* GetRawPointer(const std::reference_wrapper<Element>& r) {
+ return &r.get();
+}
+
// This overloaded version is for the raw pointer case.
template <typename Element>
-inline Element* GetRawPointer(Element* p) { return p; }
+inline Element* GetRawPointer(Element* p) {
+ return p;
+}
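With the new overload, all three pointer-like shapes normalize the same way. Sketch (not part of the patch; assumes <functional>, <memory>, and the overloads above):

    void Demo() {
      int x = 0;
      auto sp = std::make_shared<int>(1);
      const int* a = GetRawPointer(sp);           // smart pointer: p.get()
      const int* b = GetRawPointer(std::ref(x));  // reference_wrapper: &r.get()
      int* c = GetRawPointer(&x);                 // raw pointer: passthrough
      (void)a; (void)b; (void)c;
    }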
// MSVC treats wchar_t as a native type usually, but treats it as the
// same as unsigned short when the compiler option /Zc:wchar_t- is
@@ -103,7 +102,7 @@ inline Element* GetRawPointer(Element* p) { return p; }
#if defined(_MSC_VER) && !defined(_NATIVE_WCHAR_T_DEFINED)
// wchar_t is a typedef.
#else
-# define GMOCK_WCHAR_T_IS_NATIVE_ 1
+#define GMOCK_WCHAR_T_IS_NATIVE_ 1
#endif
// In what follows, we use the term "kind" to indicate whether a type
@@ -111,18 +110,20 @@ inline Element* GetRawPointer(Element* p) { return p; }
// or none of them. This categorization is useful for determining
// when a matcher argument type can be safely converted to another
// type in the implementation of SafeMatcherCast.
-enum TypeKind {
- kBool, kInteger, kFloatingPoint, kOther
-};
+enum TypeKind { kBool, kInteger, kFloatingPoint, kOther };
// KindOf<T>::value is the kind of type T.
-template <typename T> struct KindOf {
+template <typename T>
+struct KindOf {
enum { value = kOther }; // The default kind.
};
// This macro declares that the kind of 'type' is 'kind'.
#define GMOCK_DECLARE_KIND_(type, kind) \
- template <> struct KindOf<type> { enum { value = kind }; }
+ template <> \
+ struct KindOf<type> { \
+ enum { value = kind }; \
+ }
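For example, GMOCK_DECLARE_KIND_(int, kInteger); expands to the explicit specialization:

    template <>
    struct KindOf<int> {
      enum { value = kInteger };
    };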
GMOCK_DECLARE_KIND_(bool, kBool);
@@ -130,13 +131,13 @@ GMOCK_DECLARE_KIND_(bool, kBool);
GMOCK_DECLARE_KIND_(char, kInteger);
GMOCK_DECLARE_KIND_(signed char, kInteger);
GMOCK_DECLARE_KIND_(unsigned char, kInteger);
-GMOCK_DECLARE_KIND_(short, kInteger); // NOLINT
+GMOCK_DECLARE_KIND_(short, kInteger); // NOLINT
GMOCK_DECLARE_KIND_(unsigned short, kInteger); // NOLINT
GMOCK_DECLARE_KIND_(int, kInteger);
GMOCK_DECLARE_KIND_(unsigned int, kInteger);
-GMOCK_DECLARE_KIND_(long, kInteger); // NOLINT
-GMOCK_DECLARE_KIND_(unsigned long, kInteger); // NOLINT
-GMOCK_DECLARE_KIND_(long long, kInteger); // NOLINT
+GMOCK_DECLARE_KIND_(long, kInteger); // NOLINT
+GMOCK_DECLARE_KIND_(unsigned long, kInteger); // NOLINT
+GMOCK_DECLARE_KIND_(long long, kInteger); // NOLINT
GMOCK_DECLARE_KIND_(unsigned long long, kInteger); // NOLINT
#if GMOCK_WCHAR_T_IS_NATIVE_
@@ -151,7 +152,7 @@ GMOCK_DECLARE_KIND_(long double, kFloatingPoint);
#undef GMOCK_DECLARE_KIND_
// Evaluates to the kind of 'type'.
-#define GMOCK_KIND_OF_(type) \
+#define GMOCK_KIND_OF_(type) \
static_cast< ::testing::internal::TypeKind>( \
::testing::internal::KindOf<type>::value)
@@ -207,9 +208,7 @@ using LosslessArithmeticConvertible =
class FailureReporterInterface {
public:
// The type of a failure (either non-fatal or fatal).
- enum FailureType {
- kNonfatal, kFatal
- };
+ enum FailureType { kNonfatal, kFatal };
virtual ~FailureReporterInterface() {}
@@ -229,8 +228,8 @@ GTEST_API_ FailureReporterInterface* GetFailureReporter();
inline void Assert(bool condition, const char* file, int line,
const std::string& msg) {
if (!condition) {
- GetFailureReporter()->ReportFailure(FailureReporterInterface::kFatal,
- file, line, msg);
+ GetFailureReporter()->ReportFailure(FailureReporterInterface::kFatal, file,
+ line, msg);
}
}
inline void Assert(bool condition, const char* file, int line) {
@@ -251,10 +250,7 @@ inline void Expect(bool condition, const char* file, int line) {
}
// Severity level of a log.
-enum LogSeverity {
- kInfo = 0,
- kWarning = 1
-};
+enum LogSeverity { kInfo = 0, kWarning = 1 };
// Valid values for the --gmock_verbose flag.
@@ -295,10 +291,10 @@ class WithoutMatchers {
GTEST_API_ WithoutMatchers GetWithoutMatchers();
// Disable MSVC warnings for infinite recursion, since in this case the
-// the recursion is unreachable.
+// recursion is unreachable.
#ifdef _MSC_VER
-# pragma warning(push)
-# pragma warning(disable:4717)
+#pragma warning(push)
+#pragma warning(disable : 4717)
#endif
// Invalid<T>() is usable as an expression of type T, but will terminate
@@ -309,14 +305,17 @@ GTEST_API_ WithoutMatchers GetWithoutMatchers();
template <typename T>
inline T Invalid() {
Assert(false, "", -1, "Internal error: attempt to return invalid value");
- // This statement is unreachable, and would never terminate even if it
- // could be reached. It is provided only to placate compiler warnings
- // about missing return statements.
+#if defined(__GNUC__) || defined(__clang__)
+ __builtin_unreachable();
+#elif defined(_MSC_VER)
+ __assume(0);
+#else
return Invalid<T>();
+#endif
}
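The intrinsics tell the optimizer that the tail of Invalid<T>() is never reached, replacing the old warning-placating infinite recursion. The same idea as a standalone helper (sketch, not part of the patch):

    [[noreturn]] inline void UnreachableAfterFatalFailure() {
    #if defined(__GNUC__) || defined(__clang__)
      __builtin_unreachable();
    #elif defined(_MSC_VER)
      __assume(0);
    #else
      for (;;) {}  // fallback: loop forever rather than fall off the end
    #endif
    }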
#ifdef _MSC_VER
-# pragma warning(pop)
+#pragma warning(pop)
#endif
// Given a raw type (i.e. having no top-level reference or const
@@ -378,7 +377,8 @@ template <typename ElementPointer, typename Size>
class StlContainerView< ::std::tuple<ElementPointer, Size> > {
public:
typedef typename std::remove_const<
- typename internal::PointeeOf<ElementPointer>::type>::type RawElement;
+ typename std::pointer_traits<ElementPointer>::element_type>::type
+ RawElement;
typedef internal::NativeArray<RawElement> type;
typedef const type const_reference;
@@ -394,7 +394,8 @@ class StlContainerView< ::std::tuple<ElementPointer, Size> > {
// The following specialization prevents the user from instantiating
// StlContainer with a reference type.
-template <typename T> class StlContainerView<T&>;
+template <typename T>
+class StlContainerView<T&>;
// A type transform to remove constness from the first part of a pair.
// Pairs like that are used as the value_type of associative containers,
@@ -415,18 +416,21 @@ struct RemoveConstFromKey<std::pair<const K, V> > {
GTEST_API_ void IllegalDoDefault(const char* file, int line);
template <typename F, typename Tuple, size_t... Idx>
-auto ApplyImpl(F&& f, Tuple&& args, IndexSequence<Idx...>) -> decltype(
- std::forward<F>(f)(std::get<Idx>(std::forward<Tuple>(args))...)) {
+auto ApplyImpl(F&& f, Tuple&& args, IndexSequence<Idx...>)
+ -> decltype(std::forward<F>(f)(
+ std::get<Idx>(std::forward<Tuple>(args))...)) {
return std::forward<F>(f)(std::get<Idx>(std::forward<Tuple>(args))...);
}
// Apply the function to a tuple of arguments.
template <typename F, typename Tuple>
-auto Apply(F&& f, Tuple&& args)
- -> decltype(ApplyImpl(std::forward<F>(f), std::forward<Tuple>(args),
- MakeIndexSequence<std::tuple_size<Tuple>::value>())) {
+auto Apply(F&& f, Tuple&& args) -> decltype(ApplyImpl(
+ std::forward<F>(f), std::forward<Tuple>(args),
+ MakeIndexSequence<std::tuple_size<
+ typename std::remove_reference<Tuple>::type>::value>())) {
return ApplyImpl(std::forward<F>(f), std::forward<Tuple>(args),
- MakeIndexSequence<std::tuple_size<Tuple>::value>());
+ MakeIndexSequence<std::tuple_size<
+ typename std::remove_reference<Tuple>::type>::value>());
}
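The added remove_reference matters because Tuple deduces to an lvalue-reference type for lvalue arguments, and std::tuple_size is only specialized for tuple types, not references to them, so the old decltype failed for lvalue tuples. With the fix this sketch compiles (not part of the patch; assumes <tuple> and the Apply helper above):

    int Add(int a, int b) { return a + b; }

    int Demo() {
      std::tuple<int, int> args{2, 3};
      return Apply(Add, args);  // Tuple deduces to std::tuple<int, int>&
    }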
// Template struct Function<F>, where F must be a function type, contains
@@ -460,11 +464,13 @@ struct Function<R(Args...)> {
template <typename R, typename... Args>
constexpr size_t Function<R(Args...)>::ArgumentCount;
+bool Base64Unescape(const std::string& encoded, std::string* decoded);
+
#ifdef _MSC_VER
-# pragma warning(pop)
+#pragma warning(pop)
#endif
} // namespace internal
} // namespace testing
-#endif // GMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_INTERNAL_UTILS_H_
+#endif // GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_INTERNAL_UTILS_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/gmock-port.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/gmock-port.h
index 70872ef3926..bc18a25f348 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/gmock-port.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/gmock-port.h
@@ -27,7 +27,6 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
// Low-level types and utilities for porting Google Mock to various
// platforms. All macros ending with _ and symbols defined in an
// internal namespace are subject to change without notice. Code
@@ -35,10 +34,11 @@
// end with _ are part of Google Mock's public API and can be used by
// code outside Google Mock.
-// GOOGLETEST_CM0002 DO NOT DELETE
+// IWYU pragma: private, include "gmock/gmock.h"
+// IWYU pragma: friend gmock/.*
-#ifndef GMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_PORT_H_
-#define GMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_PORT_H_
+#ifndef GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_PORT_H_
+#define GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_PORT_H_
#include <assert.h>
#include <stdlib.h>
@@ -53,35 +53,87 @@
// here, as Google Mock depends on Google Test. Only add a utility
// here if it's truly specific to Google Mock.
-#include "gtest/internal/gtest-port.h"
#include "gmock/internal/custom/gmock-port.h"
+#include "gtest/internal/gtest-port.h"
+
+#if GTEST_HAS_ABSL
+#include "absl/flags/declare.h"
+#include "absl/flags/flag.h"
+#endif
// For MS Visual C++, check the compiler version. At least VS 2015 is
// required to compile Google Mock.
#if defined(_MSC_VER) && _MSC_VER < 1900
-# error "At least Visual C++ 2015 (14.0) is required to compile Google Mock."
+#error "At least Visual C++ 2015 (14.0) is required to compile Google Mock."
#endif
// Macro for referencing flags. This is public as we want the user to
// use this syntax to reference Google Mock flags.
+#define GMOCK_FLAG_NAME_(name) gmock_##name
#define GMOCK_FLAG(name) FLAGS_gmock_##name
-#if !defined(GMOCK_DECLARE_bool_)
+// Pick a command line flags implementation.
+#if GTEST_HAS_ABSL
+
+// Macros for defining flags.
+#define GMOCK_DEFINE_bool_(name, default_val, doc) \
+ ABSL_FLAG(bool, GMOCK_FLAG_NAME_(name), default_val, doc)
+#define GMOCK_DEFINE_int32_(name, default_val, doc) \
+ ABSL_FLAG(int32_t, GMOCK_FLAG_NAME_(name), default_val, doc)
+#define GMOCK_DEFINE_string_(name, default_val, doc) \
+ ABSL_FLAG(std::string, GMOCK_FLAG_NAME_(name), default_val, doc)
// Macros for declaring flags.
-# define GMOCK_DECLARE_bool_(name) extern GTEST_API_ bool GMOCK_FLAG(name)
-# define GMOCK_DECLARE_int32_(name) extern GTEST_API_ int32_t GMOCK_FLAG(name)
-# define GMOCK_DECLARE_string_(name) \
- extern GTEST_API_ ::std::string GMOCK_FLAG(name)
+#define GMOCK_DECLARE_bool_(name) \
+ ABSL_DECLARE_FLAG(bool, GMOCK_FLAG_NAME_(name))
+#define GMOCK_DECLARE_int32_(name) \
+ ABSL_DECLARE_FLAG(int32_t, GMOCK_FLAG_NAME_(name))
+#define GMOCK_DECLARE_string_(name) \
+ ABSL_DECLARE_FLAG(std::string, GMOCK_FLAG_NAME_(name))
+
+#define GMOCK_FLAG_GET(name) ::absl::GetFlag(GMOCK_FLAG(name))
+#define GMOCK_FLAG_SET(name, value) \
+ (void)(::absl::SetFlag(&GMOCK_FLAG(name), value))
+
+#else // GTEST_HAS_ABSL
// Macros for defining flags.
-# define GMOCK_DEFINE_bool_(name, default_val, doc) \
- GTEST_API_ bool GMOCK_FLAG(name) = (default_val)
-# define GMOCK_DEFINE_int32_(name, default_val, doc) \
- GTEST_API_ int32_t GMOCK_FLAG(name) = (default_val)
-# define GMOCK_DEFINE_string_(name, default_val, doc) \
- GTEST_API_ ::std::string GMOCK_FLAG(name) = (default_val)
+#define GMOCK_DEFINE_bool_(name, default_val, doc) \
+ namespace testing { \
+ GTEST_API_ bool GMOCK_FLAG(name) = (default_val); \
+ } \
+ static_assert(true, "no-op to require trailing semicolon")
+#define GMOCK_DEFINE_int32_(name, default_val, doc) \
+ namespace testing { \
+ GTEST_API_ int32_t GMOCK_FLAG(name) = (default_val); \
+ } \
+ static_assert(true, "no-op to require trailing semicolon")
+#define GMOCK_DEFINE_string_(name, default_val, doc) \
+ namespace testing { \
+ GTEST_API_ ::std::string GMOCK_FLAG(name) = (default_val); \
+ } \
+ static_assert(true, "no-op to require trailing semicolon")
+
+// Macros for declaring flags.
+#define GMOCK_DECLARE_bool_(name) \
+ namespace testing { \
+ GTEST_API_ extern bool GMOCK_FLAG(name); \
+ } \
+ static_assert(true, "no-op to require trailing semicolon")
+#define GMOCK_DECLARE_int32_(name) \
+ namespace testing { \
+ GTEST_API_ extern int32_t GMOCK_FLAG(name); \
+ } \
+ static_assert(true, "no-op to require trailing semicolon")
+#define GMOCK_DECLARE_string_(name) \
+ namespace testing { \
+ GTEST_API_ extern ::std::string GMOCK_FLAG(name); \
+ } \
+ static_assert(true, "no-op to require trailing semicolon")
+
+#define GMOCK_FLAG_GET(name) ::testing::GMOCK_FLAG(name)
+#define GMOCK_FLAG_SET(name, value) (void)(::testing::GMOCK_FLAG(name) = value)
-#endif // !defined(GMOCK_DECLARE_bool_)
+#endif // GTEST_HAS_ABSL
-#endif // GMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_PORT_H_
+#endif // GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_PORT_H_
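Two details of the non-Abseil branch are worth noting: the DEFINE/DECLARE macros now open `namespace testing` themselves (which is why gmock.cc below moves its flag definitions out of the namespace), and the trailing `static_assert(true, ...)` forces every invocation to end with a semicolon. Call sites read and write flags only through `GMOCK_FLAG_GET`/`GMOCK_FLAG_SET`, which map either to `absl::GetFlag`/`absl::SetFlag` or to direct variable access. A standalone sketch of the pattern, with hypothetical `MYLIB_` names:

```cpp
#include <iostream>

#define MYLIB_FLAG(name) FLAGS_mylib_##name
#define MYLIB_DEFINE_bool_(name, default_val, doc) \
  namespace mylib {                                \
  bool MYLIB_FLAG(name) = (default_val);           \
  }                                                \
  static_assert(true, "no-op to require trailing semicolon")
#define MYLIB_FLAG_GET(name) ::mylib::MYLIB_FLAG(name)
#define MYLIB_FLAG_SET(name, value) (void)(::mylib::MYLIB_FLAG(name) = value)

// The macro opens and closes the namespace itself, so this can appear at
// global scope; the static_assert consumes the required trailing semicolon.
MYLIB_DEFINE_bool_(catch_leaks, true, "report leaked objects");

int main() {
  std::cout << MYLIB_FLAG_GET(catch_leaks) << "\n";  // 1
  MYLIB_FLAG_SET(catch_leaks, false);
  std::cout << MYLIB_FLAG_GET(catch_leaks) << "\n";  // 0
}
```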
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/gmock-pp.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/gmock-pp.h
index d13e75f30dd..94d61c09c87 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/gmock-pp.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/include/gmock/internal/gmock-pp.h
@@ -1,5 +1,5 @@
-#ifndef THIRD_PARTY_GOOGLETEST_GOOGLEMOCK_INCLUDE_GMOCK_PP_H_
-#define THIRD_PARTY_GOOGLETEST_GOOGLEMOCK_INCLUDE_GMOCK_PP_H_
+#ifndef GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_PP_H_
+#define GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_PP_H_
// Expands and concatenates the arguments. Constructed macros reevaluate.
#define GMOCK_PP_CAT(_1, _2) GMOCK_PP_INTERNAL_CAT(_1, _2)
@@ -28,16 +28,16 @@
// Requires: the number of arguments after expansion is at most 15.
#define GMOCK_PP_NARG(...) \
GMOCK_PP_INTERNAL_16TH( \
- (__VA_ARGS__, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1))
+ (__VA_ARGS__, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0))
// Returns 1 if the expansion of arguments has an unprotected comma. Otherwise
// returns 0. Requires no more than 15 unprotected commas.
#define GMOCK_PP_HAS_COMMA(...) \
GMOCK_PP_INTERNAL_16TH( \
- (__VA_ARGS__, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0))
+ (__VA_ARGS__, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0))
// Returns the first argument.
-#define GMOCK_PP_HEAD(...) GMOCK_PP_INTERNAL_HEAD((__VA_ARGS__))
+#define GMOCK_PP_HEAD(...) GMOCK_PP_INTERNAL_HEAD((__VA_ARGS__, unusedArg))
// Returns the tail. A variadic list of all arguments minus the first. Requires
// at least one argument.
@@ -276,4 +276,4 @@
GMOCK_PP_INTERNAL_FOR_EACH_IMPL_14(GMOCK_PP_INC(_i), _Macro, _Data, \
(GMOCK_PP_TAIL _Tuple))
-#endif // THIRD_PARTY_GOOGLETEST_GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_PP_H_
+#endif // GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_PP_H_
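The appended `0` in `GMOCK_PP_NARG` and `GMOCK_PP_HAS_COMMA`, like the `unusedArg` added to `GMOCK_PP_HEAD`, appears to guarantee that the selector macro's trailing `...` always receives at least one argument; before C++20, an empty variadic macro argument is not conformant. A trimmed-down sketch of the same counting trick (hypothetical names, cut from 16 slots to 5):

```cpp
#include <iostream>

// Pick the 5th element; the trailing ... absorbs whatever is left over.
#define MY_PP_5TH(_1, _2, _3, _4, _5, ...) _5
// The final 0 pads the list so the ... above is never empty.
#define MY_PP_NARG(...) MY_PP_5TH(__VA_ARGS__, 4, 3, 2, 1, 0)

int main() {
  std::cout << MY_PP_NARG(a) << "\n";           // 1
  std::cout << MY_PP_NARG(a, b) << "\n";        // 2
  std::cout << MY_PP_NARG(a, b, c, d) << "\n";  // 4
}
```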
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock-cardinalities.cc b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock-cardinalities.cc
index 7463f438323..92cde3484ab 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock-cardinalities.cc
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock-cardinalities.cc
@@ -27,7 +27,6 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
// Google Mock - a framework for writing C++ mock classes.
//
// This file implements cardinalities.
@@ -35,9 +34,11 @@
#include "gmock/gmock-cardinalities.h"
#include <limits.h>
+
#include <ostream> // NOLINT
#include <sstream>
#include <string>
+
#include "gmock/internal/gmock-internal-utils.h"
#include "gtest/gtest.h"
@@ -49,8 +50,7 @@ namespace {
class BetweenCardinalityImpl : public CardinalityInterface {
public:
BetweenCardinalityImpl(int min, int max)
- : min_(min >= 0 ? min : 0),
- max_(max >= min_ ? max : min_) {
+ : min_(min >= 0 ? min : 0), max_(max >= min_ ? max : min_) {
std::stringstream ss;
if (min < 0) {
ss << "The invocation lower bound must be >= 0, "
@@ -62,8 +62,7 @@ class BetweenCardinalityImpl : public CardinalityInterface {
internal::Expect(false, __FILE__, __LINE__, ss.str());
} else if (min > max) {
ss << "The invocation upper bound (" << max
- << ") must be >= the invocation lower bound (" << min
- << ").";
+ << ") must be >= the invocation lower bound (" << min << ").";
internal::Expect(false, __FILE__, __LINE__, ss.str());
}
}
@@ -87,7 +86,8 @@ class BetweenCardinalityImpl : public CardinalityInterface {
const int min_;
const int max_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(BetweenCardinalityImpl);
+ BetweenCardinalityImpl(const BetweenCardinalityImpl&) = delete;
+ BetweenCardinalityImpl& operator=(const BetweenCardinalityImpl&) = delete;
};
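This patch consistently replaces the old `GTEST_DISALLOW_COPY_AND_ASSIGN_` macro with explicitly deleted copy operations. The same idiom in isolation (hypothetical class name):

```cpp
class BoundedCounter {
 public:
  BoundedCounter() = default;
  // Deleting the copy operations makes accidental copies a compile error,
  // with no macro indirection.
  BoundedCounter(const BoundedCounter&) = delete;
  BoundedCounter& operator=(const BoundedCounter&) = delete;
};

int main() {
  BoundedCounter c;
  // BoundedCounter c2 = c;  // would not compile: deleted copy constructor
  (void)c;
}
```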
// Formats "n times" in a human-friendly way.
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock-internal-utils.cc b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock-internal-utils.cc
index e5b547981d1..0a74841f35b 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock-internal-utils.cc
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock-internal-utils.cc
@@ -27,7 +27,6 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
// Google Mock - a framework for writing C++ mock classes.
//
// This file defines some utilities useful for implementing Google
@@ -37,8 +36,15 @@
#include "gmock/internal/gmock-internal-utils.h"
#include <ctype.h>
+
+#include <array>
+#include <cctype>
+#include <cstdint>
+#include <cstring>
#include <ostream> // NOLINT
#include <string>
+#include <vector>
+
#include "gmock/gmock.h"
#include "gmock/internal/gmock-port.h"
#include "gtest/gtest.h"
@@ -48,21 +54,22 @@ namespace internal {
// Joins a vector of strings as if they are fields of a tuple; returns
// the joined string.
-GTEST_API_ std::string JoinAsTuple(const Strings& fields) {
- switch (fields.size()) {
- case 0:
- return "";
- case 1:
- return fields[0];
- default:
- std::string result = "(" + fields[0];
- for (size_t i = 1; i < fields.size(); i++) {
- result += ", ";
- result += fields[i];
- }
- result += ")";
- return result;
+GTEST_API_ std::string JoinAsKeyValueTuple(
+ const std::vector<const char*>& names, const Strings& values) {
+ GTEST_CHECK_(names.size() == values.size());
+ if (values.empty()) {
+ return "";
}
+ const auto build_one = [&](const size_t i) {
+ return std::string(names[i]) + ": " + values[i];
+ };
+ std::string result = "(" + build_one(0);
+ for (size_t i = 1; i < values.size(); i++) {
+ result += ", ";
+ result += build_one(i);
+ }
+ result += ")";
+ return result;
}
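`JoinAsTuple` becomes `JoinAsKeyValueTuple`, pairing each printed value with its parameter name. A standalone approximation showing the output format (the gmock version additionally `GTEST_CHECK_`s that both vectors have equal length):

```cpp
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

std::string JoinAsKeyValueTuple(const std::vector<const char*>& names,
                                const std::vector<std::string>& values) {
  if (values.empty()) return "";
  const auto build_one = [&](std::size_t i) {
    return std::string(names[i]) + ": " + values[i];
  };
  std::string result = "(" + build_one(0);
  for (std::size_t i = 1; i < values.size(); i++) {
    result += ", " + build_one(i);
  }
  return result + ")";
}

int main() {
  // Matcher parameters now print as name: value pairs.
  std::cout << JoinAsKeyValueTuple({"low", "high"}, {"4", "6"}) << "\n";
  // Prints: (low: 4, high: 6)
}
```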
// Converts an identifier name to a space-separated list of lower-case
@@ -76,12 +83,11 @@ GTEST_API_ std::string ConvertIdentifierNameToWords(const char* id_name) {
// We don't care about the current locale as the input is
// guaranteed to be a valid C++ identifier name.
const bool starts_new_word = IsUpper(*p) ||
- (!IsAlpha(prev_char) && IsLower(*p)) ||
- (!IsDigit(prev_char) && IsDigit(*p));
+ (!IsAlpha(prev_char) && IsLower(*p)) ||
+ (!IsDigit(prev_char) && IsDigit(*p));
if (IsAlNum(*p)) {
- if (starts_new_word && result != "")
- result += ' ';
+ if (starts_new_word && result != "") result += ' ';
result += ToLower(*p);
}
}
@@ -95,12 +101,9 @@ class GoogleTestFailureReporter : public FailureReporterInterface {
public:
void ReportFailure(FailureType type, const char* file, int line,
const std::string& message) override {
- AssertHelper(type == kFatal ?
- TestPartResult::kFatalFailure :
- TestPartResult::kNonFatalFailure,
- file,
- line,
- message.c_str()) = Message();
+ AssertHelper(type == kFatal ? TestPartResult::kFatalFailure
+ : TestPartResult::kNonFatalFailure,
+ file, line, message.c_str()) = Message();
if (type == kFatal) {
posix::Abort();
}
@@ -126,10 +129,10 @@ static GTEST_DEFINE_STATIC_MUTEX_(g_log_mutex);
// Returns true if and only if a log with the given severity is visible
// according to the --gmock_verbose flag.
GTEST_API_ bool LogIsVisible(LogSeverity severity) {
- if (GMOCK_FLAG(verbose) == kInfoVerbosity) {
+ if (GMOCK_FLAG_GET(verbose) == kInfoVerbosity) {
// Always show the log if --gmock_verbose=info.
return true;
- } else if (GMOCK_FLAG(verbose) == kErrorVerbosity) {
+ } else if (GMOCK_FLAG_GET(verbose) == kErrorVerbosity) {
// Always hide it if --gmock_verbose=error.
return false;
} else {
@@ -148,8 +151,7 @@ GTEST_API_ bool LogIsVisible(LogSeverity severity) {
// conservative.
GTEST_API_ void Log(LogSeverity severity, const std::string& message,
int stack_frames_to_skip) {
- if (!LogIsVisible(severity))
- return;
+ if (!LogIsVisible(severity)) return;
// Ensures that logs from different threads don't interleave.
MutexLock l(&g_log_mutex);
@@ -178,8 +180,8 @@ GTEST_API_ void Log(LogSeverity severity, const std::string& message,
std::cout << "\n";
}
std::cout << "Stack trace:\n"
- << ::testing::internal::GetCurrentOsStackTraceExceptTop(
- ::testing::UnitTest::GetInstance(), actual_to_skip);
+ << ::testing::internal::GetCurrentOsStackTraceExceptTop(
+ ::testing::UnitTest::GetInstance(), actual_to_skip);
}
std::cout << ::std::flush;
}
@@ -196,5 +198,53 @@ GTEST_API_ void IllegalDoDefault(const char* file, int line) {
"the variable in various places.");
}
+constexpr char UnBase64Impl(char c, const char* const base64, char carry) {
+ return *base64 == 0 ? static_cast<char>(65)
+ : *base64 == c ? carry
+ : UnBase64Impl(c, base64 + 1, carry + 1);
+}
+
+template <size_t... I>
+constexpr std::array<char, 256> UnBase64Impl(IndexSequence<I...>,
+ const char* const base64) {
+ return {{UnBase64Impl(static_cast<char>(I), base64, 0)...}};
+}
+
+constexpr std::array<char, 256> UnBase64(const char* const base64) {
+ return UnBase64Impl(MakeIndexSequence<256>{}, base64);
+}
+
+static constexpr char kBase64[] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+static constexpr std::array<char, 256> kUnBase64 = UnBase64(kBase64);
+
+bool Base64Unescape(const std::string& encoded, std::string* decoded) {
+ decoded->clear();
+ size_t encoded_len = encoded.size();
+ decoded->reserve(3 * (encoded_len / 4) + (encoded_len % 4));
+ int bit_pos = 0;
+ char dst = 0;
+ for (int src : encoded) {
+ if (std::isspace(src) || src == '=') {
+ continue;
+ }
+ char src_bin = kUnBase64[static_cast<size_t>(src)];
+ if (src_bin >= 64) {
+ decoded->clear();
+ return false;
+ }
+ if (bit_pos == 0) {
+ dst |= static_cast<char>(src_bin << 2);
+ bit_pos = 6;
+ } else {
+ dst |= static_cast<char>(src_bin >> (bit_pos - 2));
+ decoded->push_back(dst);
+ dst = static_cast<char>(src_bin << (10 - bit_pos));
+ bit_pos = (bit_pos + 6) % 8;
+ }
+ }
+ return true;
+}
+
} // namespace internal
} // namespace testing
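This is the `Base64Unescape` implementation behind the declaration added in the header above. The inverse alphabet table is built at compile time: the recursive `UnBase64Impl` scans `kBase64` for each of the 256 characters and yields 65, an out-of-range sentinel (valid sextets are 0-63), when the character is absent. The decoder skips whitespace and `=` padding and shifts 6-bit groups through a one-byte accumulator. A standalone runtime re-creation of the accumulator loop, decoding `"Zm9v"`:

```cpp
#include <array>
#include <iostream>
#include <string>

int main() {
  const std::string kB64 =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  std::array<char, 256> un{};
  un.fill(65);  // 65 marks "not a base64 digit", as in UnBase64Impl
  for (int i = 0; i < 64; ++i) {
    un[static_cast<unsigned char>(kB64[i])] = static_cast<char>(i);
  }

  std::string decoded;
  int bit_pos = 0;
  char dst = 0;
  for (char c : std::string("Zm9v")) {  // sextets: Z=25, m=38, 9=61, v=47
    const char bits = un[static_cast<unsigned char>(c)];
    if (bits >= 64) return 1;  // reject characters outside the alphabet
    if (bit_pos == 0) {
      dst |= static_cast<char>(bits << 2);
      bit_pos = 6;
    } else {
      dst |= static_cast<char>(bits >> (bit_pos - 2));
      decoded.push_back(dst);
      dst = static_cast<char>(bits << (10 - bit_pos));
      bit_pos = (bit_pos + 6) % 8;
    }
  }
  std::cout << decoded << "\n";  // Prints: foo
}
```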
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock-matchers.cc b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock-matchers.cc
index 4f73e0a69cb..a8d04a6da0c 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock-matchers.cc
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock-matchers.cc
@@ -27,7 +27,6 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
// Google Mock - a framework for writing C++ mock classes.
//
// This file implements Matcher<const string&>, Matcher<string>, and
@@ -36,9 +35,11 @@
#include "gmock/gmock-matchers.h"
#include <string.h>
+
#include <iostream>
#include <sstream>
#include <string>
+#include <vector>
namespace testing {
namespace internal {
@@ -48,11 +49,13 @@ namespace internal {
// 'negation' is false; otherwise returns the description of the
// negation of the matcher. 'param_values' contains a list of strings
// that are the print-out of the matcher's parameters.
-GTEST_API_ std::string FormatMatcherDescription(bool negation,
- const char* matcher_name,
- const Strings& param_values) {
+GTEST_API_ std::string FormatMatcherDescription(
+ bool negation, const char* matcher_name,
+ const std::vector<const char*>& param_names, const Strings& param_values) {
std::string result = ConvertIdentifierNameToWords(matcher_name);
- if (param_values.size() >= 1) result += " " + JoinAsTuple(param_values);
+ if (param_values.size() >= 1) {
+ result += " " + JoinAsKeyValueTuple(param_names, param_values);
+ }
return negation ? "not (" + result + ")" : result;
}
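Combined with `ConvertIdentifierNameToWords` above, a parameterized matcher description now renders roughly as follows (hypothetical matcher and parameters):

```cpp
// FormatMatcherDescription(false, "IsBetween", {"low", "high"}, {"4", "6"})
//   -> "is between (low: 4, high: 6)"
// FormatMatcherDescription(true, "IsBetween", {"low", "high"}, {"4", "6"})
//   -> "not (is between (low: 4, high: 6))"
```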
@@ -218,8 +221,6 @@ class MaxBipartiteMatchState {
// right_[left_[i]] = i.
::std::vector<size_t> left_;
::std::vector<size_t> right_;
-
- GTEST_DISALLOW_ASSIGN_(MaxBipartiteMatchState);
};
const size_t MaxBipartiteMatchState::kUnused;
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock-spec-builders.cc b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock-spec-builders.cc
index 81ea98949c2..658ad3fa229 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock-spec-builders.cc
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock-spec-builders.cc
@@ -27,7 +27,6 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
// Google Mock - a framework for writing C++ mock classes.
//
// This file implements the spec builder syntax (ON_CALL and
@@ -42,6 +41,7 @@
#include <memory>
#include <set>
#include <string>
+#include <unordered_map>
#include <vector>
#include "gmock/gmock.h"
@@ -49,15 +49,15 @@
#include "gtest/internal/gtest-port.h"
#if GTEST_OS_CYGWIN || GTEST_OS_LINUX || GTEST_OS_MAC
-# include <unistd.h> // NOLINT
+#include <unistd.h> // NOLINT
#endif
// Silence C4800 (C4800: 'int *const ': forcing value
// to bool 'true' or 'false') for MSVC 15
#ifdef _MSC_VER
#if _MSC_VER == 1900
-# pragma warning(push)
-# pragma warning(disable:4800)
+#pragma warning(push)
+#pragma warning(disable : 4800)
#endif
#endif
@@ -195,11 +195,12 @@ void ExpectationBase::DescribeCallCountTo(::std::ostream* os) const
// Describes the state of the expectation (e.g. is it satisfied?
// is it active?).
- *os << " - " << (IsOverSaturated() ? "over-saturated" :
- IsSaturated() ? "saturated" :
- IsSatisfied() ? "satisfied" : "unsatisfied")
- << " and "
- << (is_retired() ? "retired" : "active");
+ *os << " - "
+ << (IsOverSaturated() ? "over-saturated"
+ : IsSaturated() ? "saturated"
+ : IsSatisfied() ? "satisfied"
+ : "unsatisfied")
+ << " and " << (is_retired() ? "retired" : "active");
}
// Checks the action count (i.e. the number of WillOnce() and
@@ -242,13 +243,12 @@ void ExpectationBase::CheckActionCountIfNotDone() const
::std::stringstream ss;
DescribeLocationTo(&ss);
- ss << "Too " << (too_many ? "many" : "few")
- << " actions specified in " << source_text() << "...\n"
+ ss << "Too " << (too_many ? "many" : "few") << " actions specified in "
+ << source_text() << "...\n"
<< "Expected to be ";
cardinality().DescribeTo(&ss);
- ss << ", but has " << (too_many ? "" : "only ")
- << action_count << " WillOnce()"
- << (action_count == 1 ? "" : "s");
+ ss << ", but has " << (too_many ? "" : "only ") << action_count
+ << " WillOnce()" << (action_count == 1 ? "" : "s");
if (repeated_action_specified_) {
ss << " and a WillRepeatedly()";
}
@@ -264,10 +264,10 @@ void ExpectationBase::UntypedTimes(const Cardinality& a_cardinality) {
".Times() cannot appear "
"more than once in an EXPECT_CALL().");
} else {
- ExpectSpecProperty(last_clause_ < kTimes,
- ".Times() cannot appear after "
- ".InSequence(), .WillOnce(), .WillRepeatedly(), "
- "or .RetiresOnSaturation().");
+ ExpectSpecProperty(
+ last_clause_ < kTimes,
+ ".Times() may only appear *before* .InSequence(), .WillOnce(), "
+ ".WillRepeatedly(), or .RetiresOnSaturation(), not after.");
}
last_clause_ = kTimes;
@@ -283,7 +283,7 @@ GTEST_API_ ThreadLocal<Sequence*> g_gmock_implicit_sequence;
void ReportUninterestingCall(CallReaction reaction, const std::string& msg) {
// Include a stack trace only if --gmock_verbose=info is specified.
const int stack_frames_to_skip =
- GMOCK_FLAG(verbose) == kInfoVerbosity ? 3 : -1;
+ GMOCK_FLAG_GET(verbose) == kInfoVerbosity ? 3 : -1;
switch (reaction) {
case kAllow:
Log(kInfo, msg, stack_frames_to_skip);
@@ -295,8 +295,8 @@ void ReportUninterestingCall(CallReaction reaction, const std::string& msg) {
"call should not happen. Do not suppress it by blindly adding "
"an EXPECT_CALL() if you don't mean to enforce the call. "
"See "
- "https://github.com/google/googletest/blob/master/googlemock/"
- "docs/cook_book.md#"
+ "https://github.com/google/googletest/blob/master/docs/"
+ "gmock_cook_book.md#"
"knowing-when-to-expect for details.\n",
stack_frames_to_skip);
break;
@@ -370,127 +370,12 @@ const char* UntypedFunctionMockerBase::Name() const
return name;
}
-// Calculates the result of invoking this mock function with the given
-// arguments, prints it, and returns it. The caller is responsible
-// for deleting the result.
-UntypedActionResultHolderBase* UntypedFunctionMockerBase::UntypedInvokeWith(
- void* const untyped_args) GTEST_LOCK_EXCLUDED_(g_gmock_mutex) {
- // See the definition of untyped_expectations_ for why access to it
- // is unprotected here.
- if (untyped_expectations_.size() == 0) {
- // No expectation is set on this mock method - we have an
- // uninteresting call.
-
- // We must get Google Mock's reaction on uninteresting calls
- // made on this mock object BEFORE performing the action,
- // because the action may DELETE the mock object and make the
- // following expression meaningless.
- const CallReaction reaction =
- Mock::GetReactionOnUninterestingCalls(MockObject());
-
- // True if and only if we need to print this call's arguments and return
- // value. This definition must be kept in sync with
- // the behavior of ReportUninterestingCall().
- const bool need_to_report_uninteresting_call =
- // If the user allows this uninteresting call, we print it
- // only when they want informational messages.
- reaction == kAllow ? LogIsVisible(kInfo) :
- // If the user wants this to be a warning, we print
- // it only when they want to see warnings.
- reaction == kWarn
- ? LogIsVisible(kWarning)
- :
- // Otherwise, the user wants this to be an error, and we
- // should always print detailed information in the error.
- true;
-
- if (!need_to_report_uninteresting_call) {
- // Perform the action without printing the call information.
- return this->UntypedPerformDefaultAction(
- untyped_args, "Function call: " + std::string(Name()));
- }
-
- // Warns about the uninteresting call.
- ::std::stringstream ss;
- this->UntypedDescribeUninterestingCall(untyped_args, &ss);
-
- // Calculates the function result.
- UntypedActionResultHolderBase* const result =
- this->UntypedPerformDefaultAction(untyped_args, ss.str());
-
- // Prints the function result.
- if (result != nullptr) result->PrintAsActionResult(&ss);
-
- ReportUninterestingCall(reaction, ss.str());
- return result;
- }
-
- bool is_excessive = false;
- ::std::stringstream ss;
- ::std::stringstream why;
- ::std::stringstream loc;
- const void* untyped_action = nullptr;
-
- // The UntypedFindMatchingExpectation() function acquires and
- // releases g_gmock_mutex.
- const ExpectationBase* const untyped_expectation =
- this->UntypedFindMatchingExpectation(
- untyped_args, &untyped_action, &is_excessive,
- &ss, &why);
- const bool found = untyped_expectation != nullptr;
-
- // True if and only if we need to print the call's arguments
- // and return value.
- // This definition must be kept in sync with the uses of Expect()
- // and Log() in this function.
- const bool need_to_report_call =
- !found || is_excessive || LogIsVisible(kInfo);
- if (!need_to_report_call) {
- // Perform the action without printing the call information.
- return untyped_action == nullptr
- ? this->UntypedPerformDefaultAction(untyped_args, "")
- : this->UntypedPerformAction(untyped_action, untyped_args);
- }
-
- ss << " Function call: " << Name();
- this->UntypedPrintArgs(untyped_args, &ss);
-
- // In case the action deletes a piece of the expectation, we
- // generate the message beforehand.
- if (found && !is_excessive) {
- untyped_expectation->DescribeLocationTo(&loc);
- }
-
- UntypedActionResultHolderBase* const result =
- untyped_action == nullptr
- ? this->UntypedPerformDefaultAction(untyped_args, ss.str())
- : this->UntypedPerformAction(untyped_action, untyped_args);
- if (result != nullptr) result->PrintAsActionResult(&ss);
- ss << "\n" << why.str();
-
- if (!found) {
- // No expectation matches this call - reports a failure.
- Expect(false, nullptr, -1, ss.str());
- } else if (is_excessive) {
- // We had an upper-bound violation and the failure message is in ss.
- Expect(false, untyped_expectation->file(),
- untyped_expectation->line(), ss.str());
- } else {
- // We had an expected call and the matching expectation is
- // described in ss.
- Log(kInfo, loc.str() + ss.str(), 2);
- }
-
- return result;
-}
-
// Returns an Expectation object that references and co-owns exp,
// which must be an expectation on this mock function.
Expectation UntypedFunctionMockerBase::GetHandleOf(ExpectationBase* exp) {
// See the definition of untyped_expectations_ for why access to it
// is unprotected here.
- for (UntypedExpectations::const_iterator it =
- untyped_expectations_.begin();
+ for (UntypedExpectations::const_iterator it = untyped_expectations_.begin();
it != untyped_expectations_.end(); ++it) {
if (it->get() == exp) {
return Expectation(*it);
@@ -510,8 +395,7 @@ bool UntypedFunctionMockerBase::VerifyAndClearExpectationsLocked()
GTEST_EXCLUSIVE_LOCK_REQUIRED_(g_gmock_mutex) {
g_gmock_mutex.AssertHeld();
bool expectations_met = true;
- for (UntypedExpectations::const_iterator it =
- untyped_expectations_.begin();
+ for (UntypedExpectations::const_iterator it = untyped_expectations_.begin();
it != untyped_expectations_.end(); ++it) {
ExpectationBase* const untyped_expectation = it->get();
if (untyped_expectation->IsOverSaturated()) {
@@ -522,15 +406,15 @@ bool UntypedFunctionMockerBase::VerifyAndClearExpectationsLocked()
} else if (!untyped_expectation->IsSatisfied()) {
expectations_met = false;
::std::stringstream ss;
- ss << "Actual function call count doesn't match "
- << untyped_expectation->source_text() << "...\n";
+ ss << "Actual function call count doesn't match "
+ << untyped_expectation->source_text() << "...\n";
// No need to show the source file location of the expectation
// in the description, as the Expect() call that follows already
// takes care of it.
untyped_expectation->MaybeDescribeExtraMatcherTo(&ss);
untyped_expectation->DescribeCallCountTo(&ss);
- Expect(false, untyped_expectation->file(),
- untyped_expectation->line(), ss.str());
+ Expect(false, untyped_expectation->file(), untyped_expectation->line(),
+ ss.str());
}
}
@@ -597,8 +481,7 @@ class MockObjectRegistry {
// object alive. Therefore we report any living object as test
// failure, unless the user explicitly asked us to ignore it.
~MockObjectRegistry() {
- if (!GMOCK_FLAG(catch_leaked_mocks))
- return;
+ if (!GMOCK_FLAG_GET(catch_leaked_mocks)) return;
int leaked_count = 0;
for (StateMap::const_iterator it = states_.begin(); it != states_.end();
@@ -618,13 +501,13 @@ class MockObjectRegistry {
<< state.first_used_test << ")";
}
std::cout << " should be deleted but never is. Its address is @"
- << it->first << ".";
+ << it->first << ".";
leaked_count++;
}
if (leaked_count > 0) {
std::cout << "\nERROR: " << leaked_count << " leaked mock "
<< (leaked_count == 1 ? "object" : "objects")
- << " found at program exit. Expectations on a mock object is "
+ << " found at program exit. Expectations on a mock object are "
"verified when the object is destructed. Leaking a mock "
"means that its expectations aren't verified, which is "
"usually a test bug. If you really intend to leak a mock, "
@@ -652,57 +535,63 @@ MockObjectRegistry g_mock_object_registry;
// Maps a mock object to the reaction Google Mock should have when an
// uninteresting method is called. Protected by g_gmock_mutex.
-std::map<const void*, internal::CallReaction> g_uninteresting_call_reaction;
+std::unordered_map<uintptr_t, internal::CallReaction>&
+UninterestingCallReactionMap() {
+ static auto* map = new std::unordered_map<uintptr_t, internal::CallReaction>;
+ return *map;
+}
// Sets the reaction Google Mock should have when an uninteresting
// method of the given mock object is called.
-void SetReactionOnUninterestingCalls(const void* mock_obj,
+void SetReactionOnUninterestingCalls(uintptr_t mock_obj,
internal::CallReaction reaction)
GTEST_LOCK_EXCLUDED_(internal::g_gmock_mutex) {
internal::MutexLock l(&internal::g_gmock_mutex);
- g_uninteresting_call_reaction[mock_obj] = reaction;
+ UninterestingCallReactionMap()[mock_obj] = reaction;
}
} // namespace
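The global `std::map` keyed on `const void*` becomes a function-local, intentionally leaked `std::unordered_map` keyed on `uintptr_t`, which avoids static-initialization-order hazards and remains usable during static destruction. The singleton shape in isolation (hypothetical names):

```cpp
#include <cstdint>
#include <iostream>
#include <unordered_map>

enum class Reaction { kAllow, kWarn, kFail };

std::unordered_map<std::uintptr_t, Reaction>& ReactionMap() {
  // Never destroyed, so it is safe to consult even while other static
  // objects are being torn down at program exit.
  static auto* map = new std::unordered_map<std::uintptr_t, Reaction>;
  return *map;
}

int main() {
  int mock;  // stand-in for a mock object's address
  ReactionMap()[reinterpret_cast<std::uintptr_t>(&mock)] = Reaction::kWarn;
  std::cout << ReactionMap().size() << "\n";  // 1
}
```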
// Tells Google Mock to allow uninteresting calls on the given mock
// object.
-void Mock::AllowUninterestingCalls(const void* mock_obj)
+void Mock::AllowUninterestingCalls(uintptr_t mock_obj)
GTEST_LOCK_EXCLUDED_(internal::g_gmock_mutex) {
SetReactionOnUninterestingCalls(mock_obj, internal::kAllow);
}
// Tells Google Mock to warn the user about uninteresting calls on the
// given mock object.
-void Mock::WarnUninterestingCalls(const void* mock_obj)
+void Mock::WarnUninterestingCalls(uintptr_t mock_obj)
GTEST_LOCK_EXCLUDED_(internal::g_gmock_mutex) {
SetReactionOnUninterestingCalls(mock_obj, internal::kWarn);
}
// Tells Google Mock to fail uninteresting calls on the given mock
// object.
-void Mock::FailUninterestingCalls(const void* mock_obj)
+void Mock::FailUninterestingCalls(uintptr_t mock_obj)
GTEST_LOCK_EXCLUDED_(internal::g_gmock_mutex) {
SetReactionOnUninterestingCalls(mock_obj, internal::kFail);
}
// Tells Google Mock the given mock object is being destroyed and its
// entry in the call-reaction table should be removed.
-void Mock::UnregisterCallReaction(const void* mock_obj)
+void Mock::UnregisterCallReaction(uintptr_t mock_obj)
GTEST_LOCK_EXCLUDED_(internal::g_gmock_mutex) {
internal::MutexLock l(&internal::g_gmock_mutex);
- g_uninteresting_call_reaction.erase(mock_obj);
+ UninterestingCallReactionMap().erase(static_cast<uintptr_t>(mock_obj));
}
// Returns the reaction Google Mock will have on uninteresting calls
// made on the given mock object.
internal::CallReaction Mock::GetReactionOnUninterestingCalls(
- const void* mock_obj)
- GTEST_LOCK_EXCLUDED_(internal::g_gmock_mutex) {
+ const void* mock_obj) GTEST_LOCK_EXCLUDED_(internal::g_gmock_mutex) {
internal::MutexLock l(&internal::g_gmock_mutex);
- return (g_uninteresting_call_reaction.count(mock_obj) == 0) ?
- internal::intToCallReaction(GMOCK_FLAG(default_mock_behavior)) :
- g_uninteresting_call_reaction[mock_obj];
+ return (UninterestingCallReactionMap().count(
+ reinterpret_cast<uintptr_t>(mock_obj)) == 0)
+ ? internal::intToCallReaction(
+ GMOCK_FLAG_GET(default_mock_behavior))
+ : UninterestingCallReactionMap()[reinterpret_cast<uintptr_t>(
+ mock_obj)];
}
// Tells Google Mock to ignore mock_obj when checking for leaked mock
@@ -857,8 +746,8 @@ Expectation::~Expectation() {}
void Sequence::AddExpectation(const Expectation& expectation) const {
if (*last_expectation_ != expectation) {
if (last_expectation_->expectation_base() != nullptr) {
- expectation.expectation_base()->immediate_prerequisites_
- += *last_expectation_;
+ expectation.expectation_base()->immediate_prerequisites_ +=
+ *last_expectation_;
}
*last_expectation_ = expectation;
}
@@ -887,6 +776,6 @@ InSequence::~InSequence() {
#ifdef _MSC_VER
#if _MSC_VER == 1900
-# pragma warning(pop)
+#pragma warning(pop)
#endif
#endif
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock.cc b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock.cc
index 32b2a7394fd..5025656a02a 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock.cc
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock.cc
@@ -27,17 +27,15 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
#include "gmock/gmock.h"
-#include "gmock/internal/gmock-port.h"
-namespace testing {
+#include "gmock/internal/gmock-port.h"
GMOCK_DEFINE_bool_(catch_leaked_mocks, true,
"true if and only if Google Mock should report leaked "
"mock objects as failures.");
-GMOCK_DEFINE_string_(verbose, internal::kWarningVerbosity,
+GMOCK_DEFINE_string_(verbose, testing::internal::kWarningVerbosity,
"Controls how verbose Google Mock's output is."
" Valid values:\n"
" info - prints all messages.\n"
@@ -51,6 +49,7 @@ GMOCK_DEFINE_int32_(default_mock_behavior, 1,
" 1 - by default, mocks act as NaggyMocks.\n"
" 2 - by default, mocks act as StrictMocks.");
+namespace testing {
namespace internal {
// Parses a string as a command line flag. The string should have the
@@ -59,18 +58,18 @@ namespace internal {
//
// Returns the value of the flag, or NULL if the parsing failed.
static const char* ParseGoogleMockFlagValue(const char* str,
- const char* flag,
+ const char* flag_name,
bool def_optional) {
// str and flag must not be NULL.
- if (str == nullptr || flag == nullptr) return nullptr;
+ if (str == nullptr || flag_name == nullptr) return nullptr;
// The flag must start with "--gmock_".
- const std::string flag_str = std::string("--gmock_") + flag;
- const size_t flag_len = flag_str.length();
- if (strncmp(str, flag_str.c_str(), flag_len) != 0) return nullptr;
+ const std::string flag_name_str = std::string("--gmock_") + flag_name;
+ const size_t flag_name_len = flag_name_str.length();
+ if (strncmp(str, flag_name_str.c_str(), flag_name_len) != 0) return nullptr;
// Skips the flag name.
- const char* flag_end = str + flag_len;
+ const char* flag_end = str + flag_name_len;
// When def_optional is true, it's OK to not have a "=value" part.
if (def_optional && (flag_end[0] == '\0')) {
@@ -91,10 +90,10 @@ static const char* ParseGoogleMockFlagValue(const char* str,
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
-static bool ParseGoogleMockBoolFlag(const char* str, const char* flag,
- bool* value) {
+static bool ParseGoogleMockFlag(const char* str, const char* flag_name,
+ bool* value) {
// Gets the value of the flag as a string.
- const char* const value_str = ParseGoogleMockFlagValue(str, flag, true);
+ const char* const value_str = ParseGoogleMockFlagValue(str, flag_name, true);
// Aborts if the parsing failed.
if (value_str == nullptr) return false;
@@ -110,10 +109,10 @@ static bool ParseGoogleMockBoolFlag(const char* str, const char* flag,
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
template <typename String>
-static bool ParseGoogleMockStringFlag(const char* str, const char* flag,
- String* value) {
+static bool ParseGoogleMockFlag(const char* str, const char* flag_name,
+ String* value) {
// Gets the value of the flag as a string.
- const char* const value_str = ParseGoogleMockFlagValue(str, flag, false);
+ const char* const value_str = ParseGoogleMockFlagValue(str, flag_name, false);
// Aborts if the parsing failed.
if (value_str == nullptr) return false;
@@ -123,17 +122,17 @@ static bool ParseGoogleMockStringFlag(const char* str, const char* flag,
return true;
}
-static bool ParseGoogleMockIntFlag(const char* str, const char* flag,
- int* value) {
+static bool ParseGoogleMockFlag(const char* str, const char* flag_name,
+ int32_t* value) {
// Gets the value of the flag as a string.
- const char* const value_str = ParseGoogleMockFlagValue(str, flag, true);
+ const char* const value_str = ParseGoogleMockFlagValue(str, flag_name, true);
// Aborts if the parsing failed.
if (value_str == nullptr) return false;
// Sets *value to the value of the flag.
- return ParseInt32(Message() << "The value of flag --" << flag,
- value_str, value);
+ return ParseInt32(Message() << "The value of flag --" << flag_name, value_str,
+ value);
}
// The internal implementation of InitGoogleMock().
@@ -152,11 +151,22 @@ void InitGoogleMockImpl(int* argc, CharType** argv) {
const char* const arg = arg_string.c_str();
// Do we see a Google Mock flag?
- if (ParseGoogleMockBoolFlag(arg, "catch_leaked_mocks",
- &GMOCK_FLAG(catch_leaked_mocks)) ||
- ParseGoogleMockStringFlag(arg, "verbose", &GMOCK_FLAG(verbose)) ||
- ParseGoogleMockIntFlag(arg, "default_mock_behavior",
- &GMOCK_FLAG(default_mock_behavior))) {
+ bool found_gmock_flag = false;
+
+#define GMOCK_INTERNAL_PARSE_FLAG(flag_name) \
+ if (!found_gmock_flag) { \
+ auto value = GMOCK_FLAG_GET(flag_name); \
+ if (ParseGoogleMockFlag(arg, #flag_name, &value)) { \
+ GMOCK_FLAG_SET(flag_name, value); \
+ found_gmock_flag = true; \
+ } \
+ }
+
+ GMOCK_INTERNAL_PARSE_FLAG(catch_leaked_mocks)
+ GMOCK_INTERNAL_PARSE_FLAG(verbose)
+ GMOCK_INTERNAL_PARSE_FLAG(default_mock_behavior)
+
+ if (found_gmock_flag) {
// Yes. Shift the remainder of the argv list left by one. Note
// that argv has (*argc + 1) elements, the last one always being
// NULL. The following loop moves the trailing NULL element as
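The rewritten argument parser collapses `ParseGoogleMockBoolFlag`, `ParseGoogleMockStringFlag`, and `ParseGoogleMockIntFlag` into a single overload set named `ParseGoogleMockFlag`, so the `GMOCK_INTERNAL_PARSE_FLAG` macro can select the right parser from the flag's static type via `auto value = GMOCK_FLAG_GET(flag_name)`. The dispatch pattern in miniature (all names hypothetical):

```cpp
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <string>

// One name, three overloads: `auto value = ...` below picks among them.
static bool ParseFlag(const char* s, bool* out) {
  *out = std::string(s) != "0";
  return true;
}
static bool ParseFlag(const char* s, std::int32_t* out) {
  *out = std::atoi(s);
  return true;
}
static bool ParseFlag(const char* s, std::string* out) {
  *out = s;
  return true;
}

int main() {
  bool catch_leaks = true;       // like GMOCK_FLAG_GET(catch_leaked_mocks)
  std::int32_t behavior = 1;     // like GMOCK_FLAG_GET(default_mock_behavior)
  std::string verbose = "warning";  // like GMOCK_FLAG_GET(verbose)
  ParseFlag("0", &catch_leaks);
  ParseFlag("2", &behavior);
  ParseFlag("info", &verbose);
  std::cout << catch_leaks << " " << behavior << " " << verbose << "\n";
  // Prints: 0 2 info
}
```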
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock_main.cc b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock_main.cc
index 18c500f6639..b411c5ecb97 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock_main.cc
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googlemock/src/gmock_main.cc
@@ -27,8 +27,8 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
#include <iostream>
+
#include "gmock/gmock.h"
#include "gtest/gtest.h"
@@ -56,7 +56,7 @@ void loop() { RUN_ALL_TESTS(); }
// https://web.archive.org/web/20170912203238/connect.microsoft.com/VisualStudio/feedback/details/394464/wmain-link-error-in-the-static-library
// // NOLINT
#if GTEST_OS_WINDOWS_MOBILE
-# include <tchar.h> // NOLINT
+#include <tchar.h> // NOLINT
GTEST_API_ int _tmain(int argc, TCHAR** argv) {
#else
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/CMakeLists.txt b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/CMakeLists.txt
index 9ee79408c27..aa00a5f3d27 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/CMakeLists.txt
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/CMakeLists.txt
@@ -1,4 +1,7 @@
########################################################################
+# Note: CMake support is community-based. The maintainers do not use CMake
+# internally.
+#
# CMake build script for Google Test.
#
# To run the tests for Google Test itself on Linux, use 'make test' or
@@ -40,13 +43,12 @@ endif()
# as ${gtest_SOURCE_DIR} and to the root binary directory as
# ${gtest_BINARY_DIR}.
# Language "C" is required for find_package(Threads).
-if (CMAKE_VERSION VERSION_LESS 3.0)
- project(gtest CXX C)
-else()
- cmake_policy(SET CMP0048 NEW)
- project(gtest VERSION ${GOOGLETEST_VERSION} LANGUAGES CXX C)
-endif()
-cmake_minimum_required(VERSION 2.6.4)
+
+# Project version:
+
+cmake_minimum_required(VERSION 3.5)
+cmake_policy(SET CMP0048 NEW)
+project(gtest VERSION ${GOOGLETEST_VERSION} LANGUAGES CXX C)
if (POLICY CMP0063) # Visibility
cmake_policy(SET CMP0063 NEW)
@@ -85,15 +87,18 @@ include(cmake/internal_utils.cmake)
config_compiler_and_linker() # Defined in internal_utils.cmake.
+# Needed to set the namespace for both the export targets and the
+# alias libraries
+set(cmake_package_name GTest CACHE INTERNAL "")
+
# Create the CMake package file descriptors.
if (INSTALL_GTEST)
include(CMakePackageConfigHelpers)
- set(cmake_package_name GTest)
set(targets_export_name ${cmake_package_name}Targets CACHE INTERNAL "")
set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated" CACHE INTERNAL "")
set(cmake_files_install_dir "${CMAKE_INSTALL_LIBDIR}/cmake/${cmake_package_name}")
set(version_file "${generated_dir}/${cmake_package_name}ConfigVersion.cmake")
- write_basic_package_version_file(${version_file} COMPATIBILITY AnyNewerVersion)
+ write_basic_package_version_file(${version_file} VERSION ${GOOGLETEST_VERSION} COMPATIBILITY AnyNewerVersion)
install(EXPORT ${targets_export_name}
NAMESPACE ${cmake_package_name}::
DESTINATION ${cmake_files_install_dir})
@@ -110,18 +115,6 @@ set(gtest_build_include_dirs
"${gtest_SOURCE_DIR}")
include_directories(${gtest_build_include_dirs})
-# Summary of tuple support for Microsoft Visual Studio:
-# Compiler version(MS) version(cmake) Support
-# ---------- ----------- -------------- -----------------------------
-# <= VS 2010 <= 10 <= 1600 Use Google Tests's own tuple.
-# VS 2012 11 1700 std::tr1::tuple + _VARIADIC_MAX=10
-# VS 2013 12 1800 std::tr1::tuple
-# VS 2015 14 1900 std::tuple
-# VS 2017 15 >= 1910 std::tuple
-if (MSVC AND MSVC_VERSION EQUAL 1700)
- add_definitions(/D _VARIADIC_MAX=10)
-endif()
-
########################################################################
#
# Defines the gtest & gtest_main libraries. User tests should link
@@ -131,18 +124,24 @@ endif()
# are used for other targets, to ensure that gtest can be compiled by a user
# aggressive about warnings.
cxx_library(gtest "${cxx_strict}" src/gtest-all.cc)
+set_target_properties(gtest PROPERTIES VERSION ${GOOGLETEST_VERSION})
cxx_library(gtest_main "${cxx_strict}" src/gtest_main.cc)
+set_target_properties(gtest_main PROPERTIES VERSION ${GOOGLETEST_VERSION})
# If the CMake version supports it, attach header directory information
# to the targets for when we are part of a parent build (i.e. being pulled
# in via add_subdirectory() rather than being a standalone build).
if (DEFINED CMAKE_VERSION AND NOT "${CMAKE_VERSION}" VERSION_LESS "2.8.11")
+ string(REPLACE ";" "$<SEMICOLON>" dirs "${gtest_build_include_dirs}")
target_include_directories(gtest SYSTEM INTERFACE
- "$<BUILD_INTERFACE:${gtest_build_include_dirs}>"
+ "$<BUILD_INTERFACE:${dirs}>"
"$<INSTALL_INTERFACE:$<INSTALL_PREFIX>/${CMAKE_INSTALL_INCLUDEDIR}>")
target_include_directories(gtest_main SYSTEM INTERFACE
- "$<BUILD_INTERFACE:${gtest_build_include_dirs}>"
+ "$<BUILD_INTERFACE:${dirs}>"
"$<INSTALL_INTERFACE:$<INSTALL_PREFIX>/${CMAKE_INSTALL_INCLUDEDIR}>")
endif()
+if(CMAKE_SYSTEM_NAME MATCHES "QNX")
+ target_link_libraries(gtest PUBLIC regex)
+endif()
target_link_libraries(gtest_main PUBLIC gtest)
########################################################################
@@ -193,7 +192,6 @@ if (gtest_build_tests)
cxx_test(googletest-death-test-test gtest_main)
cxx_test(gtest_environment_test gtest)
cxx_test(googletest-filepath-test gtest_main)
- cxx_test(googletest-linked-ptr-test gtest_main)
cxx_test(googletest-listener-test gtest_main)
cxx_test(gtest_main_unittest gtest_main)
cxx_test(googletest-message-test gtest_main)
@@ -217,6 +215,8 @@ if (gtest_build_tests)
test/gtest-typed-test2_test.cc)
cxx_test(gtest_unittest gtest_main)
cxx_test(gtest-unittest-api_test gtest)
+ cxx_test(gtest_skip_in_environment_setup_test gtest_main)
+ cxx_test(gtest_skip_test gtest_main)
############################################################
# C++ tests built with non-standard compiler flags.
@@ -250,27 +250,15 @@ if (gtest_build_tests)
PROPERTIES
COMPILE_DEFINITIONS "GTEST_LINKED_AS_SHARED_LIBRARY=1")
- if (NOT MSVC OR MSVC_VERSION LESS 1600) # 1600 is Visual Studio 2010.
- # Visual Studio 2010, 2012, and 2013 define symbols in std::tr1 that
- # conflict with our own definitions. Therefore using our own tuple does not
- # work on those compilers.
- cxx_library(gtest_main_use_own_tuple "${cxx_use_own_tuple}"
- src/gtest-all.cc src/gtest_main.cc)
-
- cxx_test_with_flags(googletest-tuple-test "${cxx_use_own_tuple}"
- gtest_main_use_own_tuple test/googletest-tuple-test.cc)
-
- cxx_test_with_flags(gtest_use_own_tuple_test "${cxx_use_own_tuple}"
- gtest_main_use_own_tuple
- test/googletest-param-test-test.cc test/googletest-param-test2-test.cc)
- endif()
-
############################################################
# Python tests.
cxx_executable(googletest-break-on-failure-unittest_ test gtest)
py_test(googletest-break-on-failure-unittest)
+ py_test(gtest_skip_check_output_test)
+ py_test(gtest_skip_environment_check_output_test)
+
# Visual Studio .NET 2003 does not support STL with exceptions disabled.
if (NOT MSVC OR MSVC_VERSION GREATER 1310) # 1310 is Visual Studio .NET 2003
cxx_executable_with_flags(
@@ -320,6 +308,9 @@ if (gtest_build_tests)
cxx_executable(googletest-uninitialized-test_ test gtest)
py_test(googletest-uninitialized-test)
+ cxx_executable(gtest_list_output_unittest_ test gtest)
+ py_test(gtest_list_output_unittest)
+
cxx_executable(gtest_xml_outfile1_test_ test gtest_main)
cxx_executable(gtest_xml_outfile2_test_ test gtest_main)
py_test(gtest_xml_outfiles_test)
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/LICENSE b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/LICENSE
deleted file mode 100644
index 1941a11f8ce..00000000000
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/LICENSE
+++ /dev/null
@@ -1,28 +0,0 @@
-Copyright 2008, Google Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- * Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
- * Neither the name of Google Inc. nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/README.md b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/README.md
index 904048f4840..d26b309ed0d 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/README.md
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/README.md
@@ -2,39 +2,51 @@
#### Setup
-To build Google Test and your tests that use it, you need to tell your build
+To build GoogleTest and your tests that use it, you need to tell your build
system where to find its headers and source files. The exact way to do it
depends on which build system you use, and is usually straightforward.
### Build with CMake
-Google Test comes with a CMake build script
+GoogleTest comes with a CMake build script
([CMakeLists.txt](https://github.com/google/googletest/blob/master/CMakeLists.txt))
that can be used on a wide range of platforms ("C" stands for cross-platform).
If you don't have CMake installed already, you can download it for free from
<http://www.cmake.org/>.
CMake works by generating native makefiles or build projects that can be used in
-the compiler environment of your choice. You can either build Google Test as a
+the compiler environment of your choice. You can either build GoogleTest as a
standalone project or it can be incorporated into an existing CMake build for
another project.
#### Standalone CMake Project
-When building Google Test as a standalone project, the typical workflow starts
-with:
+When building GoogleTest as a standalone project, the typical workflow starts
+with
- mkdir mybuild # Create a directory to hold the build output.
- cd mybuild
- cmake ${GTEST_DIR} # Generate native build scripts.
+```
+git clone https://github.com/google/googletest.git -b release-1.11.0
+cd googletest # Main directory of the cloned repository.
+mkdir build # Create a directory to hold the build output.
+cd build
+cmake .. # Generate native build scripts for GoogleTest.
+```
-If you want to build Google Test's samples, you should replace the last command
-with
+The above command also includes GoogleMock by default. If you want to build
+only GoogleTest, you should replace the last command with
- cmake -Dgtest_build_samples=ON ${GTEST_DIR}
+```
+cmake .. -DBUILD_GMOCK=OFF
+```
If you are on a \*nix system, you should now see a Makefile in the current
-directory. Just type 'make' to build gtest.
+directory. Just type `make` to build GoogleTest. You can then install
+GoogleTest if you are a system administrator.
+
+```
+make
+sudo make install # Install in /usr/local/ by default
+```
If you use Windows and have Visual Studio installed, a `gtest.sln` file and
several `.vcproj` files will be created. You can then build them using Visual
@@ -44,13 +56,19 @@ On Mac OS X with Xcode installed, a `.xcodeproj` file will be generated.
#### Incorporating Into An Existing CMake Project
-If you want to use gtest in a project which already uses CMake, then a more
-robust and flexible approach is to build gtest as part of that project directly.
-This is done by making the GoogleTest source code available to the main build
-and adding it using CMake's `add_subdirectory()` command. This has the
-significant advantage that the same compiler and linker settings are used
-between gtest and the rest of your project, so issues associated with using
-incompatible libraries (eg debug/release), etc. are avoided. This is
+If you want to use GoogleTest in a project that already uses CMake, the easiest
+way is to use the installed libraries and headers.
+
+* Import GoogleTest by using `find_package` (or `pkg_check_modules`). For
+ example, if `find_package(GTest CONFIG REQUIRED)` succeeds, you can use the
+ libraries as `GTest::gtest`, `GTest::gmock`.
+
+And a more robust and flexible approach is to build GoogleTest as part of that
+project directly. This is done by making the GoogleTest source code available to
+the main build and adding it using CMake's `add_subdirectory()` command. This
+has the significant advantage that the same compiler and linker settings are
+used between GoogleTest and the rest of your project, so issues associated with
+using incompatible libraries (e.g. debug/release), etc. are avoided. This is
particularly useful on Windows. Making GoogleTest's source code available to the
main build can be done a few different ways:
@@ -64,68 +82,23 @@ main build can be done a few different ways:
possible or appropriate. Git submodules, for example, have their own set of
advantages and drawbacks.
* Use CMake to download GoogleTest as part of the build's configure step. This
- is just a little more complex, but doesn't have the limitations of the other
- methods.
+ approach doesn't have the limitations of the other methods.
-The last of the above methods is implemented with a small piece of CMake code in
-a separate file (e.g. `CMakeLists.txt.in`) which is copied to the build area and
-then invoked as a sub-build _during the CMake stage_. That directory is then
-pulled into the main build with `add_subdirectory()`. For example:
+The last of the above methods is implemented with a small piece of CMake code
+that downloads and pulls the GoogleTest code into the main build.
-New file `CMakeLists.txt.in`:
+Just add to your `CMakeLists.txt`:
```cmake
-cmake_minimum_required(VERSION 2.8.2)
-
-project(googletest-download NONE)
-
-include(ExternalProject)
-ExternalProject_Add(googletest
- GIT_REPOSITORY https://github.com/google/googletest.git
- GIT_TAG master
- SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/googletest-src"
- BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/googletest-build"
- CONFIGURE_COMMAND ""
- BUILD_COMMAND ""
- INSTALL_COMMAND ""
- TEST_COMMAND ""
+include(FetchContent)
+FetchContent_Declare(
+ googletest
+ # Specify the commit you depend on and update it regularly.
+ URL https://github.com/google/googletest/archive/e2239ee6043f73722e7aa812a459f54a28552929.zip
)
-```
-
-Existing build's `CMakeLists.txt`:
-
-```cmake
-# Download and unpack googletest at configure time
-configure_file(CMakeLists.txt.in googletest-download/CMakeLists.txt)
-execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
- RESULT_VARIABLE result
- WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download )
-if(result)
- message(FATAL_ERROR "CMake step for googletest failed: ${result}")
-endif()
-execute_process(COMMAND ${CMAKE_COMMAND} --build .
- RESULT_VARIABLE result
- WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download )
-if(result)
- message(FATAL_ERROR "Build step for googletest failed: ${result}")
-endif()
-
-# Prevent overriding the parent project's compiler/linker
-# settings on Windows
+# For Windows: Prevent overriding the parent project's compiler/linker settings
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
-
-# Add googletest directly to our build. This defines
-# the gtest and gtest_main targets.
-add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/googletest-src
- ${CMAKE_CURRENT_BINARY_DIR}/googletest-build
- EXCLUDE_FROM_ALL)
-
-# The gtest/gtest_main targets carry header search path
-# dependencies automatically when using CMake 2.8.11 or
-# later. Otherwise we have to add them here ourselves.
-if (CMAKE_VERSION VERSION_LESS 2.8.11)
- include_directories("${gtest_SOURCE_DIR}/include")
-endif()
+FetchContent_MakeAvailable(googletest)
# Now simply link against gtest or gtest_main as needed. Eg
add_executable(example example.cpp)
@@ -133,20 +106,18 @@ target_link_libraries(example gtest_main)
add_test(NAME example_test COMMAND example)
```
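As a sketch only (not part of the patch), an `example.cpp` that the snippet above would build and run might look like:

```cpp
#include "gtest/gtest.h"

// gtest_main supplies main(), so a single TEST suffices.
TEST(ExampleTest, TwoPlusTwoIsFour) { EXPECT_EQ(2 + 2, 4); }
```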
-Note that this approach requires CMake 2.8.2 or later due to its use of the
-`ExternalProject_Add()` command. The above technique is discussed in more detail
-in [this separate article](http://crascit.com/2015/07/25/cmake-gtest/) which
-also contains a link to a fully generalized implementation of the technique.
+Note that this approach requires CMake 3.14 or later due to its use of the
+`FetchContent_MakeAvailable()` command.
##### Visual Studio Dynamic vs Static Runtimes
By default, new Visual Studio projects link the C runtimes dynamically but
-Google Test links them statically. This will generate an error that looks
+GoogleTest links them statically. This will generate an error that looks
something like the following: gtest.lib(gtest-all.obj) : error LNK2038: mismatch
detected for 'RuntimeLibrary': value 'MTd_StaticDebug' doesn't match value
'MDd_DynamicDebug' in main.obj
-Google Test already has a CMake option for this: `gtest_force_shared_crt`
+GoogleTest already has a CMake option for this: `gtest_force_shared_crt`
Enabling this option will make gtest link the runtimes dynamically too, and
match the project in which it is included.
@@ -154,17 +125,17 @@ match the project in which it is included.
#### C++ Standard Version
An environment that supports C++11 is required in order to successfully build
-Google Test. One way to ensure this is to specify the standard in the top-level
+GoogleTest. One way to ensure this is to specify the standard in the top-level
project, for example by using the `set(CMAKE_CXX_STANDARD 11)` command. If this
-is not feasible, for example in a C project using Google Test for validation,
+is not feasible, for example in a C project using GoogleTest for validation,
then it can be specified by adding it to the options for cmake via the
`DCMAKE_CXX_FLAGS` option.
-### Tweaking Google Test
+### Tweaking GoogleTest
-Google Test can be used in diverse environments. The default configuration may
+GoogleTest can be used in diverse environments. The default configuration may
not work (or may not work well) out of the box in some environments. However,
-you can easily tweak Google Test by defining control macros on the compiler
+you can easily tweak GoogleTest by defining control macros on the compiler
command line. Generally, these macros are named like `GTEST_XYZ` and you define
them to either 1 or 0 to enable or disable a certain feature.
@@ -173,12 +144,12 @@ We list the most frequently used macros below. For a complete list, see file
### Multi-threaded Tests
-Google Test is thread-safe where the pthread library is available. After
+GoogleTest is thread-safe where the pthread library is available. After
`#include "gtest/gtest.h"`, you can check the
`GTEST_IS_THREADSAFE` macro to see whether this is the case (yes if the macro is
`#defined` to 1, no if it's undefined).
-If Google Test doesn't correctly detect whether pthread is available in your
+If GoogleTest doesn't correctly detect whether pthread is available in your
environment, you can force it with
-DGTEST_HAS_PTHREAD=1
@@ -187,16 +158,16 @@ or
-DGTEST_HAS_PTHREAD=0
-When Google Test uses pthread, you may need to add flags to your compiler and/or
+When GoogleTest uses pthread, you may need to add flags to your compiler and/or
linker to select the pthread library, or you'll get link errors. If you use the
-CMake script or the deprecated Autotools script, this is taken care of for you.
-If you use your own build script, you'll need to read your compiler and linker's
-manual to figure out what flags to add.
+CMake script, this is taken care of for you. If you use your own build script,
+you'll need to read your compiler and linker's manual to figure out what flags
+to add.
### As a Shared Library (DLL)
-Google Test is compact, so most users can build and link it as a static library
-for the simplicity. You can choose to use Google Test as a shared library (known
+GoogleTest is compact, so most users can build and link it as a static library
+for simplicity. You can choose to use GoogleTest as a shared library (known
as a DLL on Windows) if you prefer.
To compile *gtest* as a shared library, add
@@ -216,23 +187,25 @@ Note: while the above steps aren't technically necessary today when using some
compilers (e.g. GCC), they may become necessary in the future, if we decide to
improve the speed of loading the library (see
<http://gcc.gnu.org/wiki/Visibility> for details). Therefore it is recommended
-to always add the above flags when using Google Test as a shared library.
-Otherwise a future release of Google Test may break your build script.
+to always add the above flags when using GoogleTest as a shared library.
+Otherwise a future release of GoogleTest may break your build script.
### Avoiding Macro Name Clashes
In C++, macros don't obey namespaces. Therefore two libraries that both define a
macro of the same name will clash if you `#include` both definitions. In case a
-Google Test macro clashes with another library, you can force Google Test to
+GoogleTest macro clashes with another library, you can force GoogleTest to
rename its macro to avoid the conflict.
-Specifically, if both Google Test and some other code define macro FOO, you can
+Specifically, if both GoogleTest and some other code define macro FOO, you can
add
-DGTEST_DONT_DEFINE_FOO=1
-to the compiler flags to tell Google Test to change the macro's name from `FOO`
-to `GTEST_FOO`. Currently `FOO` can be `FAIL`, `SUCCEED`, or `TEST`. For
+to the compiler flags to tell GoogleTest to change the macro's name from `FOO`
+to `GTEST_FOO`. Currently `FOO` can be `ASSERT_EQ`, `ASSERT_FALSE`, `ASSERT_GE`,
+`ASSERT_GT`, `ASSERT_LE`, `ASSERT_LT`, `ASSERT_NE`, `ASSERT_TRUE`,
+`EXPECT_FALSE`, `EXPECT_TRUE`, `FAIL`, `SUCCEED`, `TEST`, or `TEST_F`. For
example, with `-DGTEST_DONT_DEFINE_TEST=1`, you'll need to write
GTEST_TEST(SomeTest, DoesThis) { ... }
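Similarly, a translation unit built with `-DGTEST_DONT_DEFINE_ASSERT_EQ=1`
spells that assertion through its prefixed name; a minimal sketch:

    // Compiled with -DGTEST_DONT_DEFINE_ASSERT_EQ=1, so ASSERT_EQ is not
    // defined and the prefixed spelling is used instead.
    #include "gtest/gtest.h"

    TEST(MacroRenaming, PrefixedAssertStillWorks) {
      GTEST_ASSERT_EQ(2 + 2, 4);
    }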
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/cmake/gtest.pc.in b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/cmake/gtest.pc.in
index e7967ad56ff..b4148fae42b 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/cmake/gtest.pc.in
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/cmake/gtest.pc.in
@@ -6,4 +6,4 @@ Description: GoogleTest (without main() function)
Version: @PROJECT_VERSION@
URL: https://github.com/google/googletest
Libs: -L${libdir} -lgtest @CMAKE_THREAD_LIBS_INIT@
-Cflags: -I${includedir} @GTEST_HAS_PTHREAD_MACRO@ @CMAKE_THREAD_LIBS_INIT@
+Cflags: -I${includedir} @GTEST_HAS_PTHREAD_MACRO@
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/cmake/gtest_main.pc.in b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/cmake/gtest_main.pc.in
index fe25d9c73c6..38c88c54d53 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/cmake/gtest_main.pc.in
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/cmake/gtest_main.pc.in
@@ -5,6 +5,6 @@ Name: gtest_main
Description: GoogleTest (with main() function)
Version: @PROJECT_VERSION@
URL: https://github.com/google/googletest
-Requires: gtest
+Requires: gtest = @PROJECT_VERSION@
Libs: -L${libdir} -lgtest_main @CMAKE_THREAD_LIBS_INIT@
-Cflags: -I${includedir} @GTEST_HAS_PTHREAD_MACRO@ @CMAKE_THREAD_LIBS_INIT@
+Cflags: -I${includedir} @GTEST_HAS_PTHREAD_MACRO@
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/cmake/internal_utils.cmake b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/cmake/internal_utils.cmake
index 8c1f9ba99cf..5a34c07a1b9 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/cmake/internal_utils.cmake
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/cmake/internal_utils.cmake
@@ -12,6 +12,10 @@
# Test and Google Mock's option() definitions, and thus must be
# called *after* the options have been defined.
+if (POLICY CMP0054)
+ cmake_policy(SET CMP0054 NEW)
+endif (POLICY CMP0054)
+
# Tweaks CMake's default compiler/linker settings to suit Google Test's needs.
#
# This must be a macro(), as inside a function string() can only
@@ -22,6 +26,8 @@ macro(fix_default_compiler_settings_)
# This replacement code is taken from sample in the CMake Wiki at
# https://gitlab.kitware.com/cmake/community/wikis/FAQ#dynamic-replace.
foreach (flag_var
+ CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
+ CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
if (NOT BUILD_SHARED_LIBS AND NOT gtest_force_shared_crt)
@@ -56,7 +62,6 @@ macro(config_compiler_and_linker)
unset(GTEST_HAS_PTHREAD)
if (NOT gtest_disable_pthreads AND NOT MINGW)
# Defines CMAKE_USE_PTHREADS_INIT and CMAKE_THREAD_LIBS_INIT.
- set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads)
if (CMAKE_USE_PTHREADS_INIT)
set(GTEST_HAS_PTHREAD ON)
@@ -67,39 +72,25 @@ macro(config_compiler_and_linker)
if (MSVC)
# Newlines inside flags variables break CMake's NMake generator.
# TODO(vladl@google.com): Add -RTCs and -RTCu to debug builds.
- set(cxx_base_flags "-GS -W4 -WX -wd4251 -wd4275 -nologo -J -Zi")
- if (MSVC_VERSION LESS 1400) # 1400 is Visual Studio 2005
- # Suppress spurious warnings MSVC 7.1 sometimes issues.
- # Forcing value to bool.
- set(cxx_base_flags "${cxx_base_flags} -wd4800")
- # Copy constructor and assignment operator could not be generated.
- set(cxx_base_flags "${cxx_base_flags} -wd4511 -wd4512")
- # Compatibility warnings not applicable to Google Test.
- # Resolved overload was found by argument-dependent lookup.
- set(cxx_base_flags "${cxx_base_flags} -wd4675")
- endif()
- if (MSVC_VERSION LESS 1500) # 1500 is Visual Studio 2008
- # Conditional expression is constant.
- # When compiling with /W4, we get several instances of C4127
- # (Conditional expression is constant). In our code, we disable that
- # warning on a case-by-case basis. However, on Visual Studio 2005,
- # the warning fires on std::list. Therefore on that compiler and earlier,
- # we disable the warning project-wide.
- set(cxx_base_flags "${cxx_base_flags} -wd4127")
- endif()
- if (NOT (MSVC_VERSION LESS 1700)) # 1700 is Visual Studio 2012.
- # Suppress "unreachable code" warning on VS 2012 and later.
- # http://stackoverflow.com/questions/3232669 explains the issue.
- set(cxx_base_flags "${cxx_base_flags} -wd4702")
- endif()
-
+ set(cxx_base_flags "-GS -W4 -WX -wd4251 -wd4275 -nologo -J")
set(cxx_base_flags "${cxx_base_flags} -D_UNICODE -DUNICODE -DWIN32 -D_WIN32")
set(cxx_base_flags "${cxx_base_flags} -DSTRICT -DWIN32_LEAN_AND_MEAN")
set(cxx_exception_flags "-EHsc -D_HAS_EXCEPTIONS=1")
set(cxx_no_exception_flags "-EHs-c- -D_HAS_EXCEPTIONS=0")
set(cxx_no_rtti_flags "-GR-")
+ # Suppress "unreachable code" warning
+ # http://stackoverflow.com/questions/3232669 explains the issue.
+ set(cxx_base_flags "${cxx_base_flags} -wd4702")
+ # Ensure MSVC treats source files as UTF-8 encoded.
+ set(cxx_base_flags "${cxx_base_flags} -utf-8")
+ elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+ set(cxx_base_flags "-Wall -Wshadow -Wconversion")
+ set(cxx_exception_flags "-fexceptions")
+ set(cxx_no_exception_flags "-fno-exceptions")
+ set(cxx_strict_flags "-W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls")
+ set(cxx_no_rtti_flags "-fno-rtti")
elseif (CMAKE_COMPILER_IS_GNUCXX)
- set(cxx_base_flags "-Wall -Wshadow -Werror")
+ set(cxx_base_flags "-Wall -Wshadow")
if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0.0)
set(cxx_base_flags "${cxx_base_flags} -Wno-error=dangling-else")
endif()
@@ -148,7 +139,6 @@ macro(config_compiler_and_linker)
"${CMAKE_CXX_FLAGS} ${cxx_base_flags} ${cxx_no_exception_flags}")
set(cxx_default "${cxx_exception}")
set(cxx_no_rtti "${cxx_default} ${cxx_no_rtti_flags}")
- set(cxx_use_own_tuple "${cxx_default} -DGTEST_USE_OWN_TR1_TUPLE=1")
# For building the gtest libraries.
set(cxx_strict "${cxx_default} ${cxx_strict_flags}")
@@ -160,13 +150,26 @@ function(cxx_library_with_type name type cxx_flags)
# type can be either STATIC or SHARED to denote a static or shared library.
# ARGN refers to additional arguments after 'cxx_flags'.
add_library(${name} ${type} ${ARGN})
+ add_library(${cmake_package_name}::${name} ALIAS ${name})
set_target_properties(${name}
PROPERTIES
COMPILE_FLAGS "${cxx_flags}")
- # Generate debug library name with a postfix.
+ # Set the output directory for build artifacts
+ set_target_properties(${name}
+ PROPERTIES
+ RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin"
+ LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+ ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
+ PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
+ # make PDBs match library name
+ get_target_property(pdb_debug_postfix ${name} DEBUG_POSTFIX)
set_target_properties(${name}
PROPERTIES
- DEBUG_POSTFIX "d")
+ PDB_NAME "${name}"
+ PDB_NAME_DEBUG "${name}${pdb_debug_postfix}"
+ COMPILE_PDB_NAME "${name}"
+ COMPILE_PDB_NAME_DEBUG "${name}${pdb_debug_postfix}")
+
if (BUILD_SHARED_LIBS OR type STREQUAL "SHARED")
set_target_properties(${name}
PROPERTIES
@@ -184,6 +187,10 @@ function(cxx_library_with_type name type cxx_flags)
endif()
target_link_libraries(${name} PUBLIC ${threads_spec})
endif()
+
+ if (NOT "${CMAKE_VERSION}" VERSION_LESS "3.8")
+ target_compile_features(${name} PUBLIC cxx_std_11)
+ endif()
endfunction()
########################################################################
@@ -204,7 +211,7 @@ endfunction()
# is built from the given source files with the given compiler flags.
function(cxx_executable_with_flags name cxx_flags libs)
add_executable(${name} ${ARGN})
- if (MSVC AND (NOT (MSVC_VERSION LESS 1700))) # 1700 is Visual Studio 2012.
+ if (MSVC)
# BigObj required for tests.
set(cxx_flags "${cxx_flags} -bigobj")
endif()
@@ -236,7 +243,13 @@ function(cxx_executable name dir libs)
endfunction()
# Sets PYTHONINTERP_FOUND and PYTHON_EXECUTABLE.
-find_package(PythonInterp)
+if ("${CMAKE_VERSION}" VERSION_LESS "3.12.0")
+ find_package(PythonInterp)
+else()
+ find_package(Python COMPONENTS Interpreter)
+ set(PYTHONINTERP_FOUND ${Python_Interpreter_FOUND})
+ set(PYTHON_EXECUTABLE ${Python_EXECUTABLE})
+endif()
# cxx_test_with_flags(name cxx_flags libs srcs...)
#
@@ -244,7 +257,7 @@ find_package(PythonInterp)
# from the given source files with the given compiler flags.
function(cxx_test_with_flags name cxx_flags libs)
cxx_executable_with_flags(${name} "${cxx_flags}" "${libs}" ${ARGN})
- add_test(NAME ${name} COMMAND ${name})
+ add_test(NAME ${name} COMMAND "$<TARGET_FILE:${name}>")
endfunction()
# cxx_test(name libs srcs...)
@@ -263,33 +276,32 @@ endfunction()
# test/name.py. It does nothing if Python is not installed.
function(py_test name)
if (PYTHONINTERP_FOUND)
- if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 3.1)
+ if ("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" VERSION_GREATER 3.1)
if (CMAKE_CONFIGURATION_TYPES)
- # Multi-configuration build generators as for Visual Studio save
- # output in a subdirectory of CMAKE_CURRENT_BINARY_DIR (Debug,
- # Release etc.), so we have to provide it here.
- add_test(
- NAME ${name}
+ # Multi-configuration build generators as for Visual Studio save
+ # output in a subdirectory of CMAKE_CURRENT_BINARY_DIR (Debug,
+ # Release etc.), so we have to provide it here.
+ add_test(NAME ${name}
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test/${name}.py
--build_dir=${CMAKE_CURRENT_BINARY_DIR}/$<CONFIG> ${ARGN})
else (CMAKE_CONFIGURATION_TYPES)
- # Single-configuration build generators like Makefile generators
- # don't have subdirs below CMAKE_CURRENT_BINARY_DIR.
- add_test(
- NAME ${name}
+ # Single-configuration build generators like Makefile generators
+ # don't have subdirs below CMAKE_CURRENT_BINARY_DIR.
+ add_test(NAME ${name}
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test/${name}.py
- --build_dir=${CMAKE_CURRENT_BINARY_DIR} ${ARGN})
+ --build_dir=${CMAKE_CURRENT_BINARY_DIR} ${ARGN})
endif (CMAKE_CONFIGURATION_TYPES)
- else (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 3.1)
+ else()
# ${CMAKE_CURRENT_BINARY_DIR} is known at configuration time, so we can
# directly bind it from cmake. ${CTEST_CONFIGURATION_TYPE} is known
# only at ctest runtime (by calling ctest -c <Configuration>), so
# we have to escape $ to delay variable substitution here.
- add_test(
- ${name}
- ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test/${name}.py
+ add_test(NAME ${name}
+ COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/test/${name}.py
--build_dir=${CMAKE_CURRENT_BINARY_DIR}/\${CTEST_CONFIGURATION_TYPE} ${ARGN})
- endif (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 3.1)
+ endif()
+ # Make the Python import path consistent between Bazel and CMake.
+ set_tests_properties(${name} PROPERTIES ENVIRONMENT PYTHONPATH=${CMAKE_SOURCE_DIR})
endif(PYTHONINTERP_FOUND)
endfunction()
@@ -306,6 +318,18 @@ function(install_project)
RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}")
+ if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
+ # Install PDBs
+ foreach(t ${ARGN})
+ get_target_property(t_pdb_name ${t} COMPILE_PDB_NAME)
+ get_target_property(t_pdb_name_debug ${t} COMPILE_PDB_NAME_DEBUG)
+ get_target_property(t_pdb_output_directory ${t} PDB_OUTPUT_DIRECTORY)
+ install(FILES
+ "${t_pdb_output_directory}/\${CMAKE_INSTALL_CONFIG_NAME}/$<$<CONFIG:Debug>:${t_pdb_name_debug}>$<$<NOT:$<CONFIG:Debug>>:${t_pdb_name}>.pdb"
+ DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ OPTIONAL)
+ endforeach()
+ endif()
# Configure and install pkgconfig files.
foreach(t ${ARGN})
set(configured_pc "${generated_dir}/${t}.pc")
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/cmake/libgtest.la.in b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/cmake/libgtest.la.in
new file mode 100644
index 00000000000..840c83885f9
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/cmake/libgtest.la.in
@@ -0,0 +1,21 @@
+# libgtest.la - a libtool library file
+# Generated by libtool (GNU libtool) 2.4.6
+
+# Please DO NOT delete this file!
+# It is necessary for linking the library.
+
+# Names of this library.
+library_names='libgtest.so'
+
+# Is this an already installed library?
+installed=yes
+
+# Should we warn about portability when linking against -modules?
+shouldnotlink=no
+
+# Files to dlopen/dlpreopen
+dlopen=''
+dlpreopen=''
+
+# Directory that this library needs to be installed in:
+libdir='@CMAKE_INSTALL_FULL_LIBDIR@'
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-assertion-result.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-assertion-result.h
new file mode 100644
index 00000000000..addbb59c641
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-assertion-result.h
@@ -0,0 +1,237 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This file implements the AssertionResult type.
+
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_ASSERTION_RESULT_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_ASSERTION_RESULT_H_
+
+#include <memory>
+#include <ostream>
+#include <string>
+#include <type_traits>
+
+#include "gtest/gtest-message.h"
+#include "gtest/internal/gtest-port.h"
+
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
+
+namespace testing {
+
+// A class for indicating whether an assertion was successful. When
+// the assertion wasn't successful, the AssertionResult object
+// remembers a non-empty message that describes how it failed.
+//
+// To create an instance of this class, use one of the factory functions
+// (AssertionSuccess() and AssertionFailure()).
+//
+// This class is useful for two purposes:
+// 1. Defining predicate functions to be used with Boolean test assertions
+// EXPECT_TRUE/EXPECT_FALSE and their ASSERT_ counterparts
+// 2. Defining predicate-format functions to be
+// used with predicate assertions (ASSERT_PRED_FORMAT*, etc).
+//
+// For example, if you define IsEven predicate:
+//
+// testing::AssertionResult IsEven(int n) {
+// if ((n % 2) == 0)
+// return testing::AssertionSuccess();
+// else
+// return testing::AssertionFailure() << n << " is odd";
+// }
+//
+// Then the failed expectation EXPECT_TRUE(IsEven(Fib(5)))
+// will print the message
+//
+// Value of: IsEven(Fib(5))
+// Actual: false (5 is odd)
+// Expected: true
+//
+// instead of a more opaque
+//
+// Value of: IsEven(Fib(5))
+// Actual: false
+// Expected: true
+//
+// in case IsEven is a simple Boolean predicate.
+//
+// If you expect your predicate to be reused and want to support informative
+// messages in EXPECT_FALSE and ASSERT_FALSE (negative assertions show up
+// about half as often as positive ones in our tests), supply messages for
+// both success and failure cases:
+//
+// testing::AssertionResult IsEven(int n) {
+// if ((n % 2) == 0)
+// return testing::AssertionSuccess() << n << " is even";
+// else
+// return testing::AssertionFailure() << n << " is odd";
+// }
+//
+// Then a statement EXPECT_FALSE(IsEven(Fib(6))) will print
+//
+// Value of: IsEven(Fib(6))
+// Actual: true (8 is even)
+// Expected: false
+//
+// NB: Predicates that support negative Boolean assertions have reduced
+// performance in positive ones so be careful not to use them in tests
+// that have lots (tens of thousands) of positive Boolean assertions.
+//
+// To use this class with EXPECT_PRED_FORMAT assertions such as:
+//
+// // Verifies that Foo() returns an even number.
+// EXPECT_PRED_FORMAT1(IsEven, Foo());
+//
+// you need to define:
+//
+// testing::AssertionResult IsEven(const char* expr, int n) {
+// if ((n % 2) == 0)
+// return testing::AssertionSuccess();
+// else
+// return testing::AssertionFailure()
+// << "Expected: " << expr << " is even\n Actual: it's " << n;
+// }
+//
+// If Foo() returns 5, you will see the following message:
+//
+// Expected: Foo() is even
+// Actual: it's 5
+//
+class GTEST_API_ AssertionResult {
+ public:
+ // Copy constructor.
+ // Used in EXPECT_TRUE/FALSE(assertion_result).
+ AssertionResult(const AssertionResult& other);
+
+// C4800 is a level 3 warning in Visual Studio 2015 and earlier.
+// This warning is not emitted in Visual Studio 2017.
+// This warning is off by default starting in Visual Studio 2019 but can be
+// enabled with command-line options.
+#if defined(_MSC_VER) && (_MSC_VER < 1910 || _MSC_VER >= 1920)
+ GTEST_DISABLE_MSC_WARNINGS_PUSH_(4800 /* forcing value to bool */)
+#endif
+
+ // Used in the EXPECT_TRUE/FALSE(bool_expression).
+ //
+ // T must be contextually convertible to bool.
+ //
+ // The second parameter prevents this overload from being considered if
+ // the argument is implicitly convertible to AssertionResult. In that case
+ // we want AssertionResult's copy constructor to be used.
+ template <typename T>
+ explicit AssertionResult(
+ const T& success,
+ typename std::enable_if<
+ !std::is_convertible<T, AssertionResult>::value>::type*
+ /*enabler*/
+ = nullptr)
+ : success_(success) {}
+
+#if defined(_MSC_VER) && (_MSC_VER < 1910 || _MSC_VER >= 1920)
+ GTEST_DISABLE_MSC_WARNINGS_POP_()
+#endif
+
+ // Assignment operator.
+ AssertionResult& operator=(AssertionResult other) {
+ swap(other);
+ return *this;
+ }
+
+ // Returns true if and only if the assertion succeeded.
+ operator bool() const { return success_; } // NOLINT
+
+ // Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE.
+ AssertionResult operator!() const;
+
+ // Returns the text streamed into this AssertionResult. Test assertions
+ // use it when they fail (i.e., the predicate's outcome doesn't match the
+ // assertion's expectation). When nothing has been streamed into the
+ // object, returns an empty string.
+ const char* message() const {
+ return message_.get() != nullptr ? message_->c_str() : "";
+ }
+ // Deprecated; please use message() instead.
+ const char* failure_message() const { return message(); }
+
+ // Streams a custom failure message into this object.
+ template <typename T>
+ AssertionResult& operator<<(const T& value) {
+ AppendMessage(Message() << value);
+ return *this;
+ }
+
+ // Allows streaming basic output manipulators such as endl or flush into
+ // this object.
+ AssertionResult& operator<<(
+ ::std::ostream& (*basic_manipulator)(::std::ostream& stream)) {
+ AppendMessage(Message() << basic_manipulator);
+ return *this;
+ }
+
+ private:
+ // Appends the contents of message to message_.
+ void AppendMessage(const Message& a_message) {
+ if (message_.get() == nullptr) message_.reset(new ::std::string);
+ message_->append(a_message.GetString().c_str());
+ }
+
+ // Swap the contents of this AssertionResult with other.
+ void swap(AssertionResult& other);
+
+ // Stores result of the assertion predicate.
+ bool success_;
+ // Stores the message describing the condition in case the expectation
+ // construct is not satisfied with the predicate's outcome.
+ // Referenced via a pointer to avoid taking too much stack frame space
+ // with test assertions.
+ std::unique_ptr< ::std::string> message_;
+};
+
+// Makes a successful assertion result.
+GTEST_API_ AssertionResult AssertionSuccess();
+
+// Makes a failed assertion result.
+GTEST_API_ AssertionResult AssertionFailure();
+
+// Makes a failed assertion result with the given failure message.
+// Deprecated; use AssertionFailure() << msg.
+GTEST_API_ AssertionResult AssertionFailure(const Message& msg);
+
+} // namespace testing
+
+GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
+
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_ASSERTION_RESULT_H_
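The factory functions above compose with the streaming operator; a minimal
usage sketch of a reusable predicate (the predicate and test names are
illustrative):

    #include "gtest/gtest.h"

    // A reusable predicate that explains both success and failure,
    // so EXPECT_TRUE and EXPECT_FALSE both produce useful messages.
    testing::AssertionResult IsPositive(int n) {
      if (n > 0) return testing::AssertionSuccess() << n << " is positive";
      return testing::AssertionFailure() << n << " is not positive";
    }

    TEST(AssertionResultSketch, ExplainsOutcomes) {
      EXPECT_TRUE(IsPositive(3));
      EXPECT_FALSE(IsPositive(-2));
    }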
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-death-test.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-death-test.h
index 39f0ded1b58..84e5a5bbd37 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-death-test.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-death-test.h
@@ -27,20 +27,20 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
// The Google C++ Testing and Mocking Framework (Google Test)
//
// This header file defines the public API for death tests. It is
// #included by gtest.h so a user doesn't need to include this
// directly.
-// GOOGLETEST_CM0001 DO NOT DELETE
-#ifndef GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
-#define GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
-#include "gtest/internal/gtest-death-test-internal.h"
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
-namespace testing {
+#include "gtest/internal/gtest-death-test-internal.h"
// This flag controls the style of death tests. Valid values are "threadsafe",
// meaning that the death test child process will re-execute the test binary
@@ -49,6 +49,8 @@ namespace testing {
// after forking.
GTEST_DECLARE_string_(death_test_style);
+namespace testing {
+
#if GTEST_HAS_DEATH_TEST
namespace internal {
@@ -97,9 +99,12 @@ GTEST_API_ bool InDeathTestChild();
//
// ASSERT_EXIT(client.HangUpServer(), KilledBySIGHUP, "Hanging up!");
//
+// The final parameter to each of these macros is a matcher applied to any data
+// the sub-process wrote to stderr. For compatibility with existing tests, a
+// bare string is interpreted as a regular expression matcher.
+//
// On the regular expressions used in death tests:
//
-// GOOGLETEST_CM0005 DO NOT DELETE
// On POSIX-compliant systems (*nix), we use the <regex.h> library,
// which uses the POSIX extended regex syntax.
//
@@ -162,27 +167,27 @@ GTEST_API_ bool InDeathTestChild();
// directory in PATH.
//
-// Asserts that a given statement causes the program to exit, with an
-// integer exit status that satisfies predicate, and emitting error output
-// that matches regex.
-#define ASSERT_EXIT(statement, predicate, regex) \
- GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_FATAL_FAILURE_)
+// Asserts that a given `statement` causes the program to exit, with an
+// integer exit status that satisfies `predicate`, and emitting error output
+// that matches `matcher`.
+#define ASSERT_EXIT(statement, predicate, matcher) \
+ GTEST_DEATH_TEST_(statement, predicate, matcher, GTEST_FATAL_FAILURE_)
-// Like ASSERT_EXIT, but continues on to successive tests in the
+// Like `ASSERT_EXIT`, but continues on to successive tests in the
// test suite, if any:
-#define EXPECT_EXIT(statement, predicate, regex) \
- GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_NONFATAL_FAILURE_)
+#define EXPECT_EXIT(statement, predicate, matcher) \
+ GTEST_DEATH_TEST_(statement, predicate, matcher, GTEST_NONFATAL_FAILURE_)
-// Asserts that a given statement causes the program to exit, either by
+// Asserts that a given `statement` causes the program to exit, either by
// explicitly exiting with a nonzero exit code or being killed by a
-// signal, and emitting error output that matches regex.
-#define ASSERT_DEATH(statement, regex) \
- ASSERT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex)
+// signal, and emitting error output that matches `matcher`.
+#define ASSERT_DEATH(statement, matcher) \
+ ASSERT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, matcher)
-// Like ASSERT_DEATH, but continues on to successive tests in the
+// Like `ASSERT_DEATH`, but continues on to successive tests in the
// test suite, if any:
-#define EXPECT_DEATH(statement, regex) \
- EXPECT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex)
+#define EXPECT_DEATH(statement, matcher) \
+ EXPECT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, matcher)
// Two predicate classes that can be used in {ASSERT,EXPECT}_EXIT*:
@@ -190,19 +195,17 @@ GTEST_API_ bool InDeathTestChild();
class GTEST_API_ ExitedWithCode {
public:
explicit ExitedWithCode(int exit_code);
+ ExitedWithCode(const ExitedWithCode&) = default;
+ void operator=(const ExitedWithCode& other) = delete;
bool operator()(int exit_status) const;
private:
- // No implementation - assignment is unsupported.
- void operator=(const ExitedWithCode &other);
-
const int exit_code_;
};
#if !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA
// Tests that an exit code describes an exit due to termination by a
// given signal.
-// GOOGLETEST_CM0006 DO NOT DELETE
class GTEST_API_ KilledBySignal {
public:
explicit KilledBySignal(int signum);
@@ -334,9 +337,9 @@ class GTEST_API_ KilledBySignal {
#define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \
GTEST_UNSUPPORTED_DEATH_TEST(statement, regex, )
#define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \
- GTEST_UNSUPPORTED_DEATH_TEST(statement, regex, return )
+ GTEST_UNSUPPORTED_DEATH_TEST(statement, regex, return)
#endif
} // namespace testing
-#endif // GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_
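The matcher-typed final parameter documented above keeps plain strings working
as regular expressions; a minimal sketch, assuming a POSIX-style environment
where death tests are supported:

    #include <cstdlib>
    #include "gtest/gtest.h"

    TEST(DeathSketch, MatcherArgument) {
      // A bare string is still treated as a regular expression matcher;
      // the empty pattern matches any stderr output.
      EXPECT_DEATH(std::abort(), "");
      // Exit-status predicates compose with EXPECT_EXIT as before.
      EXPECT_EXIT(std::exit(1), testing::ExitedWithCode(1), "");
    }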
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-matchers.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-matchers.h
index 20be24f43ca..bffa00c5338 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-matchers.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-matchers.h
@@ -32,13 +32,14 @@
// This file implements just enough of the matcher interface to allow
// EXPECT_DEATH and friends to accept a matcher argument.
-// IWYU pragma: private, include "testing/base/public/gunit.h"
-// IWYU pragma: friend third_party/googletest/googlemock/.*
-// IWYU pragma: friend third_party/googletest/googletest/.*
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
-#ifndef GTEST_INCLUDE_GTEST_GTEST_MATCHERS_H_
-#define GTEST_INCLUDE_GTEST_GTEST_MATCHERS_H_
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_MATCHERS_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_MATCHERS_H_
+#include <atomic>
#include <memory>
#include <ostream>
#include <string>
@@ -63,38 +64,34 @@ GTEST_DISABLE_MSC_WARNINGS_PUSH_(
namespace testing {
// To implement a matcher Foo for type T, define:
-// 1. a class FooMatcherImpl that implements the
-// MatcherInterface<T> interface, and
-// 2. a factory function that creates a Matcher<T> object from a
-// FooMatcherImpl*.
-//
-// The two-level delegation design makes it possible to allow a user
-// to write "v" instead of "Eq(v)" where a Matcher is expected, which
-// is impossible if we pass matchers by pointers. It also eases
-// ownership management as Matcher objects can now be copied like
-// plain values.
-
-// MatchResultListener is an abstract class. Its << operator can be
-// used by a matcher to explain why a value matches or doesn't match.
+// 1. a class FooMatcherMatcher that implements the matcher interface:
+// using is_gtest_matcher = void;
+// bool MatchAndExplain(const T&, std::ostream*);
+// (MatchResultListener* can also be used instead of std::ostream*)
+// void DescribeTo(std::ostream*);
+// void DescribeNegationTo(std::ostream*);
//
+// 2. a factory function that creates a Matcher<T> object from a
+// FooMatcherMatcher.
+
class MatchResultListener {
public:
// Creates a listener object with the given underlying ostream. The
// listener does not own the ostream, and does not dereference it
// in the constructor or destructor.
- explicit MatchResultListener(::std::ostream *os) : stream_(os) {}
+ explicit MatchResultListener(::std::ostream* os) : stream_(os) {}
virtual ~MatchResultListener() = 0; // Makes this class abstract.
// Streams x to the underlying ostream; does nothing if the ostream
// is NULL.
template <typename T>
- MatchResultListener &operator<<(const T &x) {
+ MatchResultListener& operator<<(const T& x) {
if (stream_ != nullptr) *stream_ << x;
return *this;
}
// Returns the underlying ostream.
- ::std::ostream *stream() { return stream_; }
+ ::std::ostream* stream() { return stream_; }
// Returns true if and only if the listener is interested in an explanation
// of the match result. A matcher's MatchAndExplain() method can use
@@ -103,16 +100,17 @@ class MatchResultListener {
bool IsInterested() const { return stream_ != nullptr; }
private:
- ::std::ostream *const stream_;
+ ::std::ostream* const stream_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(MatchResultListener);
+ MatchResultListener(const MatchResultListener&) = delete;
+ MatchResultListener& operator=(const MatchResultListener&) = delete;
};
inline MatchResultListener::~MatchResultListener() {}
// An instance of a subclass of this knows how to describe itself as a
// matcher.
-class MatcherDescriberInterface {
+class GTEST_API_ MatcherDescriberInterface {
public:
virtual ~MatcherDescriberInterface() {}
@@ -121,7 +119,7 @@ class MatcherDescriberInterface {
// matcher should have. The subject of the verb phrase is the value
// being matched. For example, the DescribeTo() method of the Gt(7)
// matcher prints "is greater than 7".
- virtual void DescribeTo(::std::ostream *os) const = 0;
+ virtual void DescribeTo(::std::ostream* os) const = 0;
// Describes the negation of this matcher to an ostream. For
// example, if the description of this matcher is "is greater than
@@ -129,7 +127,7 @@ class MatcherDescriberInterface {
// You are not required to override this when implementing
// MatcherInterface, but it is highly advised so that your matcher
// can produce good error messages.
- virtual void DescribeNegationTo(::std::ostream *os) const {
+ virtual void DescribeNegationTo(::std::ostream* os) const {
*os << "not (";
DescribeTo(os);
*os << ")";
@@ -171,7 +169,7 @@ class MatcherInterface : public MatcherDescriberInterface {
// can talk to 'listener' without checking its validity first.
// However, in order to implement dummy listeners efficiently,
// listener->stream() may be NULL.
- virtual bool MatchAndExplain(T x, MatchResultListener *listener) const = 0;
+ virtual bool MatchAndExplain(T x, MatchResultListener* listener) const = 0;
// Inherits these methods from MatcherDescriberInterface:
// virtual void DescribeTo(::std::ostream* os) const = 0;
@@ -180,64 +178,39 @@ class MatcherInterface : public MatcherDescriberInterface {
namespace internal {
-// Converts a MatcherInterface<T> to a MatcherInterface<const T&>.
-template <typename T>
-class MatcherInterfaceAdapter : public MatcherInterface<const T &> {
- public:
- explicit MatcherInterfaceAdapter(const MatcherInterface<T> *impl)
- : impl_(impl) {}
- ~MatcherInterfaceAdapter() override { delete impl_; }
-
- void DescribeTo(::std::ostream *os) const override { impl_->DescribeTo(os); }
-
- void DescribeNegationTo(::std::ostream *os) const override {
- impl_->DescribeNegationTo(os);
- }
-
- bool MatchAndExplain(const T &x,
- MatchResultListener *listener) const override {
- return impl_->MatchAndExplain(x, listener);
- }
-
- private:
- const MatcherInterface<T> *const impl_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(MatcherInterfaceAdapter);
-};
-
struct AnyEq {
template <typename A, typename B>
- bool operator()(const A &a, const B &b) const {
+ bool operator()(const A& a, const B& b) const {
return a == b;
}
};
struct AnyNe {
template <typename A, typename B>
- bool operator()(const A &a, const B &b) const {
+ bool operator()(const A& a, const B& b) const {
return a != b;
}
};
struct AnyLt {
template <typename A, typename B>
- bool operator()(const A &a, const B &b) const {
+ bool operator()(const A& a, const B& b) const {
return a < b;
}
};
struct AnyGt {
template <typename A, typename B>
- bool operator()(const A &a, const B &b) const {
+ bool operator()(const A& a, const B& b) const {
return a > b;
}
};
struct AnyLe {
template <typename A, typename B>
- bool operator()(const A &a, const B &b) const {
+ bool operator()(const A& a, const B& b) const {
return a <= b;
}
};
struct AnyGe {
template <typename A, typename B>
- bool operator()(const A &a, const B &b) const {
+ bool operator()(const A& a, const B& b) const {
return a >= b;
}
};
@@ -248,7 +221,8 @@ class DummyMatchResultListener : public MatchResultListener {
DummyMatchResultListener() : MatchResultListener(nullptr) {}
private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(DummyMatchResultListener);
+ DummyMatchResultListener(const DummyMatchResultListener&) = delete;
+ DummyMatchResultListener& operator=(const DummyMatchResultListener&) = delete;
};
// A match result listener that forwards the explanation to a given
@@ -256,41 +230,66 @@ class DummyMatchResultListener : public MatchResultListener {
// that the former is concrete.
class StreamMatchResultListener : public MatchResultListener {
public:
- explicit StreamMatchResultListener(::std::ostream *os)
+ explicit StreamMatchResultListener(::std::ostream* os)
: MatchResultListener(os) {}
private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(StreamMatchResultListener);
+ StreamMatchResultListener(const StreamMatchResultListener&) = delete;
+ StreamMatchResultListener& operator=(const StreamMatchResultListener&) =
+ delete;
+};
+
+struct SharedPayloadBase {
+ std::atomic<int> ref{1};
+ void Ref() { ref.fetch_add(1, std::memory_order_relaxed); }
+ bool Unref() { return ref.fetch_sub(1, std::memory_order_acq_rel) == 1; }
+};
+
+template <typename T>
+struct SharedPayload : SharedPayloadBase {
+ explicit SharedPayload(const T& v) : value(v) {}
+ explicit SharedPayload(T&& v) : value(std::move(v)) {}
+
+ static void Destroy(SharedPayloadBase* shared) {
+ delete static_cast<SharedPayload*>(shared);
+ }
+
+ T value;
};
// An internal class for implementing Matcher<T>, which will derive
// from it. We put functionalities common to all Matcher<T>
// specializations here to avoid code duplication.
template <typename T>
-class MatcherBase {
+class MatcherBase : private MatcherDescriberInterface {
public:
// Returns true if and only if the matcher matches x; also explains the
// match result to 'listener'.
- bool MatchAndExplain(const T &x, MatchResultListener *listener) const {
- return impl_->MatchAndExplain(x, listener);
+ bool MatchAndExplain(const T& x, MatchResultListener* listener) const {
+ GTEST_CHECK_(vtable_ != nullptr);
+ return vtable_->match_and_explain(*this, x, listener);
}
// Returns true if and only if this matcher matches x.
- bool Matches(const T &x) const {
+ bool Matches(const T& x) const {
DummyMatchResultListener dummy;
return MatchAndExplain(x, &dummy);
}
// Describes this matcher to an ostream.
- void DescribeTo(::std::ostream *os) const { impl_->DescribeTo(os); }
+ void DescribeTo(::std::ostream* os) const final {
+ GTEST_CHECK_(vtable_ != nullptr);
+ vtable_->describe(*this, os, false);
+ }
// Describes the negation of this matcher to an ostream.
- void DescribeNegationTo(::std::ostream *os) const {
- impl_->DescribeNegationTo(os);
+ void DescribeNegationTo(::std::ostream* os) const final {
+ GTEST_CHECK_(vtable_ != nullptr);
+ vtable_->describe(*this, os, true);
}
// Explains why x matches, or doesn't match, the matcher.
- void ExplainMatchResultTo(const T &x, ::std::ostream *os) const {
+ void ExplainMatchResultTo(const T& x, ::std::ostream* os) const {
StreamMatchResultListener listener(os);
MatchAndExplain(x, &listener);
}
@@ -298,30 +297,196 @@ class MatcherBase {
// Returns the describer for this matcher object; retains ownership
// of the describer, which is only guaranteed to be alive when
// this matcher object is alive.
- const MatcherDescriberInterface *GetDescriber() const { return impl_.get(); }
+ const MatcherDescriberInterface* GetDescriber() const {
+ if (vtable_ == nullptr) return nullptr;
+ return vtable_->get_describer(*this);
+ }
protected:
- MatcherBase() {}
+ MatcherBase() : vtable_(nullptr), buffer_() {}
// Constructs a matcher from its implementation.
- explicit MatcherBase(const MatcherInterface<const T &> *impl) : impl_(impl) {}
-
template <typename U>
- explicit MatcherBase(
- const MatcherInterface<U> *impl,
- typename std::enable_if<!std::is_same<U, const U &>::value>::type * =
- nullptr)
- : impl_(new internal::MatcherInterfaceAdapter<U>(impl)) {}
+ explicit MatcherBase(const MatcherInterface<U>* impl)
+ : vtable_(nullptr), buffer_() {
+ Init(impl);
+ }
+
+ template <typename M, typename = typename std::remove_reference<
+ M>::type::is_gtest_matcher>
+ MatcherBase(M&& m) : vtable_(nullptr), buffer_() { // NOLINT
+ Init(std::forward<M>(m));
+ }
+
+ MatcherBase(const MatcherBase& other)
+ : vtable_(other.vtable_), buffer_(other.buffer_) {
+ if (IsShared()) buffer_.shared->Ref();
+ }
+
+ MatcherBase& operator=(const MatcherBase& other) {
+ if (this == &other) return *this;
+ Destroy();
+ vtable_ = other.vtable_;
+ buffer_ = other.buffer_;
+ if (IsShared()) buffer_.shared->Ref();
+ return *this;
+ }
+
+ MatcherBase(MatcherBase&& other)
+ : vtable_(other.vtable_), buffer_(other.buffer_) {
+ other.vtable_ = nullptr;
+ }
- MatcherBase(const MatcherBase &) = default;
- MatcherBase &operator=(const MatcherBase &) = default;
- MatcherBase(MatcherBase &&) = default;
- MatcherBase &operator=(MatcherBase &&) = default;
+ MatcherBase& operator=(MatcherBase&& other) {
+ if (this == &other) return *this;
+ Destroy();
+ vtable_ = other.vtable_;
+ buffer_ = other.buffer_;
+ other.vtable_ = nullptr;
+ return *this;
+ }
- virtual ~MatcherBase() {}
+ ~MatcherBase() override { Destroy(); }
private:
- std::shared_ptr<const MatcherInterface<const T &>> impl_;
+ struct VTable {
+ bool (*match_and_explain)(const MatcherBase&, const T&,
+ MatchResultListener*);
+ void (*describe)(const MatcherBase&, std::ostream*, bool negation);
+ // Returns the captured object if it implements the interface, otherwise
+ // returns the MatcherBase itself.
+ const MatcherDescriberInterface* (*get_describer)(const MatcherBase&);
+ // Called on shared instances when the reference count reaches 0.
+ void (*shared_destroy)(SharedPayloadBase*);
+ };
+
+ bool IsShared() const {
+ return vtable_ != nullptr && vtable_->shared_destroy != nullptr;
+ }
+
+ // If the implementation uses a listener, call that.
+ template <typename P>
+ static auto MatchAndExplainImpl(const MatcherBase& m, const T& value,
+ MatchResultListener* listener)
+ -> decltype(P::Get(m).MatchAndExplain(value, listener->stream())) {
+ return P::Get(m).MatchAndExplain(value, listener->stream());
+ }
+
+ template <typename P>
+ static auto MatchAndExplainImpl(const MatcherBase& m, const T& value,
+ MatchResultListener* listener)
+ -> decltype(P::Get(m).MatchAndExplain(value, listener)) {
+ return P::Get(m).MatchAndExplain(value, listener);
+ }
+
+ template <typename P>
+ static void DescribeImpl(const MatcherBase& m, std::ostream* os,
+ bool negation) {
+ if (negation) {
+ P::Get(m).DescribeNegationTo(os);
+ } else {
+ P::Get(m).DescribeTo(os);
+ }
+ }
+
+ template <typename P>
+ static const MatcherDescriberInterface* GetDescriberImpl(
+ const MatcherBase& m) {
+ // If the impl is a MatcherDescriberInterface, then return it.
+ // Otherwise use MatcherBase itself.
+ // This allows us to implement the GetDescriber() function without support
+ // from the impl, but some users really want to get their impl back when
+ // they call GetDescriber().
+ // We use std::get on a tuple as a workaround of not having `if constexpr`.
+ return std::get<(
+ std::is_convertible<decltype(&P::Get(m)),
+ const MatcherDescriberInterface*>::value
+ ? 1
+ : 0)>(std::make_tuple(&m, &P::Get(m)));
+ }
+
+ template <typename P>
+ const VTable* GetVTable() {
+ static constexpr VTable kVTable = {&MatchAndExplainImpl<P>,
+ &DescribeImpl<P>, &GetDescriberImpl<P>,
+ P::shared_destroy};
+ return &kVTable;
+ }
+
+ union Buffer {
+ // Add some types to give Buffer some common alignment/size use cases.
+ void* ptr;
+ double d;
+ int64_t i;
+ // And add one for the out-of-line cases.
+ SharedPayloadBase* shared;
+ };
+
+ void Destroy() {
+ if (IsShared() && buffer_.shared->Unref()) {
+ vtable_->shared_destroy(buffer_.shared);
+ }
+ }
+
+ template <typename M>
+ static constexpr bool IsInlined() {
+ return sizeof(M) <= sizeof(Buffer) && alignof(M) <= alignof(Buffer) &&
+ std::is_trivially_copy_constructible<M>::value &&
+ std::is_trivially_destructible<M>::value;
+ }
+
+ template <typename M, bool = MatcherBase::IsInlined<M>()>
+ struct ValuePolicy {
+ static const M& Get(const MatcherBase& m) {
+ // When inlined along with Init, need to be explicit to avoid violating
+ // strict aliasing rules.
+ const M* ptr =
+ static_cast<const M*>(static_cast<const void*>(&m.buffer_));
+ return *ptr;
+ }
+ static void Init(MatcherBase& m, M impl) {
+ ::new (static_cast<void*>(&m.buffer_)) M(impl);
+ }
+ static constexpr auto shared_destroy = nullptr;
+ };
+
+ template <typename M>
+ struct ValuePolicy<M, false> {
+ using Shared = SharedPayload<M>;
+ static const M& Get(const MatcherBase& m) {
+ return static_cast<Shared*>(m.buffer_.shared)->value;
+ }
+ template <typename Arg>
+ static void Init(MatcherBase& m, Arg&& arg) {
+ m.buffer_.shared = new Shared(std::forward<Arg>(arg));
+ }
+ static constexpr auto shared_destroy = &Shared::Destroy;
+ };
+
+ template <typename U, bool B>
+ struct ValuePolicy<const MatcherInterface<U>*, B> {
+ using M = const MatcherInterface<U>;
+ using Shared = SharedPayload<std::unique_ptr<M>>;
+ static const M& Get(const MatcherBase& m) {
+ return *static_cast<Shared*>(m.buffer_.shared)->value;
+ }
+ static void Init(MatcherBase& m, M* impl) {
+ m.buffer_.shared = new Shared(std::unique_ptr<M>(impl));
+ }
+
+ static constexpr auto shared_destroy = &Shared::Destroy;
+ };
+
+ template <typename M>
+ void Init(M&& m) {
+ using MM = typename std::decay<M>::type;
+ using Policy = ValuePolicy<MM>;
+ vtable_ = GetVTable<Policy>();
+ Policy::Init(*this, std::forward<M>(m));
+ }
+
+ const VTable* vtable_;
+ Buffer buffer_;
};
} // namespace internal
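A minimal sketch of the `is_gtest_matcher` protocol this header now documents
(the class and factory names are illustrative, not part of the library):

    #include <ostream>
    #include "gtest/gtest.h"

    // Implements the duck-typed matcher interface: the nested
    // is_gtest_matcher typedef opts the class into Matcher<T>'s
    // converting constructor.
    class IsDivisibleBySevenMatcher {
     public:
      using is_gtest_matcher = void;
      bool MatchAndExplain(int n, std::ostream* os) const {
        if (n % 7 == 0) return true;
        if (os != nullptr) *os << "the remainder is " << (n % 7);
        return false;
      }
      void DescribeTo(std::ostream* os) const { *os << "is divisible by 7"; }
      void DescribeNegationTo(std::ostream* os) const {
        *os << "isn't divisible by 7";
      }
    };

    testing::Matcher<int> IsDivisibleBySeven() {
      return IsDivisibleBySevenMatcher();
    }

    TEST(MatcherProtocolSketch, Matches) {
      EXPECT_TRUE(IsDivisibleBySeven().Matches(21));
      EXPECT_FALSE(IsDivisibleBySeven().Matches(20));
    }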
@@ -339,16 +504,20 @@ class Matcher : public internal::MatcherBase<T> {
explicit Matcher() {} // NOLINT
// Constructs a matcher from its implementation.
- explicit Matcher(const MatcherInterface<const T &> *impl)
+ explicit Matcher(const MatcherInterface<const T&>* impl)
: internal::MatcherBase<T>(impl) {}
template <typename U>
explicit Matcher(
- const MatcherInterface<U> *impl,
- typename std::enable_if<!std::is_same<U, const U &>::value>::type * =
+ const MatcherInterface<U>* impl,
+ typename std::enable_if<!std::is_same<U, const U&>::value>::type* =
nullptr)
: internal::MatcherBase<T>(impl) {}
+ template <typename M, typename = typename std::remove_reference<
+ M>::type::is_gtest_matcher>
+ Matcher(M&& m) : internal::MatcherBase<T>(std::forward<M>(m)) {} // NOLINT
+
// Implicit constructor here allows people to write
// EXPECT_CALL(foo, Bar(5)) instead of EXPECT_CALL(foo, Bar(Eq(5))) sometimes
Matcher(T value); // NOLINT
@@ -358,20 +527,25 @@ class Matcher : public internal::MatcherBase<T> {
// instead of Eq(str) and "foo" instead of Eq("foo") when a std::string
// matcher is expected.
template <>
-class GTEST_API_ Matcher<const std::string &>
- : public internal::MatcherBase<const std::string &> {
+class GTEST_API_ Matcher<const std::string&>
+ : public internal::MatcherBase<const std::string&> {
public:
Matcher() {}
- explicit Matcher(const MatcherInterface<const std::string &> *impl)
- : internal::MatcherBase<const std::string &>(impl) {}
+ explicit Matcher(const MatcherInterface<const std::string&>* impl)
+ : internal::MatcherBase<const std::string&>(impl) {}
+
+ template <typename M, typename = typename std::remove_reference<
+ M>::type::is_gtest_matcher>
+ Matcher(M&& m) // NOLINT
+ : internal::MatcherBase<const std::string&>(std::forward<M>(m)) {}
// Allows the user to write str instead of Eq(str) sometimes, where
// str is a std::string object.
- Matcher(const std::string &s); // NOLINT
+ Matcher(const std::string& s); // NOLINT
// Allows the user to write "foo" instead of Eq("foo") sometimes.
- Matcher(const char *s); // NOLINT
+ Matcher(const char* s); // NOLINT
};
template <>
@@ -380,17 +554,22 @@ class GTEST_API_ Matcher<std::string>
public:
Matcher() {}
- explicit Matcher(const MatcherInterface<const std::string &> *impl)
+ explicit Matcher(const MatcherInterface<const std::string&>* impl)
: internal::MatcherBase<std::string>(impl) {}
- explicit Matcher(const MatcherInterface<std::string> *impl)
+ explicit Matcher(const MatcherInterface<std::string>* impl)
: internal::MatcherBase<std::string>(impl) {}
+ template <typename M, typename = typename std::remove_reference<
+ M>::type::is_gtest_matcher>
+ Matcher(M&& m) // NOLINT
+ : internal::MatcherBase<std::string>(std::forward<M>(m)) {}
+
// Allows the user to write str instead of Eq(str) sometimes, where
// str is a string object.
- Matcher(const std::string &s); // NOLINT
+ Matcher(const std::string& s); // NOLINT
// Allows the user to write "foo" instead of Eq("foo") sometimes.
- Matcher(const char *s); // NOLINT
+ Matcher(const char* s); // NOLINT
};
#if GTEST_INTERNAL_HAS_STRING_VIEW
@@ -398,20 +577,26 @@ class GTEST_API_ Matcher<std::string>
// instead of Eq(str) and "foo" instead of Eq("foo") when a absl::string_view
// matcher is expected.
template <>
-class GTEST_API_ Matcher<const internal::StringView &>
- : public internal::MatcherBase<const internal::StringView &> {
+class GTEST_API_ Matcher<const internal::StringView&>
+ : public internal::MatcherBase<const internal::StringView&> {
public:
Matcher() {}
- explicit Matcher(const MatcherInterface<const internal::StringView &> *impl)
- : internal::MatcherBase<const internal::StringView &>(impl) {}
+ explicit Matcher(const MatcherInterface<const internal::StringView&>* impl)
+ : internal::MatcherBase<const internal::StringView&>(impl) {}
+
+ template <typename M, typename = typename std::remove_reference<
+ M>::type::is_gtest_matcher>
+ Matcher(M&& m) // NOLINT
+ : internal::MatcherBase<const internal::StringView&>(std::forward<M>(m)) {
+ }
// Allows the user to write str instead of Eq(str) sometimes, where
// str is a std::string object.
- Matcher(const std::string &s); // NOLINT
+ Matcher(const std::string& s); // NOLINT
// Allows the user to write "foo" instead of Eq("foo") sometimes.
- Matcher(const char *s); // NOLINT
+ Matcher(const char* s); // NOLINT
// Allows the user to pass absl::string_views or std::string_views directly.
Matcher(internal::StringView s); // NOLINT
@@ -423,17 +608,22 @@ class GTEST_API_ Matcher<internal::StringView>
public:
Matcher() {}
- explicit Matcher(const MatcherInterface<const internal::StringView &> *impl)
+ explicit Matcher(const MatcherInterface<const internal::StringView&>* impl)
: internal::MatcherBase<internal::StringView>(impl) {}
- explicit Matcher(const MatcherInterface<internal::StringView> *impl)
+ explicit Matcher(const MatcherInterface<internal::StringView>* impl)
: internal::MatcherBase<internal::StringView>(impl) {}
+ template <typename M, typename = typename std::remove_reference<
+ M>::type::is_gtest_matcher>
+ Matcher(M&& m) // NOLINT
+ : internal::MatcherBase<internal::StringView>(std::forward<M>(m)) {}
+
// Allows the user to write str instead of Eq(str) sometimes, where
// str is a std::string object.
- Matcher(const std::string &s); // NOLINT
+ Matcher(const std::string& s); // NOLINT
// Allows the user to write "foo" instead of Eq("foo") sometimes.
- Matcher(const char *s); // NOLINT
+ Matcher(const char* s); // NOLINT
// Allows the user to pass absl::string_views or std::string_views directly.
Matcher(internal::StringView s); // NOLINT
@@ -442,7 +632,7 @@ class GTEST_API_ Matcher<internal::StringView>
// Prints a matcher in a human-readable format.
template <typename T>
-std::ostream &operator<<(std::ostream &os, const Matcher<T> &matcher) {
+std::ostream& operator<<(std::ostream& os, const Matcher<T>& matcher) {
matcher.DescribeTo(&os);
return os;
}
@@ -462,34 +652,34 @@ std::ostream &operator<<(std::ostream &os, const Matcher<T> &matcher) {
template <class Impl>
class PolymorphicMatcher {
public:
- explicit PolymorphicMatcher(const Impl &an_impl) : impl_(an_impl) {}
+ explicit PolymorphicMatcher(const Impl& an_impl) : impl_(an_impl) {}
// Returns a mutable reference to the underlying matcher
// implementation object.
- Impl &mutable_impl() { return impl_; }
+ Impl& mutable_impl() { return impl_; }
// Returns an immutable reference to the underlying matcher
// implementation object.
- const Impl &impl() const { return impl_; }
+ const Impl& impl() const { return impl_; }
template <typename T>
operator Matcher<T>() const {
- return Matcher<T>(new MonomorphicImpl<const T &>(impl_));
+ return Matcher<T>(new MonomorphicImpl<const T&>(impl_));
}
private:
template <typename T>
class MonomorphicImpl : public MatcherInterface<T> {
public:
- explicit MonomorphicImpl(const Impl &impl) : impl_(impl) {}
+ explicit MonomorphicImpl(const Impl& impl) : impl_(impl) {}
- void DescribeTo(::std::ostream *os) const override { impl_.DescribeTo(os); }
+ void DescribeTo(::std::ostream* os) const override { impl_.DescribeTo(os); }
- void DescribeNegationTo(::std::ostream *os) const override {
+ void DescribeNegationTo(::std::ostream* os) const override {
impl_.DescribeNegationTo(os);
}
- bool MatchAndExplain(T x, MatchResultListener *listener) const override {
+ bool MatchAndExplain(T x, MatchResultListener* listener) const override {
return impl_.MatchAndExplain(x, listener);
}
@@ -507,7 +697,7 @@ class PolymorphicMatcher {
// MakeMatcher may create a Matcher that accepts its argument by value, which
// leads to unnecessary copies & lack of support for non-copyable types.
template <typename T>
-inline Matcher<T> MakeMatcher(const MatcherInterface<T> *impl) {
+inline Matcher<T> MakeMatcher(const MatcherInterface<T>* impl) {
return Matcher<T>(impl);
}
@@ -519,7 +709,7 @@ inline Matcher<T> MakeMatcher(const MatcherInterface<T> *impl) {
// vs
// PolymorphicMatcher<TypeOfFoo>(foo);
template <class Impl>
-inline PolymorphicMatcher<Impl> MakePolymorphicMatcher(const Impl &impl) {
+inline PolymorphicMatcher<Impl> MakePolymorphicMatcher(const Impl& impl) {
return PolymorphicMatcher<Impl>(impl);
}
@@ -537,105 +727,100 @@ namespace internal {
template <typename D, typename Rhs, typename Op>
class ComparisonBase {
public:
- explicit ComparisonBase(const Rhs &rhs) : rhs_(rhs) {}
+ explicit ComparisonBase(const Rhs& rhs) : rhs_(rhs) {}
+
+ using is_gtest_matcher = void;
+
template <typename Lhs>
- operator Matcher<Lhs>() const {
- return Matcher<Lhs>(new Impl<const Lhs &>(rhs_));
+ bool MatchAndExplain(const Lhs& lhs, std::ostream*) const {
+ return Op()(lhs, Unwrap(rhs_));
+ }
+ void DescribeTo(std::ostream* os) const {
+ *os << D::Desc() << " ";
+ UniversalPrint(Unwrap(rhs_), os);
+ }
+ void DescribeNegationTo(std::ostream* os) const {
+ *os << D::NegatedDesc() << " ";
+ UniversalPrint(Unwrap(rhs_), os);
}
private:
template <typename T>
- static const T &Unwrap(const T &v) {
+ static const T& Unwrap(const T& v) {
return v;
}
template <typename T>
- static const T &Unwrap(std::reference_wrapper<T> v) {
+ static const T& Unwrap(std::reference_wrapper<T> v) {
return v;
}
- template <typename Lhs, typename = Rhs>
- class Impl : public MatcherInterface<Lhs> {
- public:
- explicit Impl(const Rhs &rhs) : rhs_(rhs) {}
- bool MatchAndExplain(Lhs lhs,
- MatchResultListener * /* listener */) const override {
- return Op()(lhs, Unwrap(rhs_));
- }
- void DescribeTo(::std::ostream *os) const override {
- *os << D::Desc() << " ";
- UniversalPrint(Unwrap(rhs_), os);
- }
- void DescribeNegationTo(::std::ostream *os) const override {
- *os << D::NegatedDesc() << " ";
- UniversalPrint(Unwrap(rhs_), os);
- }
-
- private:
- Rhs rhs_;
- };
Rhs rhs_;
};
template <typename Rhs>
class EqMatcher : public ComparisonBase<EqMatcher<Rhs>, Rhs, AnyEq> {
public:
- explicit EqMatcher(const Rhs &rhs)
+ explicit EqMatcher(const Rhs& rhs)
: ComparisonBase<EqMatcher<Rhs>, Rhs, AnyEq>(rhs) {}
- static const char *Desc() { return "is equal to"; }
- static const char *NegatedDesc() { return "isn't equal to"; }
+ static const char* Desc() { return "is equal to"; }
+ static const char* NegatedDesc() { return "isn't equal to"; }
};
template <typename Rhs>
class NeMatcher : public ComparisonBase<NeMatcher<Rhs>, Rhs, AnyNe> {
public:
- explicit NeMatcher(const Rhs &rhs)
+ explicit NeMatcher(const Rhs& rhs)
: ComparisonBase<NeMatcher<Rhs>, Rhs, AnyNe>(rhs) {}
- static const char *Desc() { return "isn't equal to"; }
- static const char *NegatedDesc() { return "is equal to"; }
+ static const char* Desc() { return "isn't equal to"; }
+ static const char* NegatedDesc() { return "is equal to"; }
};
template <typename Rhs>
class LtMatcher : public ComparisonBase<LtMatcher<Rhs>, Rhs, AnyLt> {
public:
- explicit LtMatcher(const Rhs &rhs)
+ explicit LtMatcher(const Rhs& rhs)
: ComparisonBase<LtMatcher<Rhs>, Rhs, AnyLt>(rhs) {}
- static const char *Desc() { return "is <"; }
- static const char *NegatedDesc() { return "isn't <"; }
+ static const char* Desc() { return "is <"; }
+ static const char* NegatedDesc() { return "isn't <"; }
};
template <typename Rhs>
class GtMatcher : public ComparisonBase<GtMatcher<Rhs>, Rhs, AnyGt> {
public:
- explicit GtMatcher(const Rhs &rhs)
+ explicit GtMatcher(const Rhs& rhs)
: ComparisonBase<GtMatcher<Rhs>, Rhs, AnyGt>(rhs) {}
- static const char *Desc() { return "is >"; }
- static const char *NegatedDesc() { return "isn't >"; }
+ static const char* Desc() { return "is >"; }
+ static const char* NegatedDesc() { return "isn't >"; }
};
template <typename Rhs>
class LeMatcher : public ComparisonBase<LeMatcher<Rhs>, Rhs, AnyLe> {
public:
- explicit LeMatcher(const Rhs &rhs)
+ explicit LeMatcher(const Rhs& rhs)
: ComparisonBase<LeMatcher<Rhs>, Rhs, AnyLe>(rhs) {}
- static const char *Desc() { return "is <="; }
- static const char *NegatedDesc() { return "isn't <="; }
+ static const char* Desc() { return "is <="; }
+ static const char* NegatedDesc() { return "isn't <="; }
};
template <typename Rhs>
class GeMatcher : public ComparisonBase<GeMatcher<Rhs>, Rhs, AnyGe> {
public:
- explicit GeMatcher(const Rhs &rhs)
+ explicit GeMatcher(const Rhs& rhs)
: ComparisonBase<GeMatcher<Rhs>, Rhs, AnyGe>(rhs) {}
- static const char *Desc() { return "is >="; }
- static const char *NegatedDesc() { return "isn't >="; }
+ static const char* Desc() { return "is >="; }
+ static const char* NegatedDesc() { return "isn't >="; }
};
+template <typename T, typename = typename std::enable_if<
+ std::is_constructible<std::string, T>::value>::type>
+using StringLike = T;
+
// Implements polymorphic matchers MatchesRegex(regex) and
// ContainsRegex(regex), which can be used as a Matcher<T> as long as
// T can be converted to a string.
class MatchesRegexMatcher {
public:
- MatchesRegexMatcher(const RE *regex, bool full_match)
+ MatchesRegexMatcher(const RE* regex, bool full_match)
: regex_(regex), full_match_(full_match) {}
#if GTEST_INTERNAL_HAS_STRING_VIEW
- bool MatchAndExplain(const internal::StringView &s,
- MatchResultListener *listener) const {
+ bool MatchAndExplain(const internal::StringView& s,
+ MatchResultListener* listener) const {
return MatchAndExplain(std::string(s), listener);
}
#endif // GTEST_INTERNAL_HAS_STRING_VIEW
@@ -646,7 +831,7 @@ class MatchesRegexMatcher {
// const wchar_t*
// wchar_t*
template <typename CharType>
- bool MatchAndExplain(CharType *s, MatchResultListener *listener) const {
+ bool MatchAndExplain(CharType* s, MatchResultListener* listener) const {
return s != nullptr && MatchAndExplain(std::string(s), listener);
}
@@ -655,19 +840,19 @@ class MatchesRegexMatcher {
// This is a template, not just a plain function with const std::string&,
// because absl::string_view has some interfering non-explicit constructors.
template <class MatcheeStringType>
- bool MatchAndExplain(const MatcheeStringType &s,
- MatchResultListener * /* listener */) const {
- const std::string &s2(s);
+ bool MatchAndExplain(const MatcheeStringType& s,
+ MatchResultListener* /* listener */) const {
+ const std::string& s2(s);
return full_match_ ? RE::FullMatch(s2, *regex_)
: RE::PartialMatch(s2, *regex_);
}
- void DescribeTo(::std::ostream *os) const {
+ void DescribeTo(::std::ostream* os) const {
*os << (full_match_ ? "matches" : "contains") << " regular expression ";
UniversalPrinter<std::string>::Print(regex_->pattern(), os);
}
- void DescribeNegationTo(::std::ostream *os) const {
+ void DescribeNegationTo(::std::ostream* os) const {
*os << "doesn't " << (full_match_ ? "match" : "contain")
<< " regular expression ";
UniversalPrinter<std::string>::Print(regex_->pattern(), os);
@@ -682,23 +867,25 @@ class MatchesRegexMatcher {
// Matches a string that fully matches regular expression 'regex'.
// The matcher takes ownership of 'regex'.
inline PolymorphicMatcher<internal::MatchesRegexMatcher> MatchesRegex(
- const internal::RE *regex) {
+ const internal::RE* regex) {
return MakePolymorphicMatcher(internal::MatchesRegexMatcher(regex, true));
}
-inline PolymorphicMatcher<internal::MatchesRegexMatcher> MatchesRegex(
- const std::string &regex) {
- return MatchesRegex(new internal::RE(regex));
+template <typename T = std::string>
+PolymorphicMatcher<internal::MatchesRegexMatcher> MatchesRegex(
+ const internal::StringLike<T>& regex) {
+ return MatchesRegex(new internal::RE(std::string(regex)));
}
// Matches a string that contains regular expression 'regex'.
// The matcher takes ownership of 'regex'.
inline PolymorphicMatcher<internal::MatchesRegexMatcher> ContainsRegex(
- const internal::RE *regex) {
+ const internal::RE* regex) {
return MakePolymorphicMatcher(internal::MatchesRegexMatcher(regex, false));
}
-inline PolymorphicMatcher<internal::MatchesRegexMatcher> ContainsRegex(
- const std::string &regex) {
- return ContainsRegex(new internal::RE(regex));
+template <typename T = std::string>
+PolymorphicMatcher<internal::MatchesRegexMatcher> ContainsRegex(
+ const internal::StringLike<T>& regex) {
+ return ContainsRegex(new internal::RE(std::string(regex)));
}
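A usage sketch, not part of the patch: via the StringLike alias above, both factories now accept anything constructible into std::string (text is an illustrative variable):

    // EXPECT_THAT("hello world", MatchesRegex("hello.*"));       // const char*
    // EXPECT_THAT(text, ContainsRegex(std::string("wor")));      // std::string
    // EXPECT_THAT(text, MatchesRegex(std::string_view("h.*")));  // C++17 view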
// Creates a polymorphic matcher that matches anything equal to x.
@@ -729,7 +916,7 @@ Matcher<T>::Matcher(T value) {
// can always write Matcher<T>(Lt(5)) to be explicit about the type,
// for example.
template <typename Lhs, typename Rhs>
-inline Matcher<Lhs> TypedEq(const Rhs &rhs) {
+inline Matcher<Lhs> TypedEq(const Rhs& rhs) {
return Eq(rhs);
}
@@ -766,4 +953,4 @@ inline internal::NeMatcher<Rhs> Ne(Rhs x) {
GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 5046
-#endif // GTEST_INCLUDE_GTEST_GTEST_MATCHERS_H_
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_MATCHERS_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-message.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-message.h
index 713facae84e..6c8bf900094 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-message.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-message.h
@@ -27,7 +27,6 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
// The Google C++ Testing and Mocking Framework (Google Test)
//
// This header file defines the Message class.
@@ -42,10 +41,12 @@
// to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user
// program!
-// GOOGLETEST_CM0001 DO NOT DELETE
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
-#ifndef GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
-#define GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
#include <limits>
#include <memory>
@@ -58,7 +59,7 @@ GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
// Ensures that there is at least one operator<< in the global namespace.
// See Message& operator<<(...) below for why.
-void operator<<(const testing::internal::Secret &, int);
+void operator<<(const testing::internal::Secret&, int);
namespace testing {
@@ -92,26 +93,26 @@ class GTEST_API_ Message {
private:
// The type of basic IO manipulators (endl, ends, and flush) for
// narrow streams.
- typedef std::ostream &(*BasicNarrowIoManip)(std::ostream &);
+ typedef std::ostream& (*BasicNarrowIoManip)(std::ostream&);
public:
// Constructs an empty Message.
Message();
// Copy constructor.
- Message(const Message &msg) : ss_(new ::std::stringstream) { // NOLINT
+ Message(const Message& msg) : ss_(new ::std::stringstream) { // NOLINT
*ss_ << msg.GetString();
}
// Constructs a Message from a C-string.
- explicit Message(const char *str) : ss_(new ::std::stringstream) {
+ explicit Message(const char* str) : ss_(new ::std::stringstream) {
*ss_ << str;
}
// Streams a non-pointer value to this object.
template <typename T>
- inline Message &operator<<(const T &val) {
- // Some libraries overload << for STL containers. These
+ inline Message& operator<<(const T& val) {
+ // Some libraries overload << for STL containers. These
// overloads are defined in the global namespace instead of ::std.
//
// C++'s symbol lookup rule (i.e. Koenig lookup) says that these
@@ -144,7 +145,7 @@ class GTEST_API_ Message {
// ensure consistent result across compilers, we always treat NULL
// as "(null)".
template <typename T>
- inline Message &operator<<(T *const &pointer) { // NOLINT
+ inline Message& operator<<(T* const& pointer) { // NOLINT
if (pointer == nullptr) {
*ss_ << "(null)";
} else {
@@ -159,23 +160,23 @@ class GTEST_API_ Message {
// templatized version above. Without this definition, streaming
// endl or other basic IO manipulators to Message will confuse the
// compiler.
- Message &operator<<(BasicNarrowIoManip val) {
+ Message& operator<<(BasicNarrowIoManip val) {
*ss_ << val;
return *this;
}
// Instead of 1/0, we want to see true/false for bool values.
- Message &operator<<(bool b) { return *this << (b ? "true" : "false"); }
+ Message& operator<<(bool b) { return *this << (b ? "true" : "false"); }
// These two overloads allow streaming a wide C string to a Message
// using the UTF-8 encoding.
- Message &operator<<(const wchar_t *wide_c_str);
- Message &operator<<(wchar_t *wide_c_str);
+ Message& operator<<(const wchar_t* wide_c_str);
+ Message& operator<<(wchar_t* wide_c_str);
#if GTEST_HAS_STD_WSTRING
// Converts the given wide string to a narrow string using the UTF-8
// encoding, and streams the result to this Message object.
- Message &operator<<(const ::std::wstring &wstr);
+ Message& operator<<(const ::std::wstring& wstr);
#endif // GTEST_HAS_STD_WSTRING
// Gets the text streamed to this object so far as an std::string.
@@ -190,11 +191,11 @@ class GTEST_API_ Message {
// We declare (but don't implement) this to prevent the compiler
// from implementing the assignment operator.
- void operator=(const Message &);
+ void operator=(const Message&);
};
// Streams a Message to an ostream.
-inline std::ostream &operator<<(std::ostream &os, const Message &sb) {
+inline std::ostream& operator<<(std::ostream& os, const Message& sb) {
return os << sb.GetString();
}
@@ -205,7 +206,7 @@ namespace internal {
// ::std::string, ::wstring, or ::std::wstring object, each NUL
// character in it is replaced with "\\0".
template <typename T>
-std::string StreamableToString(const T &streamable) {
+std::string StreamableToString(const T& streamable) {
return (Message() << streamable).GetString();
}
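A small sketch, not part of the patch, tying Message and StreamableToString together (DescribeValue is an illustrative name):

    inline std::string DescribeValue(int n, bool ok) {
      testing::Message msg;
      msg << "n = " << n << ", ok = " << ok;  // bool streams as true/false
      return msg.GetString();                 // e.g. "n = 42, ok = true"
      // One-shot equivalent: testing::internal::StreamableToString(n)
    }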
@@ -214,4 +215,4 @@ std::string StreamableToString(const T &streamable) {
GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
-#endif // GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_MESSAGE_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-param-test.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-param-test.h
index 8d01df52503..b55119ac62f 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-param-test.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-param-test.h
@@ -26,15 +26,16 @@
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+
// Macros and functions for implementing parameterized tests
// in Google C++ Testing and Mocking Framework (Google Test)
-//
-// This file is generated by a SCRIPT. DO NOT EDIT BY HAND!
-//
-// GOOGLETEST_CM0001 DO NOT DELETE
-#ifndef GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
-#define GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
+
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
// Value-parameterized tests allow you to test your code with different
// parameters without writing multiple copies of the same test.
@@ -306,7 +307,7 @@ internal::ParamGenerator<T> ValuesIn(const T (&array)[N]) {
template <class Container>
internal::ParamGenerator<typename Container::value_type> ValuesIn(
- const Container &container) {
+ const Container& container) {
return ValuesIn(container.begin(), container.end());
}
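A usage sketch, not part of the patch (PrimeTest is an assumed value-parameterized suite):

    // std::vector<int> primes = {2, 3, 5, 7};
    // INSTANTIATE_TEST_SUITE_P(Small, PrimeTest, ::testing::ValuesIn(primes));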
@@ -368,8 +369,6 @@ inline internal::ParamGenerator<bool> Bool() { return Values(false, true); }
// std::tuple<T1, T2, ..., TN> where T1, T2, ..., TN are the types
// of elements from sequences produces by gen1, gen2, ..., genN.
//
-// Combine can have up to 10 arguments.
-//
// Example:
//
// This will instantiate tests in test suite AnimalTest each one with
@@ -404,7 +403,7 @@ inline internal::ParamGenerator<bool> Bool() { return Values(false, true); }
// Combine(Bool(), Bool()));
//
template <typename... Generator>
-internal::CartesianProductHolder<Generator...> Combine(const Generator &... g) {
+internal::CartesianProductHolder<Generator...> Combine(const Generator&... g) {
return internal::CartesianProductHolder<Generator...>(g...);
}
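A usage sketch, not part of the patch, echoing the AnimalTest example above; every generated parameter is a std::tuple over the combined generators:

    // INSTANTIATE_TEST_SUITE_P(
    //     Pairs, AnimalTest,
    //     ::testing::Combine(::testing::Values("cat", "dog"), ::testing::Bool()));
    // GetParam() has type std::tuple<const char*, bool> inside each test.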
@@ -425,12 +424,16 @@ internal::CartesianProductHolder<Generator...> Combine(const Generator &... g) {
->AddTestPattern( \
GTEST_STRINGIFY_(test_suite_name), GTEST_STRINGIFY_(test_name), \
new ::testing::internal::TestMetaFactory<GTEST_TEST_CLASS_NAME_( \
- test_suite_name, test_name)>()); \
+ test_suite_name, test_name)>(), \
+ ::testing::internal::CodeLocation(__FILE__, __LINE__)); \
return 0; \
} \
static int gtest_registering_dummy_ GTEST_ATTRIBUTE_UNUSED_; \
- GTEST_DISALLOW_COPY_AND_ASSIGN_(GTEST_TEST_CLASS_NAME_(test_suite_name, \
- test_name)); \
+ GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) \
+ (const GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) &) = delete; \
+ GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) & operator=( \
+ const GTEST_TEST_CLASS_NAME_(test_suite_name, \
+ test_name) &) = delete; /* NOLINT */ \
}; \
int GTEST_TEST_CLASS_NAME_(test_suite_name, \
test_name)::gtest_registering_dummy_ = \
@@ -460,7 +463,7 @@ internal::CartesianProductHolder<Generator...> Combine(const Generator &... g) {
return GTEST_EXPAND_(GTEST_GET_FIRST_(__VA_ARGS__, DUMMY_PARAM_)); \
} \
static ::std::string gtest_##prefix##test_suite_name##_EvalGenerateName_( \
- const ::testing::TestParamInfo<test_suite_name::ParamType> &info) { \
+ const ::testing::TestParamInfo<test_suite_name::ParamType>& info) { \
if (::testing::internal::AlwaysFalse()) { \
::testing::internal::TestNotEmpty(GTEST_EXPAND_(GTEST_GET_SECOND_( \
__VA_ARGS__, \
@@ -504,4 +507,4 @@ internal::CartesianProductHolder<Generator...> Combine(const Generator &... g) {
} // namespace testing
-#endif // GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_
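A usage sketch, not part of the patch, for the optional name-generator argument wired up through the _EvalGenerateName_ machinery above (MyTest is an assumed suite):

    // INSTANTIATE_TEST_SUITE_P(
    //     Sizes, MyTest, ::testing::Values(1, 8, 64),
    //     [](const ::testing::TestParamInfo<MyTest::ParamType>& info) {
    //       return "Size" + std::to_string(info.param);
    //     });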
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-printers.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-printers.h
index 950247cf67e..a91e8b8b10e 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-printers.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-printers.h
@@ -94,12 +94,15 @@
// being defined as many user-defined container types don't have
// value_type.
-// GOOGLETEST_CM0001 DO NOT DELETE
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
-#ifndef GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
-#define GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
#include <functional>
+#include <memory>
#include <ostream> // NOLINT
#include <sstream>
#include <string>
@@ -107,64 +110,125 @@
#include <type_traits>
#include <utility>
#include <vector>
+
#include "gtest/internal/gtest-internal.h"
#include "gtest/internal/gtest-port.h"
-#if GTEST_HAS_ABSL
-#include "absl/strings/string_view.h"
-#include "absl/types/optional.h"
-#include "absl/types/variant.h"
-#endif // GTEST_HAS_ABSL
-
namespace testing {
-// Definitions in the 'internal' and 'internal2' name spaces are
-// subject to change without notice. DO NOT USE THEM IN USER CODE!
-namespace internal2 {
+// Definitions in the internal* namespaces are subject to change without notice.
+// DO NOT USE THEM IN USER CODE!
+namespace internal {
-// Prints the given number of bytes in the given object to the given
-// ostream.
-GTEST_API_ void PrintBytesInObjectTo(const unsigned char *obj_bytes,
- size_t count, ::std::ostream *os);
-
-// For selecting which printer to use when a given type has neither <<
-// nor PrintTo().
-enum TypeKind {
- kProtobuf, // a protobuf type
- kConvertibleToInteger, // a type implicitly convertible to BiggestInt
- // (e.g. a named or unnamed enum type)
-#if GTEST_INTERNAL_HAS_STRING_VIEW
- kConvertibleToStringView, // a type implicitly convertible to
- // absl::string_view or std::string_view
-#endif
- kOtherType // anything else
+template <typename T>
+void UniversalPrint(const T& value, ::std::ostream* os);
+
+// Used to print an STL-style container when the user doesn't define
+// a PrintTo() for it.
+struct ContainerPrinter {
+ template <typename T,
+ typename = typename std::enable_if<
+ (sizeof(IsContainerTest<T>(0)) == sizeof(IsContainer)) &&
+ !IsRecursiveContainer<T>::value>::type>
+ static void PrintValue(const T& container, std::ostream* os) {
+ const size_t kMaxCount = 32; // The maximum number of elements to print.
+ *os << '{';
+ size_t count = 0;
+ for (auto&& elem : container) {
+ if (count > 0) {
+ *os << ',';
+ if (count == kMaxCount) { // Enough has been printed.
+ *os << " ...";
+ break;
+ }
+ }
+ *os << ' ';
+ // We cannot call PrintTo(elem, os) here as PrintTo() doesn't
+ // handle `elem` being a native array.
+ internal::UniversalPrint(elem, os);
+ ++count;
+ }
+
+ if (count > 0) {
+ *os << ' ';
+ }
+ *os << '}';
+ }
};
-// TypeWithoutFormatter<T, kTypeKind>::PrintValue(value, os) is called
-// by the universal printer to print a value of type T when neither
-// operator<< nor PrintTo() is defined for T, where kTypeKind is the
-// "kind" of T as defined by enum TypeKind.
-template <typename T, TypeKind kTypeKind>
-class TypeWithoutFormatter {
- public:
- // This default version is called when kTypeKind is kOtherType.
- static void PrintValue(const T &value, ::std::ostream *os) {
- PrintBytesInObjectTo(
- static_cast<const unsigned char *>(
- reinterpret_cast<const void *>(std::addressof(value))),
- sizeof(value), os);
+// Used to print a pointer that is neither a char pointer nor a member
+// pointer, when the user doesn't define PrintTo() for it. (A member
+// variable pointer or member function pointer doesn't really point to
+// a location in the address space. Their representation is
+// implementation-defined. Therefore they will be printed as raw
+// bytes.)
+struct FunctionPointerPrinter {
+ template <typename T, typename = typename std::enable_if<
+ std::is_function<T>::value>::type>
+ static void PrintValue(T* p, ::std::ostream* os) {
+ if (p == nullptr) {
+ *os << "NULL";
+ } else {
+ // T is a function type, so '*os << p' doesn't do what we want
+ // (it just prints p as bool). We want to print p as a const
+ // void*.
+ *os << reinterpret_cast<const void*>(p);
+ }
}
};
-// We print a protobuf using its ShortDebugString() when the string
-// doesn't exceed this many characters; otherwise we print it using
-// DebugString() for better readability.
-const size_t kProtobufOneLinerMaxLength = 50;
+struct PointerPrinter {
+ template <typename T>
+ static void PrintValue(T* p, ::std::ostream* os) {
+ if (p == nullptr) {
+ *os << "NULL";
+ } else {
+ // T is not a function type. We just call << to print p,
+ // relying on ADL to pick up user-defined << for their pointer
+ // types, if any.
+ *os << p;
+ }
+ }
+};
-template <typename T>
-class TypeWithoutFormatter<T, kProtobuf> {
- public:
- static void PrintValue(const T &value, ::std::ostream *os) {
+namespace internal_stream_operator_without_lexical_name_lookup {
+
+// The presence of an operator<< here will terminate lexical scope lookup
+// straight away (even though it cannot be a match because of its argument
+// types). Thus, the two operator<< calls in StreamPrinter will find only ADL
+// candidates.
+struct LookupBlocker {};
+void operator<<(LookupBlocker, LookupBlocker);
+
+struct StreamPrinter {
+ template <typename T,
+ // Don't accept member pointers here. We'd print them via implicit
+ // conversion to bool, which isn't useful.
+ typename = typename std::enable_if<
+ !std::is_member_pointer<T>::value>::type,
+ // Only accept types for which we can find a streaming operator via
+ // ADL (possibly involving implicit conversions).
+ typename = decltype(std::declval<std::ostream&>()
+ << std::declval<const T&>())>
+ static void PrintValue(const T& value, ::std::ostream* os) {
+ // Call streaming operator found by ADL, possibly with implicit conversions
+ // of the arguments.
+ *os << value;
+ }
+};
+
+} // namespace internal_stream_operator_without_lexical_name_lookup
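A self-contained miniature, not part of the patch, of the lookup-blocking idiom above (the blocker_demo names are illustrative): once any operator<< is declared in the current namespace, unqualified lookup stops there instead of climbing to enclosing namespaces, so non-member candidates can only arrive via ADL on the argument types:

    namespace blocker_demo {
    struct Blocker {};
    void operator<<(Blocker, Blocker);  // never defined or called

    template <typename T>
    void Print(const T& v, std::ostream* os) {
      *os << v;  // non-member candidates come via ADL on T and std::ostream only
    }
    }  // namespace blocker_demo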
+
+struct ProtobufPrinter {
+ // We print a protobuf using its ShortDebugString() when the string
+ // doesn't exceed this many characters; otherwise we print it using
+ // DebugString() for better readability.
+ static const size_t kProtobufOneLinerMaxLength = 50;
+
+ template <typename T,
+ typename = typename std::enable_if<
+ internal::HasDebugStringAndShortDebugString<T>::value>::type>
+ static void PrintValue(const T& value, ::std::ostream* os) {
std::string pretty_str = value.ShortDebugString();
if (pretty_str.length() > kProtobufOneLinerMaxLength) {
pretty_str = "\n" + value.DebugString();
@@ -173,9 +237,7 @@ class TypeWithoutFormatter<T, kProtobuf> {
}
};
-template <typename T>
-class TypeWithoutFormatter<T, kConvertibleToInteger> {
- public:
+struct ConvertibleToIntegerPrinter {
// Since T has no << operator or PrintTo() but can be implicitly
// converted to BiggestInt, we print it as a BiggestInt.
//
@@ -183,111 +245,72 @@ class TypeWithoutFormatter<T, kConvertibleToInteger> {
// case printing it as an integer is the desired behavior. In case
// T is not an enum, printing it as an integer is the best we can do
// given that it has no user-defined printer.
- static void PrintValue(const T &value, ::std::ostream *os) {
- const internal::BiggestInt kBigInt = value;
- *os << kBigInt;
+ static void PrintValue(internal::BiggestInt value, ::std::ostream* os) {
+ *os << value;
}
};
+struct ConvertibleToStringViewPrinter {
#if GTEST_INTERNAL_HAS_STRING_VIEW
-template <typename T>
-class TypeWithoutFormatter<T, kConvertibleToStringView> {
- public:
- // Since T has neither operator<< nor PrintTo() but can be implicitly
- // converted to absl::string_view, we print it as a absl::string_view
- // (or std::string_view).
- //
- // Note: the implementation is further below, as it depends on
- // internal::PrintTo symbol which is defined later in the file.
- static void PrintValue(const T &value, ::std::ostream *os);
-};
+ static void PrintValue(internal::StringView value, ::std::ostream* os) {
+ internal::UniversalPrint(value, os);
+ }
#endif
+};
-// Prints the given value to the given ostream. If the value is a
-// protocol message, its debug string is printed; if it's an enum or
-// of a type implicitly convertible to BiggestInt, it's printed as an
-// integer; otherwise the bytes in the value are printed. This is
-// what UniversalPrinter<T>::Print() does when it knows nothing about
-// type T and T has neither << operator nor PrintTo().
-//
-// A user can override this behavior for a class type Foo by defining
-// a << operator in the namespace where Foo is defined.
-//
-// We put this operator in namespace 'internal2' instead of 'internal'
-// to simplify the implementation, as much code in 'internal' needs to
-// use << in STL, which would conflict with our own << were it defined
-// in 'internal'.
-//
-// Note that this operator<< takes a generic std::basic_ostream<Char,
-// CharTraits> type instead of the more restricted std::ostream. If
-// we define it to take an std::ostream instead, we'll get an
-// "ambiguous overloads" compiler error when trying to print a type
-// Foo that supports streaming to std::basic_ostream<Char,
-// CharTraits>, as the compiler cannot tell whether
-// operator<<(std::ostream&, const T&) or
-// operator<<(std::basic_stream<Char, CharTraits>, const Foo&) is more
-// specific.
-template <typename Char, typename CharTraits, typename T>
-::std::basic_ostream<Char, CharTraits> &operator<<(
- ::std::basic_ostream<Char, CharTraits> &os, const T &x) {
- TypeWithoutFormatter<
- T, (internal::IsAProtocolMessage<T>::value
- ? kProtobuf
- : std::is_convertible<const T &, internal::BiggestInt>::value
- ? kConvertibleToInteger
- :
-#if GTEST_INTERNAL_HAS_STRING_VIEW
- std::is_convertible<const T &, internal::StringView>::value
- ? kConvertibleToStringView
- :
-#endif
- kOtherType)>::PrintValue(x, &os);
- return os;
-}
+// Prints the given number of bytes in the given object to the given
+// ostream.
+GTEST_API_ void PrintBytesInObjectTo(const unsigned char* obj_bytes,
+ size_t count, ::std::ostream* os);
+struct RawBytesPrinter {
+ // SFINAE on `sizeof` to make sure we have a complete type.
+ template <typename T, size_t = sizeof(T)>
+ static void PrintValue(const T& value, ::std::ostream* os) {
+ PrintBytesInObjectTo(
+ static_cast<const unsigned char*>(
+ // Load bearing cast to void* to support iOS
+ reinterpret_cast<const void*>(std::addressof(value))),
+ sizeof(value), os);
+ }
+};
-} // namespace internal2
-} // namespace testing
+struct FallbackPrinter {
+ template <typename T>
+ static void PrintValue(const T&, ::std::ostream* os) {
+ *os << "(incomplete type)";
+ }
+};
-// This namespace MUST NOT BE NESTED IN ::testing, or the name look-up
-// magic needed for implementing UniversalPrinter won't work.
-namespace testing_internal {
+// Try every printer in order and return the first one that works.
+template <typename T, typename E, typename Printer, typename... Printers>
+struct FindFirstPrinter : FindFirstPrinter<T, E, Printers...> {};
-// Used to print a value that is not an STL-style container when the
-// user doesn't define PrintTo() for it.
+template <typename T, typename Printer, typename... Printers>
+struct FindFirstPrinter<
+ T, decltype(Printer::PrintValue(std::declval<const T&>(), nullptr)),
+ Printer, Printers...> {
+ using type = Printer;
+};
+
+// Select the best printer in the following order:
+// - Print containers (they have begin/end/etc).
+// - Print function pointers.
+// - Print object pointers.
+// - Use the stream operator, if available.
+// - Print protocol buffers.
+// - Print types convertible to BiggestInt.
+// - Print types convertible to StringView, if available.
+// - Fallback to printing the raw bytes of the object.
template <typename T>
-void DefaultPrintNonContainerTo(const T &value, ::std::ostream *os) {
- // With the following statement, during unqualified name lookup,
- // testing::internal2::operator<< appears as if it was declared in
- // the nearest enclosing namespace that contains both
- // ::testing_internal and ::testing::internal2, i.e. the global
- // namespace. For more details, refer to the C++ Standard section
- // 7.3.4-1 [namespace.udir]. This allows us to fall back onto
- // testing::internal2::operator<< in case T doesn't come with a <<
- // operator.
-
- using ::testing::internal2::operator<<;
-
- // Assuming T is defined in namespace foo, in the next statement,
- // the compiler will consider all of:
- //
- // 1. foo::operator<< (thanks to Koenig look-up),
- // 2. ::operator<< (as the current namespace is enclosed in ::),
- // 3. testing::internal2::operator<< (thanks to the using statement above).
- //
- // The operator<< whose type matches T best will be picked.
- //
- // We deliberately allow #2 to be a candidate, as sometimes it's
- // impossible to define #1 (e.g. when foo is ::std, defining
- // anything in it is undefined behavior unless you are a compiler
- // vendor.).
- *os << value;
+void PrintWithFallback(const T& value, ::std::ostream* os) {
+ using Printer = typename FindFirstPrinter<
+ T, void, ContainerPrinter, FunctionPointerPrinter, PointerPrinter,
+ internal_stream_operator_without_lexical_name_lookup::StreamPrinter,
+ ProtobufPrinter, ConvertibleToIntegerPrinter,
+ ConvertibleToStringViewPrinter, RawBytesPrinter, FallbackPrinter>::type;
+ Printer::PrintValue(value, os);
}
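A miniature, not part of the patch, of how FindFirstPrinter selects (StreamIt and Opaque are illustrative): the partial specialization is viable only when Printer::PrintValue(declval<const T&>(), nullptr) compiles, since only then does its decltype equal the E parameter (void); otherwise the primary template recurses to the next printer in the list:

    struct StreamIt {
      template <typename T, typename = decltype(std::declval<std::ostream&>()
                                                << std::declval<const T&>())>
      static void PrintValue(const T& v, std::ostream* os) { *os << v; }
    };
    struct Opaque {
      template <typename T>
      static void PrintValue(const T&, std::ostream* os) { *os << "(?)"; }
    };
    // FindFirstPrinter<T, void, StreamIt, Opaque>::type is StreamIt for
    // streamable T, Opaque otherwise.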
-} // namespace testing_internal
-
-namespace testing {
-namespace internal {
-
// FormatForComparison<ToPrint, OtherOperand>::Format(value) formats a
// value of type ToPrint that is an operand of a comparison assertion
// (e.g. ASSERT_EQ). OtherOperand is the type of the other operand in
@@ -306,7 +329,7 @@ namespace internal {
template <typename ToPrint, typename OtherOperand>
class FormatForComparison {
public:
- static ::std::string Format(const ToPrint &value) {
+ static ::std::string Format(const ToPrint& value) {
return ::testing::PrintToString(value);
}
};
@@ -315,27 +338,35 @@ class FormatForComparison {
template <typename ToPrint, size_t N, typename OtherOperand>
class FormatForComparison<ToPrint[N], OtherOperand> {
public:
- static ::std::string Format(const ToPrint *value) {
- return FormatForComparison<const ToPrint *, OtherOperand>::Format(value);
+ static ::std::string Format(const ToPrint* value) {
+ return FormatForComparison<const ToPrint*, OtherOperand>::Format(value);
}
};
// By default, print C string as pointers to be safe, as we don't know
// whether they actually point to a NUL-terminated string.
-#define GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(CharType) \
- template <typename OtherOperand> \
- class FormatForComparison<CharType *, OtherOperand> { \
- public: \
- static ::std::string Format(CharType *value) { \
- return ::testing::PrintToString(static_cast<const void *>(value)); \
- } \
+#define GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(CharType) \
+ template <typename OtherOperand> \
+ class FormatForComparison<CharType*, OtherOperand> { \
+ public: \
+ static ::std::string Format(CharType* value) { \
+ return ::testing::PrintToString(static_cast<const void*>(value)); \
+ } \
}
GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char);
GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char);
GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(wchar_t);
GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const wchar_t);
+#ifdef __cpp_lib_char8_t
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char8_t);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char8_t);
+#endif
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char16_t);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char16_t);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char32_t);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char32_t);
#undef GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_
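A short sketch, not part of the patch, of what the pointer rule above means for failure messages:

    // EXPECT_EQ(p, q) with char* on both sides: each side is printed as a raw
    // pointer, since either may point at non-NUL-terminated data.
    // EXPECT_EQ(std::string("foo"), p): the char* side is being compared to a
    // string, so it is known to be one and is printed as "foo" (see below).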
@@ -344,15 +375,23 @@ GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const wchar_t);
#define GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(CharType, OtherStringType) \
template <> \
- class FormatForComparison<CharType *, OtherStringType> { \
+ class FormatForComparison<CharType*, OtherStringType> { \
public: \
- static ::std::string Format(CharType *value) { \
+ static ::std::string Format(CharType* value) { \
return ::testing::PrintToString(value); \
} \
}
GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char, ::std::string);
GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char, ::std::string);
+#ifdef __cpp_char8_t
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char8_t, ::std::u8string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char8_t, ::std::u8string);
+#endif
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char16_t, ::std::u16string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char16_t, ::std::u16string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char32_t, ::std::u32string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char32_t, ::std::u32string);
#if GTEST_HAS_STD_WSTRING
GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(wchar_t, ::std::wstring);
@@ -370,8 +409,8 @@ GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const wchar_t, ::std::wstring);
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
template <typename T1, typename T2>
-std::string FormatForComparisonFailureMessage(const T1 &value,
- const T2 & /* other_operand */) {
+std::string FormatForComparisonFailureMessage(const T1& value,
+ const T2& /* other_operand */) {
return FormatForComparison<T1, T2>::Format(value);
}
@@ -385,86 +424,6 @@ std::string FormatForComparisonFailureMessage(const T1 &value,
template <typename T>
class UniversalPrinter;
-template <typename T>
-void UniversalPrint(const T &value, ::std::ostream *os);
-
-enum DefaultPrinterType {
- kPrintContainer,
- kPrintPointer,
- kPrintFunctionPointer,
- kPrintOther,
-};
-template <DefaultPrinterType type>
-struct WrapPrinterType {};
-
-// Used to print an STL-style container when the user doesn't define
-// a PrintTo() for it.
-template <typename C>
-void DefaultPrintTo(WrapPrinterType<kPrintContainer> /* dummy */,
- const C &container, ::std::ostream *os) {
- const size_t kMaxCount = 32; // The maximum number of elements to print.
- *os << '{';
- size_t count = 0;
- for (typename C::const_iterator it = container.begin(); it != container.end();
- ++it, ++count) {
- if (count > 0) {
- *os << ',';
- if (count == kMaxCount) { // Enough has been printed.
- *os << " ...";
- break;
- }
- }
- *os << ' ';
- // We cannot call PrintTo(*it, os) here as PrintTo() doesn't
- // handle *it being a native array.
- internal::UniversalPrint(*it, os);
- }
-
- if (count > 0) {
- *os << ' ';
- }
- *os << '}';
-}
-
-// Used to print a pointer that is neither a char pointer nor a member
-// pointer, when the user doesn't define PrintTo() for it. (A member
-// variable pointer or member function pointer doesn't really point to
-// a location in the address space. Their representation is
-// implementation-defined. Therefore they will be printed as raw
-// bytes.)
-template <typename T>
-void DefaultPrintTo(WrapPrinterType<kPrintPointer> /* dummy */, T *p,
- ::std::ostream *os) {
- if (p == nullptr) {
- *os << "NULL";
- } else {
- // T is not a function type. We just call << to print p,
- // relying on ADL to pick up user-defined << for their pointer
- // types, if any.
- *os << p;
- }
-}
-template <typename T>
-void DefaultPrintTo(WrapPrinterType<kPrintFunctionPointer> /* dummy */, T *p,
- ::std::ostream *os) {
- if (p == nullptr) {
- *os << "NULL";
- } else {
- // T is a function type, so '*os << p' doesn't do what we want
- // (it just prints p as bool). We want to print p as a const
- // void*.
- *os << reinterpret_cast<const void *>(p);
- }
-}
-
-// Used to print a non-container, non-pointer value when the user
-// doesn't define PrintTo() for it.
-template <typename T>
-void DefaultPrintTo(WrapPrinterType<kPrintOther> /* dummy */, const T &value,
- ::std::ostream *os) {
- ::testing_internal::DefaultPrintNonContainerTo(value, os);
-}
-
// Prints the given value using the << operator if it has one;
// otherwise prints the bytes in it. This is what
// UniversalPrinter<T>::Print() does when PrintTo() is not specialized
@@ -477,37 +436,8 @@ void DefaultPrintTo(WrapPrinterType<kPrintOther> /* dummy */, const T &value,
// or there is already a << operator but it doesn't do what the user
// wants).
template <typename T>
-void PrintTo(const T &value, ::std::ostream *os) {
- // DefaultPrintTo() is overloaded. The type of its first argument
- // determines which version will be picked.
- //
- // Note that we check for container types here, prior to we check
- // for protocol message types in our operator<<. The rationale is:
- //
- // For protocol messages, we want to give people a chance to
- // override Google Mock's format by defining a PrintTo() or
- // operator<<. For STL containers, other formats can be
- // incompatible with Google Mock's format for the container
- // elements; therefore we check for container types here to ensure
- // that our format is used.
- //
- // Note that MSVC and clang-cl do allow an implicit conversion from
- // pointer-to-function to pointer-to-object, but clang-cl warns on it.
- // So don't use ImplicitlyConvertible if it can be helped since it will
- // cause this warning, and use a separate overload of DefaultPrintTo for
- // function pointers so that the `*os << p` in the object pointer overload
- // doesn't cause that warning either.
- DefaultPrintTo(
- WrapPrinterType <
- (sizeof(IsContainerTest<T>(0)) == sizeof(IsContainer)) &&
- !IsRecursiveContainer<T>::value
- ? kPrintContainer
- : !std::is_pointer<T>::value
- ? kPrintOther
- : std::is_function<typename std::remove_pointer<T>::type>::value
- ? kPrintFunctionPointer
- : kPrintPointer > (),
- value, os);
+void PrintTo(const T& value, ::std::ostream* os) {
+ internal::PrintWithFallback(value, os);
}
// The following list of PrintTo() overloads tells
@@ -515,9 +445,9 @@ void PrintTo(const T &value, ::std::ostream *os) {
// types, strings, plain arrays, and pointers).
// Overloads for various char types.
-GTEST_API_ void PrintTo(unsigned char c, ::std::ostream *os);
-GTEST_API_ void PrintTo(signed char c, ::std::ostream *os);
-inline void PrintTo(char c, ::std::ostream *os) {
+GTEST_API_ void PrintTo(unsigned char c, ::std::ostream* os);
+GTEST_API_ void PrintTo(signed char c, ::std::ostream* os);
+inline void PrintTo(char c, ::std::ostream* os) {
// When printing a plain char, we always treat it as unsigned. This
// way, the output won't be affected by whether the compiler thinks
// char is signed or not.
@@ -525,7 +455,7 @@ inline void PrintTo(char c, ::std::ostream *os) {
}
// Overloads for other simple built-in types.
-inline void PrintTo(bool x, ::std::ostream *os) {
+inline void PrintTo(bool x, ::std::ostream* os) {
*os << (x ? "true" : "false");
}
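A small sketch, not part of the patch, of the overloads above in action; the exact char rendering follows gtest's code-point format:

    // PrintToString('a')  -> roughly "'a' (97, 0x61)", char treated as unsigned
    // PrintToString(true) -> "true", not "1"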
@@ -536,27 +466,60 @@ inline void PrintTo(bool x, ::std::ostream *os) {
// as signed integer when wchar_t is implemented by the compiler
// as a signed type and is printed as an unsigned integer when wchar_t
// is implemented as an unsigned type.
-GTEST_API_ void PrintTo(wchar_t wc, ::std::ostream *os);
+GTEST_API_ void PrintTo(wchar_t wc, ::std::ostream* os);
+
+GTEST_API_ void PrintTo(char32_t c, ::std::ostream* os);
+inline void PrintTo(char16_t c, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<char32_t>(c), os);
+}
+#ifdef __cpp_char8_t
+inline void PrintTo(char8_t c, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<char32_t>(c), os);
+}
+#endif
+
+// gcc/clang __{u,}int128_t
+#if defined(__SIZEOF_INT128__)
+GTEST_API_ void PrintTo(__uint128_t v, ::std::ostream* os);
+GTEST_API_ void PrintTo(__int128_t v, ::std::ostream* os);
+#endif // __SIZEOF_INT128__
// Overloads for C strings.
-GTEST_API_ void PrintTo(const char *s, ::std::ostream *os);
-inline void PrintTo(char *s, ::std::ostream *os) {
- PrintTo(ImplicitCast_<const char *>(s), os);
+GTEST_API_ void PrintTo(const char* s, ::std::ostream* os);
+inline void PrintTo(char* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const char*>(s), os);
}
// signed/unsigned char is often used for representing binary data, so
// we print pointers to it as void* to be safe.
-inline void PrintTo(const signed char *s, ::std::ostream *os) {
- PrintTo(ImplicitCast_<const void *>(s), os);
+inline void PrintTo(const signed char* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const void*>(s), os);
}
-inline void PrintTo(signed char *s, ::std::ostream *os) {
- PrintTo(ImplicitCast_<const void *>(s), os);
+inline void PrintTo(signed char* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const void*>(s), os);
}
-inline void PrintTo(const unsigned char *s, ::std::ostream *os) {
- PrintTo(ImplicitCast_<const void *>(s), os);
+inline void PrintTo(const unsigned char* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const void*>(s), os);
}
-inline void PrintTo(unsigned char *s, ::std::ostream *os) {
- PrintTo(ImplicitCast_<const void *>(s), os);
+inline void PrintTo(unsigned char* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const void*>(s), os);
+}
+#ifdef __cpp_char8_t
+// Overloads for u8 strings.
+GTEST_API_ void PrintTo(const char8_t* s, ::std::ostream* os);
+inline void PrintTo(char8_t* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const char8_t*>(s), os);
+}
+#endif
+// Overloads for u16 strings.
+GTEST_API_ void PrintTo(const char16_t* s, ::std::ostream* os);
+inline void PrintTo(char16_t* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const char16_t*>(s), os);
+}
+// Overloads for u32 strings.
+GTEST_API_ void PrintTo(const char32_t* s, ::std::ostream* os);
+inline void PrintTo(char32_t* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const char32_t*>(s), os);
}
// MSVC can be configured to define wchar_t as a typedef of unsigned
@@ -566,9 +529,9 @@ inline void PrintTo(unsigned char *s, ::std::ostream *os) {
// possibly causing invalid memory accesses.
#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
// Overloads for wide C strings
-GTEST_API_ void PrintTo(const wchar_t *s, ::std::ostream *os);
-inline void PrintTo(wchar_t *s, ::std::ostream *os) {
- PrintTo(ImplicitCast_<const wchar_t *>(s), os);
+GTEST_API_ void PrintTo(const wchar_t* s, ::std::ostream* os);
+inline void PrintTo(wchar_t* s, ::std::ostream* os) {
+ PrintTo(ImplicitCast_<const wchar_t*>(s), os);
}
#endif
@@ -578,7 +541,7 @@ inline void PrintTo(wchar_t *s, ::std::ostream *os) {
// Prints the given number of elements in an array, without printing
// the curly braces.
template <typename T>
-void PrintRawArrayTo(const T a[], size_t count, ::std::ostream *os) {
+void PrintRawArrayTo(const T a[], size_t count, ::std::ostream* os) {
UniversalPrint(a[0], os);
for (size_t i = 1; i != count; i++) {
*os << ", ";
@@ -587,42 +550,105 @@ void PrintRawArrayTo(const T a[], size_t count, ::std::ostream *os) {
}
// Overloads for ::std::string.
-GTEST_API_ void PrintStringTo(const ::std::string &s, ::std::ostream *os);
-inline void PrintTo(const ::std::string &s, ::std::ostream *os) {
+GTEST_API_ void PrintStringTo(const ::std::string& s, ::std::ostream* os);
+inline void PrintTo(const ::std::string& s, ::std::ostream* os) {
PrintStringTo(s, os);
}
+// Overloads for ::std::u8string
+#ifdef __cpp_char8_t
+GTEST_API_ void PrintU8StringTo(const ::std::u8string& s, ::std::ostream* os);
+inline void PrintTo(const ::std::u8string& s, ::std::ostream* os) {
+ PrintU8StringTo(s, os);
+}
+#endif
+
+// Overloads for ::std::u16string
+GTEST_API_ void PrintU16StringTo(const ::std::u16string& s, ::std::ostream* os);
+inline void PrintTo(const ::std::u16string& s, ::std::ostream* os) {
+ PrintU16StringTo(s, os);
+}
+
+// Overloads for ::std::u32string
+GTEST_API_ void PrintU32StringTo(const ::std::u32string& s, ::std::ostream* os);
+inline void PrintTo(const ::std::u32string& s, ::std::ostream* os) {
+ PrintU32StringTo(s, os);
+}
+
// Overloads for ::std::wstring.
#if GTEST_HAS_STD_WSTRING
-GTEST_API_ void PrintWideStringTo(const ::std::wstring &s, ::std::ostream *os);
-inline void PrintTo(const ::std::wstring &s, ::std::ostream *os) {
+GTEST_API_ void PrintWideStringTo(const ::std::wstring& s, ::std::ostream* os);
+inline void PrintTo(const ::std::wstring& s, ::std::ostream* os) {
PrintWideStringTo(s, os);
}
#endif // GTEST_HAS_STD_WSTRING
#if GTEST_INTERNAL_HAS_STRING_VIEW
// Overload for internal::StringView.
-inline void PrintTo(internal::StringView sp, ::std::ostream *os) {
+inline void PrintTo(internal::StringView sp, ::std::ostream* os) {
PrintTo(::std::string(sp), os);
}
#endif // GTEST_INTERNAL_HAS_STRING_VIEW
-inline void PrintTo(std::nullptr_t, ::std::ostream *os) { *os << "(nullptr)"; }
+inline void PrintTo(std::nullptr_t, ::std::ostream* os) { *os << "(nullptr)"; }
+
+#if GTEST_HAS_RTTI
+inline void PrintTo(const std::type_info& info, std::ostream* os) {
+ *os << internal::GetTypeName(info);
+}
+#endif // GTEST_HAS_RTTI
template <typename T>
-void PrintTo(std::reference_wrapper<T> ref, ::std::ostream *os) {
- UniversalPrinter<T &>::Print(ref.get(), os);
+void PrintTo(std::reference_wrapper<T> ref, ::std::ostream* os) {
+ UniversalPrinter<T&>::Print(ref.get(), os);
+}
+
+inline const void* VoidifyPointer(const void* p) { return p; }
+inline const void* VoidifyPointer(volatile const void* p) {
+ return const_cast<const void*>(p);
+}
+
+template <typename T, typename Ptr>
+void PrintSmartPointer(const Ptr& ptr, std::ostream* os, char) {
+ if (ptr == nullptr) {
+ *os << "(nullptr)";
+ } else {
+ // We can't print the value. Just print the pointer.
+ *os << "(" << (VoidifyPointer)(ptr.get()) << ")";
+ }
+}
+template <typename T, typename Ptr,
+ typename = typename std::enable_if<!std::is_void<T>::value &&
+ !std::is_array<T>::value>::type>
+void PrintSmartPointer(const Ptr& ptr, std::ostream* os, int) {
+ if (ptr == nullptr) {
+ *os << "(nullptr)";
+ } else {
+ *os << "(ptr = " << (VoidifyPointer)(ptr.get()) << ", value = ";
+ UniversalPrinter<T>::Print(*ptr, os);
+ *os << ")";
+ }
+}
+
+template <typename T, typename D>
+void PrintTo(const std::unique_ptr<T, D>& ptr, std::ostream* os) {
+ (PrintSmartPointer<T>)(ptr, os, 0);
+}
+
+template <typename T>
+void PrintTo(const std::shared_ptr<T>& ptr, std::ostream* os) {
+ (PrintSmartPointer<T>)(ptr, os, 0);
}
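A miniature, not part of the patch, of the char/int ranking idiom behind (PrintSmartPointer<T>)(ptr, os, 0) above (Dispatch is an illustrative name): the literal 0 is an int, so the int overload wins while its enable_if holds; when SFINAE removes it, 0 converts to char and the fallback runs:

    template <typename T>
    void Dispatch(const T&, char) { /* fallback */ }

    template <typename T, typename = typename std::enable_if<
                              std::is_arithmetic<T>::value>::type>
    void Dispatch(const T&, int) { /* preferred while the constraint holds */ }
    // Dispatch(v, 0) picks the int overload when viable, else the char one.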
// Helper function for printing a tuple. T must be instantiated with
// a tuple type.
template <typename T>
-void PrintTupleTo(const T &, std::integral_constant<size_t, 0>,
- ::std::ostream *) {}
+void PrintTupleTo(const T&, std::integral_constant<size_t, 0>,
+ ::std::ostream*) {}
template <typename T, size_t I>
-void PrintTupleTo(const T &t, std::integral_constant<size_t, I>,
- ::std::ostream *os) {
+void PrintTupleTo(const T& t, std::integral_constant<size_t, I>,
+ ::std::ostream* os) {
PrintTupleTo(t, std::integral_constant<size_t, I - 1>(), os);
GTEST_INTENTIONAL_CONST_COND_PUSH_()
if (I > 1) {
@@ -634,7 +660,7 @@ void PrintTupleTo(const T &t, std::integral_constant<size_t, I>,
}
template <typename... Types>
-void PrintTo(const ::std::tuple<Types...> &t, ::std::ostream *os) {
+void PrintTo(const ::std::tuple<Types...>& t, ::std::ostream* os) {
*os << "(";
PrintTupleTo(t, std::integral_constant<size_t, sizeof...(Types)>(), os);
*os << ")";
@@ -642,7 +668,7 @@ void PrintTo(const ::std::tuple<Types...> &t, ::std::ostream *os) {
// Overload for std::pair.
template <typename T1, typename T2>
-void PrintTo(const ::std::pair<T1, T2> &value, ::std::ostream *os) {
+void PrintTo(const ::std::pair<T1, T2>& value, ::std::ostream* os) {
*os << '(';
// We cannot use UniversalPrint(value.first, os) here, as T1 may be
// a reference type. The same for printing value.second.
@@ -664,7 +690,7 @@ class UniversalPrinter {
// Note: we deliberately don't call this PrintTo(), as that name
// conflicts with ::testing::internal::PrintTo in the body of the
// function.
- static void Print(const T &value, ::std::ostream *os) {
+ static void Print(const T& value, ::std::ostream* os) {
// By default, ::testing::internal::PrintTo() is used for printing
// the value.
//
@@ -679,14 +705,46 @@ class UniversalPrinter {
GTEST_DISABLE_MSC_WARNINGS_POP_()
};
-#if GTEST_HAS_ABSL
+// Remove any const-qualifiers before passing a type to UniversalPrinter.
+template <typename T>
+class UniversalPrinter<const T> : public UniversalPrinter<T> {};
+
+#if GTEST_INTERNAL_HAS_ANY
-// Printer for absl::optional
+// Printer for std::any / absl::any
+
+template <>
+class UniversalPrinter<Any> {
+ public:
+ static void Print(const Any& value, ::std::ostream* os) {
+ if (value.has_value()) {
+ *os << "value of type " << GetTypeName(value);
+ } else {
+ *os << "no value";
+ }
+ }
+
+ private:
+ static std::string GetTypeName(const Any& value) {
+#if GTEST_HAS_RTTI
+ return internal::GetTypeName(value.type());
+#else
+ static_cast<void>(value); // possibly unused
+ return "<unknown_type>";
+#endif // GTEST_HAS_RTTI
+ }
+};
+
+#endif // GTEST_INTERNAL_HAS_ANY
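A hedged sketch, not part of the patch, of the resulting output:

    // std::any a = 42;
    // PrintToString(a)          -> roughly "value of type int" with RTTI,
    //                              "value of type <unknown_type>" without it
    // PrintToString(std::any{}) -> "no value"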
+
+#if GTEST_INTERNAL_HAS_OPTIONAL
+
+// Printer for std::optional / absl::optional
template <typename T>
-class UniversalPrinter<::absl::optional<T>> {
+class UniversalPrinter<Optional<T>> {
public:
- static void Print(const ::absl::optional<T> &value, ::std::ostream *os) {
+ static void Print(const Optional<T>& value, ::std::ostream* os) {
*os << '(';
if (!value) {
*os << "nullopt";
@@ -697,34 +755,52 @@ class UniversalPrinter<::absl::optional<T>> {
}
};
-// Printer for absl::variant
+template <>
+class UniversalPrinter<decltype(Nullopt())> {
+ public:
+ static void Print(decltype(Nullopt()), ::std::ostream* os) {
+ *os << "(nullopt)";
+ }
+};
+
+#endif // GTEST_INTERNAL_HAS_OPTIONAL
+
+#if GTEST_INTERNAL_HAS_VARIANT
+
+// Printer for std::variant / absl::variant
template <typename... T>
-class UniversalPrinter<::absl::variant<T...>> {
+class UniversalPrinter<Variant<T...>> {
public:
- static void Print(const ::absl::variant<T...> &value, ::std::ostream *os) {
+ static void Print(const Variant<T...>& value, ::std::ostream* os) {
*os << '(';
- absl::visit(Visitor{ os }, value);
+#if GTEST_HAS_ABSL
+ absl::visit(Visitor{os, value.index()}, value);
+#else
+ std::visit(Visitor{os, value.index()}, value);
+#endif // GTEST_HAS_ABSL
*os << ')';
}
private:
struct Visitor {
template <typename U>
- void operator()(const U &u) const {
- *os << "'" << GetTypeName<U>() << "' with value ";
+ void operator()(const U& u) const {
+ *os << "'" << GetTypeName<U>() << "(index = " << index
+ << ")' with value ";
UniversalPrint(u, os);
}
- ::std::ostream *os;
+ ::std::ostream* os;
+ std::size_t index;
};
};
-#endif // GTEST_HAS_ABSL
+#endif // GTEST_INTERNAL_HAS_VARIANT
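A hedged sketch, not part of the patch, of the resulting output; the printed index disambiguates variants that repeat an alternative type:

    // std::variant<int, std::string> v = std::string("hi");
    // PrintToString(v) -> roughly "('<type name of std::string>(index = 1)'
    //                     with value \"hi\")"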
// UniversalPrintArray(begin, len, os) prints an array of 'len'
// elements, starting at address 'begin'.
template <typename T>
-void UniversalPrintArray(const T *begin, size_t len, ::std::ostream *os) {
+void UniversalPrintArray(const T* begin, size_t len, ::std::ostream* os) {
if (len == 0) {
*os << "{}";
} else {
@@ -745,12 +821,26 @@ void UniversalPrintArray(const T *begin, size_t len, ::std::ostream *os) {
}
}
// This overload prints a (const) char array compactly.
-GTEST_API_ void UniversalPrintArray(const char *begin, size_t len,
- ::std::ostream *os);
+GTEST_API_ void UniversalPrintArray(const char* begin, size_t len,
+ ::std::ostream* os);
+
+#ifdef __cpp_char8_t
+// This overload prints a (const) char8_t array compactly.
+GTEST_API_ void UniversalPrintArray(const char8_t* begin, size_t len,
+ ::std::ostream* os);
+#endif
+
+// This overload prints a (const) char16_t array compactly.
+GTEST_API_ void UniversalPrintArray(const char16_t* begin, size_t len,
+ ::std::ostream* os);
+
+// This overload prints a (const) char32_t array compactly.
+GTEST_API_ void UniversalPrintArray(const char32_t* begin, size_t len,
+ ::std::ostream* os);
// This overload prints a (const) wchar_t array compactly.
-GTEST_API_ void UniversalPrintArray(const wchar_t *begin, size_t len,
- ::std::ostream *os);
+GTEST_API_ void UniversalPrintArray(const wchar_t* begin, size_t len,
+ ::std::ostream* os);
// Implements printing an array type T[N].
template <typename T, size_t N>
@@ -758,23 +848,23 @@ class UniversalPrinter<T[N]> {
public:
// Prints the given array, omitting some elements when there are too
// many.
- static void Print(const T (&a)[N], ::std::ostream *os) {
+ static void Print(const T (&a)[N], ::std::ostream* os) {
UniversalPrintArray(a, N, os);
}
};
// Implements printing a reference type T&.
template <typename T>
-class UniversalPrinter<T &> {
+class UniversalPrinter<T&> {
public:
// MSVC warns about adding const to a function type, so we want to
// disable the warning.
GTEST_DISABLE_MSC_WARNINGS_PUSH_(4180)
- static void Print(const T &value, ::std::ostream *os) {
+ static void Print(const T& value, ::std::ostream* os) {
// Prints the address of the value. We use reinterpret_cast here
// as static_cast doesn't compile when T is a function type.
- *os << "@" << reinterpret_cast<const void *>(&value) << " ";
+ *os << "@" << reinterpret_cast<const void*>(&value) << " ";
// Then prints the value itself.
UniversalPrint(value, os);
@@ -790,28 +880,28 @@ class UniversalPrinter<T &> {
template <typename T>
class UniversalTersePrinter {
public:
- static void Print(const T &value, ::std::ostream *os) {
+ static void Print(const T& value, ::std::ostream* os) {
UniversalPrint(value, os);
}
};
template <typename T>
-class UniversalTersePrinter<T &> {
+class UniversalTersePrinter<T&> {
public:
- static void Print(const T &value, ::std::ostream *os) {
+ static void Print(const T& value, ::std::ostream* os) {
UniversalPrint(value, os);
}
};
template <typename T, size_t N>
class UniversalTersePrinter<T[N]> {
public:
- static void Print(const T (&value)[N], ::std::ostream *os) {
+ static void Print(const T (&value)[N], ::std::ostream* os) {
UniversalPrinter<T[N]>::Print(value, os);
}
};
template <>
-class UniversalTersePrinter<const char *> {
+class UniversalTersePrinter<const char*> {
public:
- static void Print(const char *str, ::std::ostream *os) {
+ static void Print(const char* str, ::std::ostream* os) {
if (str == nullptr) {
*os << "NULL";
} else {
@@ -820,18 +910,61 @@ class UniversalTersePrinter<const char *> {
}
};
template <>
-class UniversalTersePrinter<char *> {
+class UniversalTersePrinter<char*> : public UniversalTersePrinter<const char*> {
+};
+
+#ifdef __cpp_char8_t
+template <>
+class UniversalTersePrinter<const char8_t*> {
+ public:
+ static void Print(const char8_t* str, ::std::ostream* os) {
+ if (str == nullptr) {
+ *os << "NULL";
+ } else {
+ UniversalPrint(::std::u8string(str), os);
+ }
+ }
+};
+template <>
+class UniversalTersePrinter<char8_t*>
+ : public UniversalTersePrinter<const char8_t*> {};
+#endif
+
+template <>
+class UniversalTersePrinter<const char16_t*> {
+ public:
+ static void Print(const char16_t* str, ::std::ostream* os) {
+ if (str == nullptr) {
+ *os << "NULL";
+ } else {
+ UniversalPrint(::std::u16string(str), os);
+ }
+ }
+};
+template <>
+class UniversalTersePrinter<char16_t*>
+ : public UniversalTersePrinter<const char16_t*> {};
+
+template <>
+class UniversalTersePrinter<const char32_t*> {
public:
- static void Print(char *str, ::std::ostream *os) {
- UniversalTersePrinter<const char *>::Print(str, os);
+ static void Print(const char32_t* str, ::std::ostream* os) {
+ if (str == nullptr) {
+ *os << "NULL";
+ } else {
+ UniversalPrint(::std::u32string(str), os);
+ }
}
};
+template <>
+class UniversalTersePrinter<char32_t*>
+ : public UniversalTersePrinter<const char32_t*> {};
#if GTEST_HAS_STD_WSTRING
template <>
-class UniversalTersePrinter<const wchar_t *> {
+class UniversalTersePrinter<const wchar_t*> {
public:
- static void Print(const wchar_t *str, ::std::ostream *os) {
+ static void Print(const wchar_t* str, ::std::ostream* os) {
if (str == nullptr) {
*os << "NULL";
} else {
@@ -842,15 +975,15 @@ class UniversalTersePrinter<const wchar_t *> {
#endif
template <>
-class UniversalTersePrinter<wchar_t *> {
+class UniversalTersePrinter<wchar_t*> {
public:
- static void Print(wchar_t *str, ::std::ostream *os) {
- UniversalTersePrinter<const wchar_t *>::Print(str, os);
+ static void Print(wchar_t* str, ::std::ostream* os) {
+ UniversalTersePrinter<const wchar_t*>::Print(str, os);
}
};
template <typename T>
-void UniversalTersePrint(const T &value, ::std::ostream *os) {
+void UniversalTersePrint(const T& value, ::std::ostream* os) {
UniversalTersePrinter<T>::Print(value, os);
}
@@ -859,7 +992,7 @@ void UniversalTersePrint(const T &value, ::std::ostream *os) {
// (const) char pointer, this prints both the pointer and the
// NUL-terminated string.
template <typename T>
-void UniversalPrint(const T &value, ::std::ostream *os) {
+void UniversalPrint(const T& value, ::std::ostream* os) {
// A workaround for the bug in VC++ 7.1 that prevents us from instantiating
// UniversalPrinter with T directly.
typedef T T1;
@@ -871,12 +1004,12 @@ typedef ::std::vector<::std::string> Strings;
// Tersely prints the first N fields of a tuple to a string vector,
// one element for each field.
template <typename Tuple>
-void TersePrintPrefixToStrings(const Tuple &, std::integral_constant<size_t, 0>,
- Strings *) {}
+void TersePrintPrefixToStrings(const Tuple&, std::integral_constant<size_t, 0>,
+ Strings*) {}
template <typename Tuple, size_t I>
-void TersePrintPrefixToStrings(const Tuple &t,
+void TersePrintPrefixToStrings(const Tuple& t,
std::integral_constant<size_t, I>,
- Strings *strings) {
+ Strings* strings) {
TersePrintPrefixToStrings(t, std::integral_constant<size_t, I - 1>(),
strings);
::std::stringstream ss;
@@ -888,7 +1021,7 @@ void TersePrintPrefixToStrings(const Tuple &t,
// element for each field. See the comment before
// UniversalTersePrint() for how we define "tersely".
template <typename Tuple>
-Strings UniversalTersePrintTupleFieldsToStrings(const Tuple &value) {
+Strings UniversalTersePrintTupleFieldsToStrings(const Tuple& value) {
Strings result;
TersePrintPrefixToStrings(
value, std::integral_constant<size_t, std::tuple_size<Tuple>::value>(),
@@ -898,18 +1031,8 @@ Strings UniversalTersePrintTupleFieldsToStrings(const Tuple &value) {
} // namespace internal
-#if GTEST_INTERNAL_HAS_STRING_VIEW
-namespace internal2 {
-template <typename T>
-void TypeWithoutFormatter<T, kConvertibleToStringView>::PrintValue(
- const T &value, ::std::ostream *os) {
- internal::PrintTo(internal::StringView(value), os);
-}
-} // namespace internal2
-#endif
-
template <typename T>
-::std::string PrintToString(const T &value) {
+::std::string PrintToString(const T& value) {
::std::stringstream ss;
internal::UniversalTersePrinter<T>::Print(value, &ss);
return ss.str();
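// A minimal usage sketch of PrintToString, the public entry point built on
// the terse printer:
//
//   EXPECT_EQ("4", ::testing::PrintToString(4));
//   EXPECT_EQ("\"hi\"", ::testing::PrintToString("hi"));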
@@ -922,4 +1045,4 @@ template <typename T>
// declarations from this file.
#include "gtest/internal/custom/gtest-printers.h"
-#endif // GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_PRINTERS_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-spi.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-spi.h
index e263b1033ff..bec8c4810bb 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-spi.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-spi.h
@@ -27,14 +27,11 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
// Utilities for testing Google Test itself and code that uses Google Test
// (e.g. frameworks built on top of Google Test).
-// GOOGLETEST_CM0004 DO NOT DELETE
-
-#ifndef GTEST_INCLUDE_GTEST_GTEST_SPI_H_
-#define GTEST_INCLUDE_GTEST_GTEST_SPI_H_
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_SPI_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_SPI_H_
#include "gtest/gtest.h"
@@ -65,11 +62,11 @@ class GTEST_API_ ScopedFakeTestPartResultReporter
// by Google Test. The 'result' parameter specifies where to report the
// results. This reporter will only catch failures generated in the current
// thread. DEPRECATED
- explicit ScopedFakeTestPartResultReporter(TestPartResultArray *result);
+ explicit ScopedFakeTestPartResultReporter(TestPartResultArray* result);
// Same as above, but you can choose the interception scope of this object.
ScopedFakeTestPartResultReporter(InterceptMode intercept_mode,
- TestPartResultArray *result);
+ TestPartResultArray* result);
// The d'tor restores the previous test part result reporter.
~ScopedFakeTestPartResultReporter() override;
@@ -79,16 +76,19 @@ class GTEST_API_ ScopedFakeTestPartResultReporter
//
// This method is from the TestPartResultReporterInterface
// interface.
- void ReportTestPartResult(const TestPartResult &result) override;
+ void ReportTestPartResult(const TestPartResult& result) override;
private:
void Init();
const InterceptMode intercept_mode_;
- TestPartResultReporterInterface *old_reporter_;
- TestPartResultArray *const result_;
+ TestPartResultReporterInterface* old_reporter_;
+ TestPartResultArray* const result_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedFakeTestPartResultReporter);
+ ScopedFakeTestPartResultReporter(const ScopedFakeTestPartResultReporter&) =
+ delete;
+ ScopedFakeTestPartResultReporter& operator=(
+ const ScopedFakeTestPartResultReporter&) = delete;
};
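// A minimal interception sketch (usage inside a test body assumed):
//
//   ::testing::TestPartResultArray results;
//   {
//     ::testing::ScopedFakeTestPartResultReporter reporter(
//         ::testing::ScopedFakeTestPartResultReporter::
//             INTERCEPT_ONLY_CURRENT_THREAD,
//         &results);
//     ADD_FAILURE() << "captured";  // recorded in 'results', not reported
//   }
//   EXPECT_EQ(1, results.size());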
namespace internal {
@@ -101,16 +101,17 @@ namespace internal {
class GTEST_API_ SingleFailureChecker {
public:
// The constructor remembers the arguments.
- SingleFailureChecker(const TestPartResultArray *results,
- TestPartResult::Type type, const std::string &substr);
+ SingleFailureChecker(const TestPartResultArray* results,
+ TestPartResult::Type type, const std::string& substr);
~SingleFailureChecker();
private:
- const TestPartResultArray *const results_;
+ const TestPartResultArray* const results_;
const TestPartResult::Type type_;
const std::string substr_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(SingleFailureChecker);
+ SingleFailureChecker(const SingleFailureChecker&) = delete;
+ SingleFailureChecker& operator=(const SingleFailureChecker&) = delete;
};
} // namespace internal
@@ -120,7 +121,8 @@ class GTEST_API_ SingleFailureChecker {
GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
// A set of macros for testing Google Test assertions or code that's expected
-// to generate Google Test fatal failures. It verifies that the given
+// to generate Google Test fatal failures (e.g. a failure from an ASSERT_EQ, but
+// not a non-fatal failure, as from EXPECT_EQ). It verifies that the given
// statement will cause exactly one fatal Google Test failure with 'substr'
// being part of the failure message.
//
@@ -178,9 +180,10 @@ GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
} while (::testing::internal::AlwaysFalse())
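// A minimal sketch of EXPECT_FATAL_FAILURE (helper and test names
// hypothetical):
//
//   void HelperWithFatalFailure() { ASSERT_EQ(1, 2) << "mismatch"; }
//
//   TEST(SpiDemo, CatchesFatalFailure) {
//     EXPECT_FATAL_FAILURE(HelperWithFatalFailure(), "mismatch");
//   }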
// A macro for testing Google Test assertions or code that's expected to
-// generate Google Test non-fatal failures. It asserts that the given
-// statement will cause exactly one non-fatal Google Test failure with 'substr'
-// being part of the failure message.
+// generate Google Test non-fatal failures (e.g. a failure from an EXPECT_EQ,
+// but not from an ASSERT_EQ). It asserts that the given statement will cause
+// exactly one non-fatal Google Test failure with 'substr' being part of the
+// failure message.
//
// There are two different versions of this macro. EXPECT_NONFATAL_FAILURE only
// affects and considers failures generated in the current thread and
@@ -242,4 +245,4 @@ GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
} \
} while (::testing::internal::AlwaysFalse())
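// A minimal sketch of EXPECT_NONFATAL_FAILURE (test name hypothetical):
//
//   TEST(SpiDemo, CatchesNonFatalFailure) {
//     EXPECT_NONFATAL_FAILURE(ADD_FAILURE() << "boom", "boom");
//   }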
-#endif // GTEST_INCLUDE_GTEST_GTEST_SPI_H_
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_SPI_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-test-part.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-test-part.h
index a28afb309b9..09cc8c34f04 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-test-part.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-test-part.h
@@ -26,14 +26,17 @@
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// GOOGLETEST_CM0001 DO NOT DELETE
-#ifndef GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
-#define GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
#include <iosfwd>
#include <vector>
+
#include "gtest/internal/gtest-internal.h"
#include "gtest/internal/gtest-string.h"
@@ -60,10 +63,12 @@ class GTEST_API_ TestPartResult {
// C'tor. TestPartResult does NOT have a default constructor.
// Always use this constructor (with parameters) to create a
// TestPartResult object.
- TestPartResult(Type a_type, const char *a_file_name, int a_line_number,
- const char *a_message)
- : type_(a_type), file_name_(a_file_name == nullptr ? "" : a_file_name),
- line_number_(a_line_number), summary_(ExtractSummary(a_message)),
+ TestPartResult(Type a_type, const char* a_file_name, int a_line_number,
+ const char* a_message)
+ : type_(a_type),
+ file_name_(a_file_name == nullptr ? "" : a_file_name),
+ line_number_(a_line_number),
+ summary_(ExtractSummary(a_message)),
message_(a_message) {}
// Gets the outcome of the test part.
@@ -71,7 +76,7 @@ class GTEST_API_ TestPartResult {
// Gets the name of the source file where the test part took place, or
// NULL if it's unknown.
- const char *file_name() const {
+ const char* file_name() const {
return file_name_.empty() ? nullptr : file_name_.c_str();
}
@@ -80,10 +85,10 @@ class GTEST_API_ TestPartResult {
int line_number() const { return line_number_; }
// Gets the summary of the failure message.
- const char *summary() const { return summary_.c_str(); }
+ const char* summary() const { return summary_.c_str(); }
// Gets the message associated with the test part.
- const char *message() const { return message_.c_str(); }
+ const char* message() const { return message_.c_str(); }
// Returns true if and only if the test part was skipped.
bool skipped() const { return type_ == kSkip; }
@@ -105,7 +110,7 @@ class GTEST_API_ TestPartResult {
// Gets the summary of the failure message by omitting the stack
// trace in it.
- static std::string ExtractSummary(const char *message);
+ static std::string ExtractSummary(const char* message);
// The name of the source file where the test part took place, or
// "" if the source file is unknown.
@@ -118,7 +123,7 @@ class GTEST_API_ TestPartResult {
};
// Prints a TestPartResult object.
-std::ostream &operator<<(std::ostream &os, const TestPartResult &result);
+std::ostream& operator<<(std::ostream& os, const TestPartResult& result);
// An array of TestPartResult objects.
//
@@ -129,10 +134,10 @@ class GTEST_API_ TestPartResultArray {
TestPartResultArray() {}
// Appends the given TestPartResult to the array.
- void Append(const TestPartResult &result);
+ void Append(const TestPartResult& result);
// Returns the TestPartResult at the given index (0-based).
- const TestPartResult &GetTestPartResult(int index) const;
+ const TestPartResult& GetTestPartResult(int index) const;
// Returns the number of TestPartResult objects in the array.
int size() const;
@@ -140,7 +145,8 @@ class GTEST_API_ TestPartResultArray {
private:
std::vector<TestPartResult> array_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(TestPartResultArray);
+ TestPartResultArray(const TestPartResultArray&) = delete;
+ TestPartResultArray& operator=(const TestPartResultArray&) = delete;
};
// This interface knows how to report a test part result.
@@ -148,7 +154,7 @@ class GTEST_API_ TestPartResultReporterInterface {
public:
virtual ~TestPartResultReporterInterface() {}
- virtual void ReportTestPartResult(const TestPartResult &result) = 0;
+ virtual void ReportTestPartResult(const TestPartResult& result) = 0;
};
namespace internal {
@@ -164,14 +170,15 @@ class GTEST_API_ HasNewFatalFailureHelper
public:
HasNewFatalFailureHelper();
~HasNewFatalFailureHelper() override;
- void ReportTestPartResult(const TestPartResult &result) override;
+ void ReportTestPartResult(const TestPartResult& result) override;
bool has_new_fatal_failure() const { return has_new_fatal_failure_; }
private:
bool has_new_fatal_failure_;
- TestPartResultReporterInterface *original_reporter_;
+ TestPartResultReporterInterface* original_reporter_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(HasNewFatalFailureHelper);
+ HasNewFatalFailureHelper(const HasNewFatalFailureHelper&) = delete;
+ HasNewFatalFailureHelper& operator=(const HasNewFatalFailureHelper&) = delete;
};
} // namespace internal
@@ -180,4 +187,4 @@ class GTEST_API_ HasNewFatalFailureHelper
GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
-#endif // GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-typed-test.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-typed-test.h
index f5afc4db87f..bd35a326601 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-typed-test.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest-typed-test.h
@@ -27,10 +27,12 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-// GOOGLETEST_CM0001 DO NOT DELETE
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
-#ifndef GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
-#define GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
// This header implements typed tests and type-parameterized tests.
@@ -175,8 +177,6 @@ INSTANTIATE_TYPED_TEST_SUITE_P(My, FooTest, MyTypes);
// Implements typed tests.
-#if GTEST_HAS_TYPED_TEST
-
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Expands to the name of the typedef for the type parameters of the
@@ -192,7 +192,7 @@ INSTANTIATE_TYPED_TEST_SUITE_P(My, FooTest, MyTypes);
typedef ::testing::internal::GenerateTypeList<Types>::type \
GTEST_TYPE_PARAMS_(CaseName); \
typedef ::testing::internal::NameGeneratorSelector<__VA_ARGS__>::type \
- GTEST_NAME_GENERATOR_(CaseName)
+ GTEST_NAME_GENERATOR_(CaseName)
#define TYPED_TEST(CaseName, TestName) \
static_assert(sizeof(GTEST_STRINGIFY_(TestName)) > 1, \
@@ -230,12 +230,8 @@ INSTANTIATE_TYPED_TEST_SUITE_P(My, FooTest, MyTypes);
TYPED_TEST_SUITE
#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
-#endif // GTEST_HAS_TYPED_TEST
-
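// A minimal typed-test sketch using the macros above (names hypothetical):
//
//   template <typename T>
//   class FooTest : public ::testing::Test {};
//
//   using MyTypes = ::testing::Types<int, double>;
//   TYPED_TEST_SUITE(FooTest, MyTypes);
//
//   TYPED_TEST(FooTest, IsDefaultConstructible) {
//     TypeParam value{};
//     (void)value;
//   }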
// Implements type-parameterized tests.
-#if GTEST_HAS_TYPED_TEST_P
-
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Expands to the namespace name that the type-parameterized tests for
@@ -262,7 +258,7 @@ INSTANTIATE_TYPED_TEST_SUITE_P(My, FooTest, MyTypes);
// #included in multiple translation units linked together.
#define TYPED_TEST_SUITE_P(SuiteName) \
static ::testing::internal::TypedTestSuitePState \
- GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName)
+ GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName)
// Legacy API is deprecated but still available
#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
@@ -294,7 +290,7 @@ INSTANTIATE_TYPED_TEST_SUITE_P(My, FooTest, MyTypes);
namespace GTEST_SUITE_NAMESPACE_(SuiteName) { \
typedef ::testing::internal::Templates<__VA_ARGS__> gtest_AllTests_; \
} \
- static const char *const GTEST_REGISTERED_TEST_NAMES_( \
+ static const char* const GTEST_REGISTERED_TEST_NAMES_( \
SuiteName) GTEST_ATTRIBUTE_UNUSED_ = \
GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName).VerifyRegisteredTestNames( \
GTEST_STRINGIFY_(SuiteName), __FILE__, __LINE__, #__VA_ARGS__)
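// A minimal end-to-end sketch of the type-parameterized flow (names
// hypothetical):
//
//   template <typename T>
//   class BarTest : public ::testing::Test {};
//
//   TYPED_TEST_SUITE_P(BarTest);
//   TYPED_TEST_P(BarTest, DoesSomething) { SUCCEED(); }
//   REGISTER_TYPED_TEST_SUITE_P(BarTest, DoesSomething);
//
//   using BarTypes = ::testing::Types<int, long>;
//   INSTANTIATE_TYPED_TEST_SUITE_P(My, BarTest, BarTypes);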
@@ -332,6 +328,4 @@ INSTANTIATE_TYPED_TEST_SUITE_P(My, FooTest, MyTypes);
INSTANTIATE_TYPED_TEST_SUITE_P
#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
-#endif // GTEST_HAS_TYPED_TEST_P
-
-#endif // GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest.h
index 8fd7eea1e7e..d19a587a18c 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest.h
@@ -27,7 +27,6 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
// The Google C++ Testing and Mocking Framework (Google Test)
//
// This header file defines the public API for Google Test. It should be
@@ -47,10 +46,8 @@
// registration from Barthelemy Dagenais' (barthelemy@prologique.com)
// easyUnit framework.
-// GOOGLETEST_CM0001 DO NOT DELETE
-
-#ifndef GTEST_INCLUDE_GTEST_GTEST_H_
-#define GTEST_INCLUDE_GTEST_GTEST_H_
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_H_
#include <cstddef>
#include <limits>
@@ -59,30 +56,22 @@
#include <type_traits>
#include <vector>
-#include "gtest/internal/gtest-internal.h"
-#include "gtest/internal/gtest-string.h"
+#include "gtest/gtest-assertion-result.h"
#include "gtest/gtest-death-test.h"
#include "gtest/gtest-matchers.h"
#include "gtest/gtest-message.h"
#include "gtest/gtest-param-test.h"
#include "gtest/gtest-printers.h"
-#include "gtest/gtest_prod.h"
#include "gtest/gtest-test-part.h"
#include "gtest/gtest-typed-test.h"
+#include "gtest/gtest_pred_impl.h"
+#include "gtest/gtest_prod.h"
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-string.h"
GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
/* class A needs to have dll-interface to be used by clients of class B */)
-namespace testing {
-
-// Silence C4100 (unreferenced formal parameter) and 4805
-// unsafe mix of type 'const int' and type 'const bool'
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 4805)
-#pragma warning(disable : 4100)
-#endif
-
// Declares the flags.
// This flag temporary enables the disabled tests.
@@ -100,6 +89,10 @@ GTEST_DECLARE_bool_(catch_exceptions);
// to let Google Test decide.
GTEST_DECLARE_string_(color);
+// This flag controls whether the test runner should continue execution past
+// the first failure.
+GTEST_DECLARE_bool_(fail_fast);
+
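// A usage sketch: each GTEST_DECLARE_* flag maps to a --gtest_* command-line
// option, so running a test binary with --gtest_fail_fast stops the run after
// the first failing test.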
// This flag sets up the filter to select by name using a glob pattern
// the tests to run. If the filter is not given all tests are executed.
GTEST_DECLARE_string_(filter);
@@ -116,6 +109,9 @@ GTEST_DECLARE_bool_(list_tests);
// in addition to its normal textual output.
GTEST_DECLARE_string_(output);
+// This flag controls whether Google Test prints only test failures.
+GTEST_DECLARE_bool_(brief);
+
// This flag controls whether Google Test prints the elapsed time for each
// test.
GTEST_DECLARE_bool_(print_time);
@@ -130,6 +126,12 @@ GTEST_DECLARE_int32_(random_seed);
// is 1. If the value is -1 the tests are repeating forever.
GTEST_DECLARE_int32_(repeat);
+// This flag controls whether Google Test Environments are recreated for each
+// repeat of the tests. The default value is true. If set to false, the global
+// test Environment objects are only set up once, for the first iteration, and
+// only torn down once, for the last.
+GTEST_DECLARE_bool_(recreate_environments_when_repeating);
+
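// A usage sketch (flag name inferred from the declaration above):
//
//   ./my_test --gtest_repeat=5 --gtest_recreate_environments_when_repeating=false
//
// runs every test five times while setting up and tearing down the global
// Environments only once.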
// This flag controls whether Google Test includes Google Test internal
// stack frames in failure stack traces.
GTEST_DECLARE_bool_(show_internal_stack_frames);
@@ -155,6 +157,16 @@ GTEST_DECLARE_string_(stream_result_to);
GTEST_DECLARE_string_(flagfile);
#endif // GTEST_USE_OWN_FLAGFILE_FLAG_
+namespace testing {
+
+// Silence C4100 (unreferenced formal parameter) and 4805
+// unsafe mix of type 'const int' and type 'const bool'
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4805)
+#pragma warning(disable : 4100)
+#endif
+
// The upper limit for valid stack trace depths.
const int kMaxStackTraceDepth = 100;
@@ -173,10 +185,10 @@ class TestEventRepeater;
class UnitTestRecordPropertyTestHelper;
class WindowsDeathTest;
class FuchsiaDeathTest;
-class UnitTestImpl *GetUnitTestImpl();
+class UnitTestImpl* GetUnitTestImpl();
void ReportFailureInUnknownLocation(TestPartResult::Type result_type,
- const std::string &message);
-std::set<std::string> *GetIgnoredParameterizedTestSuites();
+ const std::string& message);
+std::set<std::string>* GetIgnoredParameterizedTestSuites();
} // namespace internal
@@ -193,194 +205,6 @@ using TestCase = TestSuite;
class TestInfo;
class UnitTest;
-// A class for indicating whether an assertion was successful. When
-// the assertion wasn't successful, the AssertionResult object
-// remembers a non-empty message that describes how it failed.
-//
-// To create an instance of this class, use one of the factory functions
-// (AssertionSuccess() and AssertionFailure()).
-//
-// This class is useful for two purposes:
-// 1. Defining predicate functions to be used with Boolean test assertions
-// EXPECT_TRUE/EXPECT_FALSE and their ASSERT_ counterparts
-// 2. Defining predicate-format functions to be
-// used with predicate assertions (ASSERT_PRED_FORMAT*, etc).
-//
-// For example, if you define IsEven predicate:
-//
-// testing::AssertionResult IsEven(int n) {
-// if ((n % 2) == 0)
-// return testing::AssertionSuccess();
-// else
-// return testing::AssertionFailure() << n << " is odd";
-// }
-//
-// Then the failed expectation EXPECT_TRUE(IsEven(Fib(5)))
-// will print the message
-//
-// Value of: IsEven(Fib(5))
-// Actual: false (5 is odd)
-// Expected: true
-//
-// instead of a more opaque
-//
-// Value of: IsEven(Fib(5))
-// Actual: false
-// Expected: true
-//
-// in case IsEven is a simple Boolean predicate.
-//
-// If you expect your predicate to be reused and want to support informative
-// messages in EXPECT_FALSE and ASSERT_FALSE (negative assertions show up
-// about half as often as positive ones in our tests), supply messages for
-// both success and failure cases:
-//
-// testing::AssertionResult IsEven(int n) {
-// if ((n % 2) == 0)
-// return testing::AssertionSuccess() << n << " is even";
-// else
-// return testing::AssertionFailure() << n << " is odd";
-// }
-//
-// Then a statement EXPECT_FALSE(IsEven(Fib(6))) will print
-//
-// Value of: IsEven(Fib(6))
-// Actual: true (8 is even)
-// Expected: false
-//
-// NB: Predicates that support negative Boolean assertions have reduced
-// performance in positive ones so be careful not to use them in tests
-// that have lots (tens of thousands) of positive Boolean assertions.
-//
-// To use this class with EXPECT_PRED_FORMAT assertions such as:
-//
-// // Verifies that Foo() returns an even number.
-// EXPECT_PRED_FORMAT1(IsEven, Foo());
-//
-// you need to define:
-//
-// testing::AssertionResult IsEven(const char* expr, int n) {
-// if ((n % 2) == 0)
-// return testing::AssertionSuccess();
-// else
-// return testing::AssertionFailure()
-// << "Expected: " << expr << " is even\n Actual: it's " << n;
-// }
-//
-// If Foo() returns 5, you will see the following message:
-//
-// Expected: Foo() is even
-// Actual: it's 5
-//
-class GTEST_API_ AssertionResult {
- public:
- // Copy constructor.
- // Used in EXPECT_TRUE/FALSE(assertion_result).
- AssertionResult(const AssertionResult &other);
-
-// C4800 is a level 3 warning in Visual Studio 2015 and earlier.
-// This warning is not emitted in Visual Studio 2017.
-// This warning is off by default starting in Visual Studio 2019 but can be
-// enabled with command-line options.
-#if defined(_MSC_VER) && (_MSC_VER < 1910 || _MSC_VER >= 1920)
- GTEST_DISABLE_MSC_WARNINGS_PUSH_(4800 /* forcing value to bool */)
-#endif
-
- // Used in the EXPECT_TRUE/FALSE(bool_expression).
- //
- // T must be contextually convertible to bool.
- //
- // The second parameter prevents this overload from being considered if
- // the argument is implicitly convertible to AssertionResult. In that case
- // we want AssertionResult's copy constructor to be used.
- template <typename T>
- explicit AssertionResult(
- const T &success,
- typename std::enable_if<
- !std::is_convertible<T, AssertionResult>::value>::type *
- /*enabler*/
- = nullptr)
- : success_(success) {}
-
-#if defined(_MSC_VER) && (_MSC_VER < 1910 || _MSC_VER >= 1920)
- GTEST_DISABLE_MSC_WARNINGS_POP_()
-#endif
-
- // Assignment operator.
- AssertionResult &operator=(AssertionResult other) {
- swap(other);
- return *this;
- }
-
- // Returns true if and only if the assertion succeeded.
- operator bool() const { return success_; } // NOLINT
-
- // Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE.
- AssertionResult operator!() const;
-
- // Returns the text streamed into this AssertionResult. Test assertions
- // use it when they fail (i.e., the predicate's outcome doesn't match the
- // assertion's expectation). When nothing has been streamed into the
- // object, returns an empty string.
- const char *message() const {
- return message_.get() != nullptr ? message_->c_str() : "";
- }
- // Deprecated; please use message() instead.
- const char *failure_message() const { return message(); }
-
- // Streams a custom failure message into this object.
- template <typename T>
- AssertionResult &operator<<(const T &value) {
- AppendMessage(Message() << value);
- return *this;
- }
-
- // Allows streaming basic output manipulators such as endl or flush into
- // this object.
- AssertionResult &operator<<(
- ::std::ostream &(*basic_manipulator)(::std::ostream &stream)) {
- AppendMessage(Message() << basic_manipulator);
- return *this;
- }
-
- private:
- // Appends the contents of message to message_.
- void AppendMessage(const Message &a_message) {
- if (message_.get() == nullptr) message_.reset(new ::std::string);
- message_->append(a_message.GetString().c_str());
- }
-
- // Swap the contents of this AssertionResult with other.
- void swap(AssertionResult &other);
-
- // Stores result of the assertion predicate.
- bool success_;
- // Stores the message describing the condition in case the expectation
- // construct is not satisfied with the predicate's outcome.
- // Referenced via a pointer to avoid taking too much stack frame space
- // with test assertions.
- std::unique_ptr< ::std::string> message_;
-};
-
-// Makes a successful assertion result.
-GTEST_API_ AssertionResult AssertionSuccess();
-
-// Makes a failed assertion result.
-GTEST_API_ AssertionResult AssertionFailure();
-
-// Makes a failed assertion result with the given failure message.
-// Deprecated; use AssertionFailure() << msg.
-GTEST_API_ AssertionResult AssertionFailure(const Message &msg);
-
-} // namespace testing
-
-// Includes the auto-generated header that implements a family of generic
-// predicate assertion macros. This include comes late because it relies on
-// APIs declared above.
-#include "gtest/gtest_pred_impl.h"
-
-namespace testing {
-
// The abstract class that all tests inherit from.
//
// In Google Test, a unit test program contains one or many TestSuites, and
@@ -411,10 +235,10 @@ class GTEST_API_ Test {
// The d'tor is virtual as we intend to inherit from Test.
virtual ~Test();
- // Sets up the stuff shared by all tests in this test case.
+ // Sets up the stuff shared by all tests in this test suite.
//
// Google Test will call Foo::SetUpTestSuite() before running the first
- // test in test case Foo. Hence a sub-class can define its own
+ // test in test suite Foo. Hence a sub-class can define its own
// SetUpTestSuite() method to shadow the one defined in the super
// class.
static void SetUpTestSuite() {}
@@ -422,12 +246,13 @@ class GTEST_API_ Test {
// Tears down the stuff shared by all tests in this test suite.
//
// Google Test will call Foo::TearDownTestSuite() after running the last
- // test in test case Foo. Hence a sub-class can define its own
+ // test in test suite Foo. Hence a sub-class can define its own
// TearDownTestSuite() method to shadow the one defined in the super
// class.
static void TearDownTestSuite() {}
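// A minimal fixture sketch sharing per-suite state (names hypothetical):
//
//   class FooTest : public ::testing::Test {
//    protected:
//     static void SetUpTestSuite() { shared_ = new int(42); }
//     static void TearDownTestSuite() {
//       delete shared_;
//       shared_ = nullptr;
//     }
//     static int* shared_;
//   };
//   int* FooTest::shared_ = nullptr;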
- // Legacy API is deprecated but still available
+ // Legacy API is deprecated but still available. Use SetUpTestSuite and
+ // TearDownTestSuite instead.
#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
static void TearDownTestCase() {}
static void SetUpTestCase() {}
@@ -459,8 +284,8 @@ class GTEST_API_ Test {
// global context (before or after invocation of RUN_ALL_TESTS and from
// SetUp/TearDown method of Environment objects registered with Google
// Test) will be output as attributes of the <testsuites> element.
- static void RecordProperty(const std::string &key, const std::string &value);
- static void RecordProperty(const std::string &key, int value);
+ static void RecordProperty(const std::string& key, const std::string& value);
+ static void RecordProperty(const std::string& key, int value);
protected:
// Creates a Test object.
@@ -511,10 +336,11 @@ class GTEST_API_ Test {
// If you see an error about overriding the following function or
// about it being private, you have mis-spelled SetUp() as Setup().
struct Setup_should_be_spelled_SetUp {};
- virtual Setup_should_be_spelled_SetUp *Setup() { return nullptr; }
+ virtual Setup_should_be_spelled_SetUp* Setup() { return nullptr; }
// We disallow copying Tests.
- GTEST_DISALLOW_COPY_AND_ASSIGN_(Test);
+ Test(const Test&) = delete;
+ Test& operator=(const Test&) = delete;
};
typedef internal::TimeInMillis TimeInMillis;
@@ -528,17 +354,17 @@ class TestProperty {
// C'tor. TestProperty does NOT have a default constructor.
// Always use this constructor (with parameters) to create a
// TestProperty object.
- TestProperty(const std::string &a_key, const std::string &a_value)
+ TestProperty(const std::string& a_key, const std::string& a_value)
: key_(a_key), value_(a_value) {}
// Gets the user supplied key.
- const char *key() const { return key_.c_str(); }
+ const char* key() const { return key_.c_str(); }
// Gets the user supplied value.
- const char *value() const { return value_.c_str(); }
+ const char* value() const { return value_.c_str(); }
// Sets a new value, overriding the one supplied in the constructor.
- void SetValue(const std::string &new_value) { value_ = new_value; }
+ void SetValue(const std::string& new_value) { value_ = new_value; }
private:
// The key supplied by the user.
@@ -592,12 +418,12 @@ class GTEST_API_ TestResult {
// Returns the i-th test part result among all the results. i can range from 0
// to total_part_count() - 1. If i is not in that range, aborts the program.
- const TestPartResult &GetTestPartResult(int i) const;
+ const TestPartResult& GetTestPartResult(int i) const;
// Returns the i-th test property. i can range from 0 to
// test_property_count() - 1. If i is not in that range, aborts the
// program.
- const TestProperty &GetTestProperty(int i) const;
+ const TestProperty& GetTestProperty(int i) const;
private:
friend class TestInfo;
@@ -611,12 +437,12 @@ class GTEST_API_ TestResult {
friend class internal::FuchsiaDeathTest;
// Gets the vector of TestPartResults.
- const std::vector<TestPartResult> &test_part_results() const {
+ const std::vector<TestPartResult>& test_part_results() const {
return test_part_results_;
}
// Gets the vector of TestProperties.
- const std::vector<TestProperty> &test_properties() const {
+ const std::vector<TestProperty>& test_properties() const {
return test_properties_;
}
@@ -632,17 +458,17 @@ class GTEST_API_ TestResult {
// value will be updated, rather than storing multiple values for the same
// key. xml_element specifies the element for which the property is being
// recorded and is used for validation.
- void RecordProperty(const std::string &xml_element,
- const TestProperty &test_property);
+ void RecordProperty(const std::string& xml_element,
+ const TestProperty& test_property);
// Adds a failure if the key is a reserved attribute of Google Test
// testsuite tags. Returns true if the property is valid.
// FIXME: Validate attribute names are legal and human readable.
- static bool ValidateTestProperty(const std::string &xml_element,
- const TestProperty &test_property);
+ static bool ValidateTestProperty(const std::string& xml_element,
+ const TestProperty& test_property);
// Adds a test part result to the list.
- void AddTestPartResult(const TestPartResult &test_part_result);
+ void AddTestPartResult(const TestPartResult& test_part_result);
// Returns the death test count.
int death_test_count() const { return death_test_count_; }
@@ -658,7 +484,7 @@ class GTEST_API_ TestResult {
// Protects mutable state of the property vector and of owned
// properties, whose values may be updated.
- internal::Mutex test_properites_mutex_;
+ internal::Mutex test_properties_mutex_;
// The vector of TestPartResults
std::vector<TestPartResult> test_part_results_;
@@ -672,7 +498,8 @@ class GTEST_API_ TestResult {
TimeInMillis elapsed_time_;
// We disallow copying TestResult.
- GTEST_DISALLOW_COPY_AND_ASSIGN_(TestResult);
+ TestResult(const TestResult&) = delete;
+ TestResult& operator=(const TestResult&) = delete;
}; // class TestResult
// A TestInfo object stores the following information about a test:
@@ -693,32 +520,32 @@ class GTEST_API_ TestInfo {
~TestInfo();
// Returns the test suite name.
- const char *test_suite_name() const { return test_suite_name_.c_str(); }
+ const char* test_suite_name() const { return test_suite_name_.c_str(); }
// Legacy API is deprecated but still available
#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
- const char *test_case_name() const { return test_suite_name(); }
+ const char* test_case_name() const { return test_suite_name(); }
#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
// Returns the test name.
- const char *name() const { return name_.c_str(); }
+ const char* name() const { return name_.c_str(); }
// Returns the name of the parameter type, or NULL if this is not a typed
// or a type-parameterized test.
- const char *type_param() const {
+ const char* type_param() const {
if (type_param_.get() != nullptr) return type_param_->c_str();
return nullptr;
}
// Returns the text representation of the value parameter, or NULL if this
// is not a value-parameterized test.
- const char *value_param() const {
+ const char* value_param() const {
if (value_param_.get() != nullptr) return value_param_->c_str();
return nullptr;
}
// Returns the file name where this test is defined.
- const char *file() const { return location_.file.c_str(); }
+ const char* file() const { return location_.file.c_str(); }
// Returns the line where this test is defined.
int line() const { return location_.line; }
@@ -752,7 +579,7 @@ class GTEST_API_ TestInfo {
}
// Returns the result of the test.
- const TestResult *result() const { return &result_; }
+ const TestResult* result() const { return &result_; }
private:
#if GTEST_HAS_DEATH_TEST
@@ -762,21 +589,21 @@ class GTEST_API_ TestInfo {
friend class TestSuite;
friend class internal::UnitTestImpl;
friend class internal::StreamingListenerTest;
- friend TestInfo *internal::MakeAndRegisterTestInfo(
- const char *test_suite_name, const char *name, const char *type_param,
- const char *value_param, internal::CodeLocation code_location,
+ friend TestInfo* internal::MakeAndRegisterTestInfo(
+ const char* test_suite_name, const char* name, const char* type_param,
+ const char* value_param, internal::CodeLocation code_location,
internal::TypeId fixture_class_id, internal::SetUpTestSuiteFunc set_up_tc,
internal::TearDownTestSuiteFunc tear_down_tc,
- internal::TestFactoryBase *factory);
+ internal::TestFactoryBase* factory);
// Constructs a TestInfo object. The newly constructed instance assumes
// ownership of the factory object.
- TestInfo(const std::string &test_suite_name, const std::string &name,
- const char *a_type_param, // NULL if not a type-parameterized test
- const char *a_value_param, // NULL if not a value-parameterized test
+ TestInfo(const std::string& test_suite_name, const std::string& name,
+ const char* a_type_param, // NULL if not a type-parameterized test
+ const char* a_value_param, // NULL if not a value-parameterized test
internal::CodeLocation a_code_location,
internal::TypeId fixture_class_id,
- internal::TestFactoryBase *factory);
+ internal::TestFactoryBase* factory);
// Increments the number of death tests encountered in this test so
// far.
@@ -788,7 +615,10 @@ class GTEST_API_ TestInfo {
// deletes it.
void Run();
- static void ClearTestResult(TestInfo *test_info) {
+ // Skips and records the test result for this object.
+ void Skip();
+
+ static void ClearTestResult(TestInfo* test_info) {
test_info->result_.Clear();
}
@@ -808,14 +638,15 @@ class GTEST_API_ TestInfo {
bool matches_filter_; // True if this test matches the
// user-specified filter.
bool is_in_another_shard_; // Will be run in another shard.
- internal::TestFactoryBase *const factory_; // The factory that creates
+ internal::TestFactoryBase* const factory_; // The factory that creates
// the test object
// This field is mutable and needs to be reset before running the
// test for the second time.
TestResult result_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(TestInfo);
+ TestInfo(const TestInfo&) = delete;
+ TestInfo& operator=(const TestInfo&) = delete;
};
// A test suite, which consists of a vector of TestInfos.
@@ -835,7 +666,7 @@ class GTEST_API_ TestSuite {
// this is not a type-parameterized test.
// set_up_tc: pointer to the function that sets up the test suite
// tear_down_tc: pointer to the function that tears down the test suite
- TestSuite(const char *name, const char *a_type_param,
+ TestSuite(const char* name, const char* a_type_param,
internal::SetUpTestSuiteFunc set_up_tc,
internal::TearDownTestSuiteFunc tear_down_tc);
@@ -843,11 +674,11 @@ class GTEST_API_ TestSuite {
virtual ~TestSuite();
// Gets the name of the TestSuite.
- const char *name() const { return name_.c_str(); }
+ const char* name() const { return name_.c_str(); }
// Returns the name of the parameter type, or NULL if this is not a
// type-parameterized test suite.
- const char *type_param() const {
+ const char* type_param() const {
if (type_param_.get() != nullptr) return type_param_->c_str();
return nullptr;
}
@@ -896,46 +727,49 @@ class GTEST_API_ TestSuite {
// Returns the i-th test among all the tests. i can range from 0 to
// total_test_count() - 1. If i is not in that range, returns NULL.
- const TestInfo *GetTestInfo(int i) const;
+ const TestInfo* GetTestInfo(int i) const;
// Returns the TestResult that holds test properties recorded during
// execution of SetUpTestSuite and TearDownTestSuite.
- const TestResult &ad_hoc_test_result() const { return ad_hoc_test_result_; }
+ const TestResult& ad_hoc_test_result() const { return ad_hoc_test_result_; }
private:
friend class Test;
friend class internal::UnitTestImpl;
// Gets the (mutable) vector of TestInfos in this TestSuite.
- std::vector<TestInfo *> &test_info_list() { return test_info_list_; }
+ std::vector<TestInfo*>& test_info_list() { return test_info_list_; }
// Gets the (immutable) vector of TestInfos in this TestSuite.
- const std::vector<TestInfo *> &test_info_list() const {
+ const std::vector<TestInfo*>& test_info_list() const {
return test_info_list_;
}
// Returns the i-th test among all the tests. i can range from 0 to
// total_test_count() - 1. If i is not in that range, returns NULL.
- TestInfo *GetMutableTestInfo(int i);
+ TestInfo* GetMutableTestInfo(int i);
// Sets the should_run member.
void set_should_run(bool should) { should_run_ = should; }
// Adds a TestInfo to this test suite. Will delete the TestInfo upon
// destruction of the TestSuite object.
- void AddTestInfo(TestInfo *test_info);
+ void AddTestInfo(TestInfo* test_info);
// Clears the results of all tests in this test suite.
void ClearResult();
// Clears the results of all tests in the given test suite.
- static void ClearTestSuiteResult(TestSuite *test_suite) {
+ static void ClearTestSuiteResult(TestSuite* test_suite) {
test_suite->ClearResult();
}
// Runs every test in this TestSuite.
void Run();
+ // Skips the execution of tests under this TestSuite.
+ void Skip();
+
// Runs SetUpTestSuite() for this TestSuite. This wrapper is needed
// for catching exceptions thrown from SetUpTestSuite().
void RunSetUpTestSuite() {
@@ -953,43 +787,43 @@ class GTEST_API_ TestSuite {
}
// Returns true if and only if test passed.
- static bool TestPassed(const TestInfo *test_info) {
+ static bool TestPassed(const TestInfo* test_info) {
return test_info->should_run() && test_info->result()->Passed();
}
// Returns true if and only if test skipped.
- static bool TestSkipped(const TestInfo *test_info) {
+ static bool TestSkipped(const TestInfo* test_info) {
return test_info->should_run() && test_info->result()->Skipped();
}
// Returns true if and only if test failed.
- static bool TestFailed(const TestInfo *test_info) {
+ static bool TestFailed(const TestInfo* test_info) {
return test_info->should_run() && test_info->result()->Failed();
}
// Returns true if and only if the test is disabled and will be reported in
// the XML report.
- static bool TestReportableDisabled(const TestInfo *test_info) {
+ static bool TestReportableDisabled(const TestInfo* test_info) {
return test_info->is_reportable() && test_info->is_disabled_;
}
// Returns true if and only if test is disabled.
- static bool TestDisabled(const TestInfo *test_info) {
+ static bool TestDisabled(const TestInfo* test_info) {
return test_info->is_disabled_;
}
// Returns true if and only if this test will appear in the XML report.
- static bool TestReportable(const TestInfo *test_info) {
+ static bool TestReportable(const TestInfo* test_info) {
return test_info->is_reportable();
}
// Returns true if the given test should run.
- static bool ShouldRunTest(const TestInfo *test_info) {
+ static bool ShouldRunTest(const TestInfo* test_info) {
return test_info->should_run();
}
// Shuffles the tests in this test suite.
- void ShuffleTests(internal::Random *random);
+ void ShuffleTests(internal::Random* random);
// Restores the test order to before the first shuffle.
void UnshuffleTests();
@@ -1001,7 +835,7 @@ class GTEST_API_ TestSuite {
const std::unique_ptr<const ::std::string> type_param_;
// The vector of TestInfos in their original order. It owns the
// elements in the vector.
- std::vector<TestInfo *> test_info_list_;
+ std::vector<TestInfo*> test_info_list_;
// Provides a level of indirection for the test list to allow easy
// shuffling and restoring the test order. The i-th element in this
// vector is the index of the i-th test in the shuffled test list.
@@ -1021,7 +855,8 @@ class GTEST_API_ TestSuite {
TestResult ad_hoc_test_result_;
// We disallow copying TestSuites.
- GTEST_DISALLOW_COPY_AND_ASSIGN_(TestSuite);
+ TestSuite(const TestSuite&) = delete;
+ TestSuite& operator=(const TestSuite&) = delete;
};
// An Environment object is capable of setting up and tearing down an
@@ -1053,7 +888,7 @@ class Environment {
// If you see an error about overriding the following function or
// about it being private, you have mis-spelled SetUp() as Setup().
struct Setup_should_be_spelled_SetUp {};
- virtual Setup_should_be_spelled_SetUp *Setup() { return nullptr; }
+ virtual Setup_should_be_spelled_SetUp* Setup() { return nullptr; }
};
#if GTEST_HAS_EXCEPTIONS
@@ -1062,7 +897,7 @@ class Environment {
class GTEST_API_ AssertionException
: public internal::GoogleTestFailureException {
public:
- explicit AssertionException(const TestPartResult &result)
+ explicit AssertionException(const TestPartResult& result)
: GoogleTestFailureException(result) {}
};
@@ -1075,58 +910,61 @@ class TestEventListener {
virtual ~TestEventListener() {}
// Fired before any test activity starts.
- virtual void OnTestProgramStart(const UnitTest &unit_test) = 0;
+ virtual void OnTestProgramStart(const UnitTest& unit_test) = 0;
// Fired before each iteration of tests starts. There may be more than
// one iteration if GTEST_FLAG(repeat) is set. iteration is the iteration
// index, starting from 0.
- virtual void OnTestIterationStart(const UnitTest &unit_test,
+ virtual void OnTestIterationStart(const UnitTest& unit_test,
int iteration) = 0;
// Fired before environment set-up for each iteration of tests starts.
- virtual void OnEnvironmentsSetUpStart(const UnitTest &unit_test) = 0;
+ virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test) = 0;
// Fired after environment set-up for each iteration of tests ends.
- virtual void OnEnvironmentsSetUpEnd(const UnitTest &unit_test) = 0;
+ virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test) = 0;
// Fired before the test suite starts.
- virtual void OnTestSuiteStart(const TestSuite & /*test_suite*/) {}
+ virtual void OnTestSuiteStart(const TestSuite& /*test_suite*/) {}
// Legacy API is deprecated but still available
#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
- virtual void OnTestCaseStart(const TestCase & /*test_case*/) {}
+ virtual void OnTestCaseStart(const TestCase& /*test_case*/) {}
#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
// Fired before the test starts.
- virtual void OnTestStart(const TestInfo &test_info) = 0;
+ virtual void OnTestStart(const TestInfo& test_info) = 0;
+
+ // Fired when a test is disabled.
+ virtual void OnTestDisabled(const TestInfo& /*test_info*/) {}
// Fired after a failed assertion or a SUCCEED() invocation.
// If you want to throw an exception from this function to skip to the next
// TEST, it must be AssertionException defined above, or inherited from it.
- virtual void OnTestPartResult(const TestPartResult &test_part_result) = 0;
+ virtual void OnTestPartResult(const TestPartResult& test_part_result) = 0;
// Fired after the test ends.
- virtual void OnTestEnd(const TestInfo &test_info) = 0;
+ virtual void OnTestEnd(const TestInfo& test_info) = 0;
// Fired after the test suite ends.
- virtual void OnTestSuiteEnd(const TestSuite & /*test_suite*/) {}
+ virtual void OnTestSuiteEnd(const TestSuite& /*test_suite*/) {}
// Legacy API is deprecated but still available
#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
- virtual void OnTestCaseEnd(const TestCase & /*test_case*/) {}
+ virtual void OnTestCaseEnd(const TestCase& /*test_case*/) {}
#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
// Fired before environment tear-down for each iteration of tests starts.
- virtual void OnEnvironmentsTearDownStart(const UnitTest &unit_test) = 0;
+ virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test) = 0;
// Fired after environment tear-down for each iteration of tests ends.
- virtual void OnEnvironmentsTearDownEnd(const UnitTest &unit_test) = 0;
+ virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test) = 0;
// Fired after each iteration of tests finishes.
- virtual void OnTestIterationEnd(const UnitTest &unit_test, int iteration) = 0;
+ virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration) = 0;
// Fired after all test activities have ended.
- virtual void OnTestProgramEnd(const UnitTest &unit_test) = 0;
+ virtual void OnTestProgramEnd(const UnitTest& unit_test) = 0;
};
// The convenience class for users who need to override just one or two
@@ -1136,30 +974,31 @@ class TestEventListener {
// above.
class EmptyTestEventListener : public TestEventListener {
public:
- void OnTestProgramStart(const UnitTest & /*unit_test*/) override {}
- void OnTestIterationStart(const UnitTest & /*unit_test*/,
+ void OnTestProgramStart(const UnitTest& /*unit_test*/) override {}
+ void OnTestIterationStart(const UnitTest& /*unit_test*/,
int /*iteration*/) override {}
- void OnEnvironmentsSetUpStart(const UnitTest & /*unit_test*/) override {}
- void OnEnvironmentsSetUpEnd(const UnitTest & /*unit_test*/) override {}
- void OnTestSuiteStart(const TestSuite & /*test_suite*/) override {}
+ void OnEnvironmentsSetUpStart(const UnitTest& /*unit_test*/) override {}
+ void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) override {}
+ void OnTestSuiteStart(const TestSuite& /*test_suite*/) override {}
// Legacy API is deprecated but still available
#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
- void OnTestCaseStart(const TestCase & /*test_case*/) override {}
+ void OnTestCaseStart(const TestCase& /*test_case*/) override {}
#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
- void OnTestStart(const TestInfo & /*test_info*/) override {}
- void OnTestPartResult(const TestPartResult & /*test_part_result*/) override {}
- void OnTestEnd(const TestInfo & /*test_info*/) override {}
- void OnTestSuiteEnd(const TestSuite & /*test_suite*/) override {}
+ void OnTestStart(const TestInfo& /*test_info*/) override {}
+ void OnTestDisabled(const TestInfo& /*test_info*/) override {}
+ void OnTestPartResult(const TestPartResult& /*test_part_result*/) override {}
+ void OnTestEnd(const TestInfo& /*test_info*/) override {}
+ void OnTestSuiteEnd(const TestSuite& /*test_suite*/) override {}
#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
- void OnTestCaseEnd(const TestCase & /*test_case*/) override {}
+ void OnTestCaseEnd(const TestCase& /*test_case*/) override {}
#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
- void OnEnvironmentsTearDownStart(const UnitTest & /*unit_test*/) override {}
- void OnEnvironmentsTearDownEnd(const UnitTest & /*unit_test*/) override {}
- void OnTestIterationEnd(const UnitTest & /*unit_test*/,
+ void OnEnvironmentsTearDownStart(const UnitTest& /*unit_test*/) override {}
+ void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) override {}
+ void OnTestIterationEnd(const UnitTest& /*unit_test*/,
int /*iteration*/) override {}
- void OnTestProgramEnd(const UnitTest & /*unit_test*/) override {}
+ void OnTestProgramEnd(const UnitTest& /*unit_test*/) override {}
};
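// A minimal listener sketch overriding a single event (names hypothetical):
//
//   class StartLogger : public ::testing::EmptyTestEventListener {
//     void OnTestStart(const ::testing::TestInfo& info) override {
//       printf("Starting %s.%s\n", info.test_suite_name(), info.name());
//     }
//   };
//
// registered in main() after InitGoogleTest():
//
//   ::testing::UnitTest::GetInstance()->listeners().Append(new StartLogger);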
// TestEventListeners lets users add listeners to track events in Google Test.
@@ -1171,19 +1010,19 @@ class GTEST_API_ TestEventListeners {
// Appends an event listener to the end of the list. Google Test assumes
// the ownership of the listener (i.e. it will delete the listener when
// the test program finishes).
- void Append(TestEventListener *listener);
+ void Append(TestEventListener* listener);
// Removes the given event listener from the list and returns it. It then
// becomes the caller's responsibility to delete the listener. Returns
// NULL if the listener is not found in the list.
- TestEventListener *Release(TestEventListener *listener);
+ TestEventListener* Release(TestEventListener* listener);
// Returns the standard listener responsible for the default console
// output. Can be removed from the listeners list to shut down default
// console output. Note that removing this object from the listener list
// with Release transfers its ownership to the caller and makes this
// function return NULL the next time.
- TestEventListener *default_result_printer() const {
+ TestEventListener* default_result_printer() const {
return default_result_printer_;
}
@@ -1194,7 +1033,7 @@ class GTEST_API_ TestEventListeners {
// removing this object from the listener list with Release transfers its
// ownership to the caller and makes this function return NULL the next
// time.
- TestEventListener *default_xml_generator() const {
+ TestEventListener* default_xml_generator() const {
return default_xml_generator_;
}
@@ -1208,21 +1047,21 @@ class GTEST_API_ TestEventListeners {
// Returns repeater that broadcasts the TestEventListener events to all
// subscribers.
- TestEventListener *repeater();
+ TestEventListener* repeater();
// Sets the default_result_printer attribute to the provided listener.
// The listener is also added to the listener list and previous
// default_result_printer is removed from it and deleted. The listener can
// also be NULL in which case it will not be added to the list. Does
// nothing if the previous and the current listener objects are the same.
- void SetDefaultResultPrinter(TestEventListener *listener);
+ void SetDefaultResultPrinter(TestEventListener* listener);
// Sets the default_xml_generator attribute to the provided listener. The
// listener is also added to the listener list and previous
// default_xml_generator is removed from it and deleted. The listener can
// also be NULL in which case it will not be added to the list. Does
// nothing if the previous and the current listener objects are the same.
- void SetDefaultXmlGenerator(TestEventListener *listener);
+ void SetDefaultXmlGenerator(TestEventListener* listener);
// Controls whether events will be forwarded by the repeater to the
// listeners in the list.
@@ -1230,14 +1069,15 @@ class GTEST_API_ TestEventListeners {
void SuppressEventForwarding();
// The actual list of listeners.
- internal::TestEventRepeater *repeater_;
+ internal::TestEventRepeater* repeater_;
// Listener responsible for the standard result output.
- TestEventListener *default_result_printer_;
+ TestEventListener* default_result_printer_;
// Listener responsible for the creation of the XML output file.
- TestEventListener *default_xml_generator_;
+ TestEventListener* default_xml_generator_;
// We disallow copying TestEventListeners.
- GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventListeners);
+ TestEventListeners(const TestEventListeners&) = delete;
+ TestEventListeners& operator=(const TestEventListeners&) = delete;
};
// A UnitTest consists of a vector of TestSuites.
@@ -1255,7 +1095,7 @@ class GTEST_API_ UnitTest {
// Gets the singleton UnitTest object. The first time this method
// is called, a UnitTest object is constructed and returned.
// Consecutive calls will return the same object.
- static UnitTest *GetInstance();
+ static UnitTest* GetInstance();
// Runs all tests in this UnitTest object and prints the result.
// Returns 0 if successful, or 1 otherwise.
@@ -1267,20 +1107,20 @@ class GTEST_API_ UnitTest {
// Returns the working directory when the first TEST() or TEST_F()
// was executed. The UnitTest object owns the string.
- const char *original_working_dir() const;
+ const char* original_working_dir() const;
// Returns the TestSuite object for the test that's currently running,
// or NULL if no test is running.
- const TestSuite *current_test_suite() const GTEST_LOCK_EXCLUDED_(mutex_);
+ const TestSuite* current_test_suite() const GTEST_LOCK_EXCLUDED_(mutex_);
// Legacy API is still available but deprecated
#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
- const TestCase *current_test_case() const GTEST_LOCK_EXCLUDED_(mutex_);
+ const TestCase* current_test_case() const GTEST_LOCK_EXCLUDED_(mutex_);
#endif
// Returns the TestInfo object for the test that's currently running,
// or NULL if no test is running.
- const TestInfo *current_test_info() const GTEST_LOCK_EXCLUDED_(mutex_);
+ const TestInfo* current_test_info() const GTEST_LOCK_EXCLUDED_(mutex_);
// Returns the random seed used at the start of the current test run.
int random_seed() const;
@@ -1289,7 +1129,7 @@ class GTEST_API_ UnitTest {
// value-parameterized tests and instantiate and register them.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
- internal::ParameterizedTestSuiteRegistry &parameterized_test_registry()
+ internal::ParameterizedTestSuiteRegistry& parameterized_test_registry()
GTEST_LOCK_EXCLUDED_(mutex_);
// Gets the number of successful test suites.
@@ -1354,20 +1194,20 @@ class GTEST_API_ UnitTest {
// Gets the i-th test suite among all the test suites. i can range from 0 to
// total_test_suite_count() - 1. If i is not in that range, returns NULL.
- const TestSuite *GetTestSuite(int i) const;
+ const TestSuite* GetTestSuite(int i) const;
// Legacy API is deprecated but still available
#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
- const TestCase *GetTestCase(int i) const;
+ const TestCase* GetTestCase(int i) const;
#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
// Returns the TestResult containing information on test failures and
// properties logged outside of individual test suites.
- const TestResult &ad_hoc_test_result() const;
+ const TestResult& ad_hoc_test_result() const;
// Returns the list of event listeners that can be used to track events
// inside Google Test.
- TestEventListeners &listeners();
+ TestEventListeners& listeners();
private:
// Registers and returns a global test environment. When a test
@@ -1379,16 +1219,16 @@ class GTEST_API_ UnitTest {
// The UnitTest object takes ownership of the given environment.
//
// This method can only be called from the main thread.
- Environment *AddEnvironment(Environment *env);
+ Environment* AddEnvironment(Environment* env);
// Adds a TestPartResult to the current TestResult object. All
// Google Test assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc)
// eventually call this to report their results. The user code
// should use the assertion macros instead of calling this directly.
void AddTestPartResult(TestPartResult::Type result_type,
- const char *file_name, int line_number,
- const std::string &message,
- const std::string &os_stack_trace)
+ const char* file_name, int line_number,
+ const std::string& message,
+ const std::string& os_stack_trace)
GTEST_LOCK_EXCLUDED_(mutex_);
// Adds a TestProperty to the current TestResult object when invoked from
@@ -1396,15 +1236,15 @@ class GTEST_API_ UnitTest {
// from SetUpTestSuite or TearDownTestSuite, or to the global property set
// when invoked elsewhere. If the result already contains a property with
// the same key, the value will be updated.
- void RecordProperty(const std::string &key, const std::string &value);
+ void RecordProperty(const std::string& key, const std::string& value);
// Gets the i-th test suite among all the test suites. i can range from 0 to
// total_test_suite_count() - 1. If i is not in that range, returns NULL.
- TestSuite *GetMutableTestSuite(int i);
+ TestSuite* GetMutableTestSuite(int i);
// Accessors for the implementation object.
- internal::UnitTestImpl *impl() { return impl_; }
- const internal::UnitTestImpl *impl() const { return impl_; }
+ internal::UnitTestImpl* impl() { return impl_; }
+ const internal::UnitTestImpl* impl() const { return impl_; }
// These classes and functions are friends as they need to access private
// members of UnitTest.
@@ -1413,11 +1253,11 @@ class GTEST_API_ UnitTest {
friend class internal::AssertHelper;
friend class internal::StreamingListenerTest;
friend class internal::UnitTestRecordPropertyTestHelper;
- friend Environment *AddGlobalTestEnvironment(Environment *env);
- friend std::set<std::string> *internal::GetIgnoredParameterizedTestSuites();
- friend internal::UnitTestImpl *internal::GetUnitTestImpl();
+ friend Environment* AddGlobalTestEnvironment(Environment* env);
+ friend std::set<std::string>* internal::GetIgnoredParameterizedTestSuites();
+ friend internal::UnitTestImpl* internal::GetUnitTestImpl();
friend void internal::ReportFailureInUnknownLocation(
- TestPartResult::Type result_type, const std::string &message);
+ TestPartResult::Type result_type, const std::string& message);
// Creates an empty UnitTest.
UnitTest();
@@ -1427,7 +1267,7 @@ class GTEST_API_ UnitTest {
// Pushes a trace defined by SCOPED_TRACE() on to the per-thread
// Google Test trace stack.
- void PushGTestTrace(const internal::TraceInfo &trace)
+ void PushGTestTrace(const internal::TraceInfo& trace)
GTEST_LOCK_EXCLUDED_(mutex_);
// Pops a trace from the per-thread Google Test trace stack.
@@ -1441,10 +1281,11 @@ class GTEST_API_ UnitTest {
// the object is constructed. We don't mark it as const here, as
// doing so will cause a warning in the constructor of UnitTest.
// Mutable state in *impl_ is protected by mutex_.
- internal::UnitTestImpl *impl_;
+ internal::UnitTestImpl* impl_;
// We disallow copying UnitTest.
- GTEST_DISALLOW_COPY_AND_ASSIGN_(UnitTest);
+ UnitTest(const UnitTest&) = delete;
+ UnitTest& operator=(const UnitTest&) = delete;
};
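
As a usage sketch for the listeners() accessor in the class above (PrintingListener is a hypothetical name, not part of this diff): user code reaches the registry through the UnitTest singleton, and Append() transfers ownership of the listener.

    #include <cstdio>
    #include "gtest/gtest.h"

    // Hypothetical listener: overrides just one event callback.
    class PrintingListener : public ::testing::EmptyTestEventListener {
      void OnTestStart(const ::testing::TestInfo& info) override {
        std::printf("starting %s.%s\n", info.test_suite_name(), info.name());
      }
    };

    int main(int argc, char** argv) {
      ::testing::InitGoogleTest(&argc, argv);
      // listeners() returns the TestEventListeners registry; Append() takes
      // ownership of the new listener.
      ::testing::UnitTest::GetInstance()->listeners().Append(new PrintingListener);
      return RUN_ALL_TESTS();
    }
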
// A convenient wrapper for adding an environment for the test
@@ -1465,7 +1306,7 @@ class GTEST_API_ UnitTest {
// translation units and the environments have dependencies among them
// (remember that the compiler doesn't guarantee the order in which
// global variables from different translation units are initialized).
-inline Environment *AddGlobalTestEnvironment(Environment *env) {
+inline Environment* AddGlobalTestEnvironment(Environment* env) {
return UnitTest::GetInstance()->AddEnvironment(env);
}
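
A minimal sketch of the registration pattern this function supports (FooEnvironment is a hypothetical name); as the comments above note, ownership of the environment passes to Google Test, so the caller must not delete it.

    #include "gtest/gtest.h"

    // Hypothetical environment with process-wide set-up/tear-down.
    class FooEnvironment : public ::testing::Environment {
     public:
      void SetUp() override { /* acquire shared resources */ }
      void TearDown() override { /* release them */ }
    };

    int main(int argc, char** argv) {
      ::testing::InitGoogleTest(&argc, argv);
      ::testing::AddGlobalTestEnvironment(new FooEnvironment);  // owned by gtest
      return RUN_ALL_TESTS();
    }
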
@@ -1478,11 +1319,11 @@ inline Environment *AddGlobalTestEnvironment(Environment *env) {
// updated.
//
// Calling the function for the second time has no user-visible effect.
-GTEST_API_ void InitGoogleTest(int *argc, char **argv);
+GTEST_API_ void InitGoogleTest(int* argc, char** argv);
// This overloaded version can be used in Windows programs compiled in
// UNICODE mode.
-GTEST_API_ void InitGoogleTest(int *argc, wchar_t **argv);
+GTEST_API_ void InitGoogleTest(int* argc, wchar_t** argv);
// This overloaded version can be used on Arduino/embedded platforms where
// there is no argc/argv.
@@ -1494,9 +1335,9 @@ namespace internal {
// frame size of CmpHelperEQ. This helps reduce the overhead of some sanitizers
// when calling EXPECT_* in a tight loop.
template <typename T1, typename T2>
-AssertionResult CmpHelperEQFailure(const char *lhs_expression,
- const char *rhs_expression, const T1 &lhs,
- const T2 &rhs) {
+AssertionResult CmpHelperEQFailure(const char* lhs_expression,
+ const char* rhs_expression, const T1& lhs,
+ const T2& rhs) {
return EqFailure(lhs_expression, rhs_expression,
FormatForComparisonFailureMessage(lhs, rhs),
FormatForComparisonFailureMessage(rhs, lhs), false);
@@ -1511,9 +1352,9 @@ inline bool operator!=(faketype, faketype) { return false; }
// The helper function for {ASSERT|EXPECT}_EQ.
template <typename T1, typename T2>
-AssertionResult CmpHelperEQ(const char *lhs_expression,
- const char *rhs_expression, const T1 &lhs,
- const T2 &rhs) {
+AssertionResult CmpHelperEQ(const char* lhs_expression,
+ const char* rhs_expression, const T1& lhs,
+ const T2& rhs) {
if (lhs == rhs) {
return AssertionSuccess();
}
@@ -1521,13 +1362,6 @@ AssertionResult CmpHelperEQ(const char *lhs_expression,
return CmpHelperEQFailure(lhs_expression, rhs_expression, lhs, rhs);
}
-// With this overloaded version, we allow anonymous enums to be used
-// in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous enums
-// can be implicitly cast to BiggestInt.
-GTEST_API_ AssertionResult CmpHelperEQ(const char *lhs_expression,
- const char *rhs_expression,
- BiggestInt lhs, BiggestInt rhs);
-
class EqHelper {
public:
// This templatized version is for the general case.
@@ -1536,10 +1370,10 @@ class EqHelper {
// Disable this overload for cases where one argument is a pointer
// and the other is the null pointer constant.
typename std::enable_if<!std::is_integral<T1>::value ||
- !std::is_pointer<T2>::value>::type * = nullptr>
- static AssertionResult Compare(const char *lhs_expression,
- const char *rhs_expression, const T1 &lhs,
- const T2 &rhs) {
+ !std::is_pointer<T2>::value>::type* = nullptr>
+ static AssertionResult Compare(const char* lhs_expression,
+ const char* rhs_expression, const T1& lhs,
+ const T2& rhs) {
return CmpHelperEQ(lhs_expression, rhs_expression, lhs, rhs);
}
@@ -1549,20 +1383,20 @@ class EqHelper {
//
// Even though its body looks the same as the above version, we
// cannot merge the two, as it will make anonymous enums unhappy.
- static AssertionResult Compare(const char *lhs_expression,
- const char *rhs_expression, BiggestInt lhs,
+ static AssertionResult Compare(const char* lhs_expression,
+ const char* rhs_expression, BiggestInt lhs,
BiggestInt rhs) {
return CmpHelperEQ(lhs_expression, rhs_expression, lhs, rhs);
}
template <typename T>
static AssertionResult Compare(
- const char *lhs_expression, const char *rhs_expression,
+ const char* lhs_expression, const char* rhs_expression,
// Handle cases where '0' is used as a null pointer literal.
- std::nullptr_t /* lhs */, T *rhs) {
+ std::nullptr_t /* lhs */, T* rhs) {
// We already know that 'lhs' is a null pointer.
- return CmpHelperEQ(lhs_expression, rhs_expression,
- static_cast<T *>(nullptr), rhs);
+ return CmpHelperEQ(lhs_expression, rhs_expression, static_cast<T*>(nullptr),
+ rhs);
}
};
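
To illustrate why the std::nullptr_t overload exists: it lets a null-pointer literal on the left-hand side of EXPECT_EQ compare against any pointer type, while the disabled integral-vs-pointer overload stays out of the way. A small sketch:

    #include "gtest/gtest.h"

    TEST(EqHelperSketch, NullPointerLiteral) {
      int value = 42;
      int* p = nullptr;
      EXPECT_EQ(nullptr, p);   // dispatches to the std::nullptr_t overload
      p = &value;
      EXPECT_EQ(&value, p);    // dispatches to the general template
    }
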
@@ -1570,9 +1404,9 @@ class EqHelper {
// frame size of CmpHelperOP. This helps reduce the overhead of some sanitizers
// when calling EXPECT_OP in a tight loop.
template <typename T1, typename T2>
-AssertionResult CmpHelperOpFailure(const char *expr1, const char *expr2,
- const T1 &val1, const T2 &val2,
- const char *op) {
+AssertionResult CmpHelperOpFailure(const char* expr1, const char* expr2,
+ const T1& val1, const T2& val2,
+ const char* op) {
return AssertionFailure()
<< "Expected: (" << expr1 << ") " << op << " (" << expr2
<< "), actual: " << FormatForComparisonFailureMessage(val1, val2)
@@ -1583,82 +1417,75 @@ AssertionResult CmpHelperOpFailure(const char *expr1, const char *expr2,
// ASSERT_?? and EXPECT_??. It is here just to avoid copy-and-paste
// of similar code.
//
-// For each templatized helper function, we also define an overloaded
-// version for BiggestInt in order to reduce code bloat and allow
-// anonymous enums to be used with {ASSERT|EXPECT}_?? when compiled
-// with gcc 4.
-//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
#define GTEST_IMPL_CMP_HELPER_(op_name, op) \
template <typename T1, typename T2> \
- AssertionResult CmpHelper##op_name(const char *expr1, const char *expr2, \
- const T1 &val1, const T2 &val2) { \
+ AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \
+ const T1& val1, const T2& val2) { \
if (val1 op val2) { \
return AssertionSuccess(); \
} else { \
return CmpHelperOpFailure(expr1, expr2, val1, val2, #op); \
} \
- } \
- GTEST_API_ AssertionResult CmpHelper##op_name( \
- const char *expr1, const char *expr2, BiggestInt val1, BiggestInt val2)
+ }
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
// Implements the helper function for {ASSERT|EXPECT}_NE
-GTEST_IMPL_CMP_HELPER_(NE, !=);
+GTEST_IMPL_CMP_HELPER_(NE, !=)
// Implements the helper function for {ASSERT|EXPECT}_LE
-GTEST_IMPL_CMP_HELPER_(LE, <=);
+GTEST_IMPL_CMP_HELPER_(LE, <=)
// Implements the helper function for {ASSERT|EXPECT}_LT
-GTEST_IMPL_CMP_HELPER_(LT, <);
+GTEST_IMPL_CMP_HELPER_(LT, <)
// Implements the helper function for {ASSERT|EXPECT}_GE
-GTEST_IMPL_CMP_HELPER_(GE, >=);
+GTEST_IMPL_CMP_HELPER_(GE, >=)
// Implements the helper function for {ASSERT|EXPECT}_GT
-GTEST_IMPL_CMP_HELPER_(GT, >);
+GTEST_IMPL_CMP_HELPER_(GT, >)
#undef GTEST_IMPL_CMP_HELPER_
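
Each instantiation above defines the CmpHelper behind one ordering macro, so the five expansions map onto the public assertions like this:

    #include "gtest/gtest.h"

    TEST(CmpHelperSketch, OrderingMacros) {
      EXPECT_NE(1, 2);  // CmpHelperNE
      EXPECT_LE(1, 1);  // CmpHelperLE
      EXPECT_LT(1, 2);  // CmpHelperLT
      EXPECT_GE(2, 2);  // CmpHelperGE
      EXPECT_GT(2, 1);  // CmpHelperGT
    }
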
// The helper function for {ASSERT|EXPECT}_STREQ.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-GTEST_API_ AssertionResult CmpHelperSTREQ(const char *s1_expression,
- const char *s2_expression,
- const char *s1, const char *s2);
+GTEST_API_ AssertionResult CmpHelperSTREQ(const char* s1_expression,
+ const char* s2_expression,
+ const char* s1, const char* s2);
// The helper function for {ASSERT|EXPECT}_STRCASEEQ.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-GTEST_API_ AssertionResult CmpHelperSTRCASEEQ(const char *s1_expression,
- const char *s2_expression,
- const char *s1, const char *s2);
+GTEST_API_ AssertionResult CmpHelperSTRCASEEQ(const char* s1_expression,
+ const char* s2_expression,
+ const char* s1, const char* s2);
// The helper function for {ASSERT|EXPECT}_STRNE.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-GTEST_API_ AssertionResult CmpHelperSTRNE(const char *s1_expression,
- const char *s2_expression,
- const char *s1, const char *s2);
+GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression,
+ const char* s2_expression,
+ const char* s1, const char* s2);
// The helper function for {ASSERT|EXPECT}_STRCASENE.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-GTEST_API_ AssertionResult CmpHelperSTRCASENE(const char *s1_expression,
- const char *s2_expression,
- const char *s1, const char *s2);
+GTEST_API_ AssertionResult CmpHelperSTRCASENE(const char* s1_expression,
+ const char* s2_expression,
+ const char* s1, const char* s2);
// Helper function for *_STREQ on wide strings.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-GTEST_API_ AssertionResult CmpHelperSTREQ(const char *s1_expression,
- const char *s2_expression,
- const wchar_t *s1, const wchar_t *s2);
+GTEST_API_ AssertionResult CmpHelperSTREQ(const char* s1_expression,
+ const char* s2_expression,
+ const wchar_t* s1, const wchar_t* s2);
// Helper function for *_STRNE on wide strings.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-GTEST_API_ AssertionResult CmpHelperSTRNE(const char *s1_expression,
- const char *s2_expression,
- const wchar_t *s1, const wchar_t *s2);
+GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression,
+ const char* s2_expression,
+ const wchar_t* s1, const wchar_t* s2);
} // namespace internal
@@ -1670,40 +1497,40 @@ GTEST_API_ AssertionResult CmpHelperSTRNE(const char *s1_expression,
//
// The {needle,haystack}_expr arguments are the stringified
// expressions that generated the two real arguments.
-GTEST_API_ AssertionResult IsSubstring(const char *needle_expr,
- const char *haystack_expr,
- const char *needle,
- const char *haystack);
-GTEST_API_ AssertionResult IsSubstring(const char *needle_expr,
- const char *haystack_expr,
- const wchar_t *needle,
- const wchar_t *haystack);
-GTEST_API_ AssertionResult IsNotSubstring(const char *needle_expr,
- const char *haystack_expr,
- const char *needle,
- const char *haystack);
-GTEST_API_ AssertionResult IsNotSubstring(const char *needle_expr,
- const char *haystack_expr,
- const wchar_t *needle,
- const wchar_t *haystack);
-GTEST_API_ AssertionResult IsSubstring(const char *needle_expr,
- const char *haystack_expr,
- const ::std::string &needle,
- const ::std::string &haystack);
-GTEST_API_ AssertionResult IsNotSubstring(const char *needle_expr,
- const char *haystack_expr,
- const ::std::string &needle,
- const ::std::string &haystack);
+GTEST_API_ AssertionResult IsSubstring(const char* needle_expr,
+ const char* haystack_expr,
+ const char* needle,
+ const char* haystack);
+GTEST_API_ AssertionResult IsSubstring(const char* needle_expr,
+ const char* haystack_expr,
+ const wchar_t* needle,
+ const wchar_t* haystack);
+GTEST_API_ AssertionResult IsNotSubstring(const char* needle_expr,
+ const char* haystack_expr,
+ const char* needle,
+ const char* haystack);
+GTEST_API_ AssertionResult IsNotSubstring(const char* needle_expr,
+ const char* haystack_expr,
+ const wchar_t* needle,
+ const wchar_t* haystack);
+GTEST_API_ AssertionResult IsSubstring(const char* needle_expr,
+ const char* haystack_expr,
+ const ::std::string& needle,
+ const ::std::string& haystack);
+GTEST_API_ AssertionResult IsNotSubstring(const char* needle_expr,
+ const char* haystack_expr,
+ const ::std::string& needle,
+ const ::std::string& haystack);
#if GTEST_HAS_STD_WSTRING
-GTEST_API_ AssertionResult IsSubstring(const char *needle_expr,
- const char *haystack_expr,
- const ::std::wstring &needle,
- const ::std::wstring &haystack);
-GTEST_API_ AssertionResult IsNotSubstring(const char *needle_expr,
- const char *haystack_expr,
- const ::std::wstring &needle,
- const ::std::wstring &haystack);
+GTEST_API_ AssertionResult IsSubstring(const char* needle_expr,
+ const char* haystack_expr,
+ const ::std::wstring& needle,
+ const ::std::wstring& haystack);
+GTEST_API_ AssertionResult IsNotSubstring(const char* needle_expr,
+ const char* haystack_expr,
+ const ::std::wstring& needle,
+ const ::std::wstring& haystack);
#endif // GTEST_HAS_STD_WSTRING
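
These predicate-formatters are designed to be invoked through {EXPECT|ASSERT}_PRED_FORMAT2, which supplies the stringified expressions for the first two parameters; for example:

    #include <string>
    #include "gtest/gtest.h"

    TEST(SubstringSketch, PredFormat2) {
      const std::string haystack = "hello, world";
      EXPECT_PRED_FORMAT2(::testing::IsSubstring, "world", haystack);
      EXPECT_PRED_FORMAT2(::testing::IsNotSubstring, "mars", haystack);
    }
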
namespace internal {
@@ -1716,8 +1543,8 @@ namespace internal {
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
template <typename RawType>
-AssertionResult CmpHelperFloatingPointEQ(const char *lhs_expression,
- const char *rhs_expression,
+AssertionResult CmpHelperFloatingPointEQ(const char* lhs_expression,
+ const char* rhs_expression,
RawType lhs_value, RawType rhs_value) {
const FloatingPoint<RawType> lhs(lhs_value), rhs(rhs_value);
@@ -1741,9 +1568,9 @@ AssertionResult CmpHelperFloatingPointEQ(const char *lhs_expression,
// Helper function for implementing ASSERT_NEAR.
//
// INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
-GTEST_API_ AssertionResult DoubleNearPredFormat(const char *expr1,
- const char *expr2,
- const char *abs_error_expr,
+GTEST_API_ AssertionResult DoubleNearPredFormat(const char* expr1,
+ const char* expr2,
+ const char* abs_error_expr,
double val1, double val2,
double abs_error);
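
For contrast between the two helpers: CmpHelperFloatingPointEQ backs EXPECT_FLOAT_EQ/EXPECT_DOUBLE_EQ (an at-most-4-ULP comparison), while DoubleNearPredFormat backs EXPECT_NEAR (an explicit absolute-error bound). A brief sketch:

    #include "gtest/gtest.h"

    TEST(FloatingPointSketch, UlpsVersusAbsoluteError) {
      // 0.1 + 0.2 differs from 0.3 by about 1 ULP, so this passes.
      EXPECT_DOUBLE_EQ(0.1 + 0.2, 0.3);
      // Passes because |3.14159 - 3.1416| <= 1e-4.
      EXPECT_NEAR(3.14159, 3.1416, 1e-4);
    }
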
@@ -1752,13 +1579,13 @@ GTEST_API_ AssertionResult DoubleNearPredFormat(const char *expr1,
class GTEST_API_ AssertHelper {
public:
// Constructor.
- AssertHelper(TestPartResult::Type type, const char *file, int line,
- const char *message);
+ AssertHelper(TestPartResult::Type type, const char* file, int line,
+ const char* message);
~AssertHelper();
// Message assignment is a semantic trick to enable assertion
// streaming; see the GTEST_MESSAGE_ macro below.
- void operator=(const Message &message) const;
+ void operator=(const Message& message) const;
private:
// We put our data in a struct so that the size of the AssertHelper class can
@@ -1766,30 +1593,26 @@ class GTEST_API_ AssertHelper {
// re-using stack space even for temporary variables, so every EXPECT_EQ
// reserves stack space for another AssertHelper.
struct AssertHelperData {
- AssertHelperData(TestPartResult::Type t, const char *srcfile, int line_num,
- const char *msg)
+ AssertHelperData(TestPartResult::Type t, const char* srcfile, int line_num,
+ const char* msg)
: type(t), file(srcfile), line(line_num), message(msg) {}
TestPartResult::Type const type;
- const char *const file;
+ const char* const file;
int const line;
std::string const message;
private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelperData);
+ AssertHelperData(const AssertHelperData&) = delete;
+ AssertHelperData& operator=(const AssertHelperData&) = delete;
};
- AssertHelperData *const data_;
+ AssertHelperData* const data_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelper);
+ AssertHelper(const AssertHelper&) = delete;
+ AssertHelper& operator=(const AssertHelper&) = delete;
};
-enum GTestColor { COLOR_DEFAULT, COLOR_RED, COLOR_GREEN, COLOR_YELLOW };
-
-GTEST_API_ GTEST_ATTRIBUTE_PRINTF_(2, 3) void ColoredPrintf(GTestColor color,
- const char *fmt,
- ...);
-
} // namespace internal
// The pure interface class that all value-parameterized tests inherit from.
@@ -1834,7 +1657,7 @@ class WithParamInterface {
// The current parameter value. Is also available in the test fixture's
// constructor.
- static const ParamType &GetParam() {
+ static const ParamType& GetParam() {
GTEST_CHECK_(parameter_ != nullptr)
<< "GetParam() can only be called inside a value-parameterized test "
<< "-- did you intend to write TEST_P instead of TEST_F?";
@@ -1844,10 +1667,10 @@ class WithParamInterface {
private:
// Sets parameter value. The caller is responsible for making sure the value
// remains alive and unchanged throughout the current test.
- static void SetParam(const ParamType *parameter) { parameter_ = parameter; }
+ static void SetParam(const ParamType* parameter) { parameter_ = parameter; }
// Static value used for accessing parameter during a test lifetime.
- static const ParamType *parameter_;
+ static const ParamType* parameter_;
// TestClass must be a subclass of WithParamInterface<T> and Test.
template <class TestClass>
@@ -1855,7 +1678,7 @@ class WithParamInterface {
};
template <typename T>
-const T *WithParamInterface<T>::parameter_ = nullptr;
+const T* WithParamInterface<T>::parameter_ = nullptr;
// Most value-parameterized classes can ignore the existence of
// WithParamInterface, and can just inherit from ::testing::TestWithParam.
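
A typical value-parameterized suite built on TestWithParam (names here are hypothetical):

    #include "gtest/gtest.h"

    class IsEvenTest : public ::testing::TestWithParam<int> {};

    TEST_P(IsEvenTest, AcceptsEvenNumbers) {
      // GetParam() is inherited from WithParamInterface<int>.
      EXPECT_EQ(GetParam() % 2, 0);
    }

    INSTANTIATE_TEST_SUITE_P(SmallEvens, IsEvenTest,
                             ::testing::Values(0, 2, 4, 8));
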
@@ -1944,18 +1767,37 @@ class TestWithParam : public Test, public WithParamInterface<T> {};
// Boolean assertions. Condition can be either a Boolean expression or an
// AssertionResult. For more information on how to use AssertionResult with
// these macros see comments on that class.
-#define EXPECT_TRUE(condition) \
+#define GTEST_EXPECT_TRUE(condition) \
GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \
GTEST_NONFATAL_FAILURE_)
-#define EXPECT_FALSE(condition) \
+#define GTEST_EXPECT_FALSE(condition) \
GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \
GTEST_NONFATAL_FAILURE_)
-#define ASSERT_TRUE(condition) \
+#define GTEST_ASSERT_TRUE(condition) \
GTEST_TEST_BOOLEAN_(condition, #condition, false, true, GTEST_FATAL_FAILURE_)
-#define ASSERT_FALSE(condition) \
+#define GTEST_ASSERT_FALSE(condition) \
GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \
GTEST_FATAL_FAILURE_)
+// Define these macros to 1 to omit the definition of the corresponding
+// EXPECT or ASSERT, which clashes with some users' own code.
+
+#if !GTEST_DONT_DEFINE_EXPECT_TRUE
+#define EXPECT_TRUE(condition) GTEST_EXPECT_TRUE(condition)
+#endif
+
+#if !GTEST_DONT_DEFINE_EXPECT_FALSE
+#define EXPECT_FALSE(condition) GTEST_EXPECT_FALSE(condition)
+#endif
+
+#if !GTEST_DONT_DEFINE_ASSERT_TRUE
+#define ASSERT_TRUE(condition) GTEST_ASSERT_TRUE(condition)
+#endif
+
+#if !GTEST_DONT_DEFINE_ASSERT_FALSE
+#define ASSERT_FALSE(condition) GTEST_ASSERT_FALSE(condition)
+#endif
+
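
A project whose headers already define, say, ASSERT_TRUE can set the corresponding guard before including gtest and use the prefixed spelling instead; assuming the guard is set project-wide (e.g. on the compile line), the pattern is:

    // Typically passed as -DGTEST_DONT_DEFINE_ASSERT_TRUE=1 on the compile line.
    #define GTEST_DONT_DEFINE_ASSERT_TRUE 1
    #include "gtest/gtest.h"

    TEST(OptOutSketch, PrefixedSpelling) {
      GTEST_ASSERT_TRUE(1 + 1 == 2);  // plain ASSERT_TRUE is left undefined
    }
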
// Macros for testing equalities and inequalities.
//
// * {ASSERT|EXPECT}_EQ(v1, v2): Tests that v1 == v2
@@ -2134,9 +1976,9 @@ class TestWithParam : public Test, public WithParamInterface<T> {};
// Asserts that val1 is less than, or almost equal to, val2. Fails
// otherwise. In particular, it fails if either val1 or val2 is NaN.
-GTEST_API_ AssertionResult FloatLE(const char *expr1, const char *expr2,
+GTEST_API_ AssertionResult FloatLE(const char* expr1, const char* expr2,
float val1, float val2);
-GTEST_API_ AssertionResult DoubleLE(const char *expr1, const char *expr2,
+GTEST_API_ AssertionResult DoubleLE(const char* expr1, const char* expr2,
double val1, double val2);
#if GTEST_OS_WINDOWS
@@ -2197,16 +2039,16 @@ class GTEST_API_ ScopedTrace {
// Template version. Uses Message() to convert the values into strings.
// Slow, but flexible.
template <typename T>
- ScopedTrace(const char *file, int line, const T &message) {
+ ScopedTrace(const char* file, int line, const T& message) {
PushTrace(file, line, (Message() << message).GetString());
}
// Optimize for some known types.
- ScopedTrace(const char *file, int line, const char *message) {
+ ScopedTrace(const char* file, int line, const char* message) {
PushTrace(file, line, message ? message : "(null)");
}
- ScopedTrace(const char *file, int line, const std::string &message) {
+ ScopedTrace(const char* file, int line, const std::string& message) {
PushTrace(file, line, message);
}
@@ -2217,9 +2059,10 @@ class GTEST_API_ ScopedTrace {
~ScopedTrace();
private:
- void PushTrace(const char *file, int line, std::string message);
+ void PushTrace(const char* file, int line, std::string message);
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedTrace);
+ ScopedTrace(const ScopedTrace&) = delete;
+ ScopedTrace& operator=(const ScopedTrace&) = delete;
} GTEST_ATTRIBUTE_UNUSED_; // A ScopedTrace object does its job in its
// c'tor and d'tor. Therefore it doesn't
// need to be used otherwise.
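
ScopedTrace is normally created via the SCOPED_TRACE macro, so that any failure raised further down the call stack is annotated with the trace message; a sketch:

    #include "gtest/gtest.h"

    // Hypothetical helper; failures inside it inherit the active traces.
    void CheckPositive(int n) { EXPECT_GT(n, 0); }

    TEST(ScopedTraceSketch, AnnotatesNestedFailures) {
      for (int i = 1; i <= 3; ++i) {
        SCOPED_TRACE(i);  // pushed in the c'tor, popped in the d'tor
        CheckPositive(i);
      }
    }
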
@@ -2339,13 +2182,12 @@ constexpr bool StaticAssertTypeEq() noexcept {
// EXPECT_EQ(a_.size(), 0);
// EXPECT_EQ(b_.size(), 1);
// }
-//
-// GOOGLETEST_CM0011 DO NOT DELETE
-#if !GTEST_DONT_DEFINE_TEST
-#define TEST_F(test_fixture, test_name) \
+#define GTEST_TEST_F(test_fixture, test_name) \
GTEST_TEST_(test_fixture, test_name, test_fixture, \
::testing::internal::GetTypeId<test_fixture>())
-#endif // !GTEST_DONT_DEFINE_TEST
+#if !GTEST_DONT_DEFINE_TEST_F
+#define TEST_F(test_fixture, test_name) GTEST_TEST_F(test_fixture, test_name)
+#endif
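
GTEST_TEST_F is the always-defined spelling; TEST_F remains available unless GTEST_DONT_DEFINE_TEST_F is set. A fixture sketch matching the comment above:

    #include <vector>
    #include "gtest/gtest.h"

    class VectorTest : public ::testing::Test {
     protected:
      void SetUp() override { b_.push_back(1); }
      std::vector<int> a_;
      std::vector<int> b_;
    };

    TEST_F(VectorTest, InitialSizes) {
      EXPECT_EQ(a_.size(), 0u);
      EXPECT_EQ(b_.size(), 1u);
    }
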
// Returns a path to temporary directory.
// Tries to determine an appropriate directory for the platform.
@@ -2406,22 +2248,23 @@ GTEST_API_ std::string TempDir();
// }
// ...
// int main(int argc, char** argv) {
+// ::testing::InitGoogleTest(&argc, argv);
// std::vector<int> values_to_test = LoadValuesFromConfig();
// RegisterMyTests(values_to_test);
// ...
// return RUN_ALL_TESTS();
// }
//
-template <int &... ExplicitParameterBarrier, typename Factory>
-TestInfo *RegisterTest(const char *test_suite_name, const char *test_name,
- const char *type_param, const char *value_param,
- const char *file, int line, Factory factory) {
+template <int&... ExplicitParameterBarrier, typename Factory>
+TestInfo* RegisterTest(const char* test_suite_name, const char* test_name,
+ const char* type_param, const char* value_param,
+ const char* file, int line, Factory factory) {
using TestT = typename std::remove_pointer<decltype(factory())>::type;
class FactoryImpl : public internal::TestFactoryBase {
public:
explicit FactoryImpl(Factory f) : factory_(std::move(f)) {}
- Test *CreateTest() override { return factory_(); }
+ Test* CreateTest() override { return factory_(); }
private:
Factory factory_;
@@ -2432,7 +2275,7 @@ TestInfo *RegisterTest(const char *test_suite_name, const char *test_name,
internal::CodeLocation(file, line), internal::GetTypeId<TestT>(),
internal::SuiteApiResolver<TestT>::GetSetUpCaseOrSuite(file, line),
internal::SuiteApiResolver<TestT>::GetTearDownCaseOrSuite(file, line),
- new FactoryImpl{ std::move(factory) });
+ new FactoryImpl{std::move(factory)});
}
} // namespace testing
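
Filling out the usage pattern from the comment above into a complete sketch (MyTest and RegisterMyTests are hypothetical names):

    #include <string>
    #include <vector>
    #include "gtest/gtest.h"

    class MyTest : public ::testing::Test {
     public:
      explicit MyTest(int value) : value_(value) {}
      void TestBody() override { EXPECT_GE(value_, 0); }

     private:
      int value_;
    };

    void RegisterMyTests(const std::vector<int>& values) {
      for (int v : values) {
        ::testing::RegisterTest(
            "MyTestSuite", ("Value" + std::to_string(v)).c_str(),
            /*type_param=*/nullptr, std::to_string(v).c_str(), __FILE__, __LINE__,
            // The factory's return type determines the fixture type TestT.
            [=]() -> MyTest* { return new MyTest(v); });
      }
    }

    int main(int argc, char** argv) {
      ::testing::InitGoogleTest(&argc, argv);
      RegisterMyTests({0, 1, 2});
      return RUN_ALL_TESTS();
    }
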
@@ -2451,4 +2294,4 @@ inline int RUN_ALL_TESTS() { return ::testing::UnitTest::GetInstance()->Run(); }
GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
-#endif // GTEST_INCLUDE_GTEST_GTEST_H_
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest_pred_impl.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest_pred_impl.h
index 1fc21910bd7..47a24aa687a 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest_pred_impl.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest_pred_impl.h
@@ -26,17 +26,19 @@
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// This file is AUTOMATICALLY GENERATED on 01/02/2019 by command
-// 'gen_gtest_pred_impl.py 5'. DO NOT EDIT BY HAND!
//
// Implements a family of generic predicate assertion macros.
-// GOOGLETEST_CM0001 DO NOT DELETE
-#ifndef GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
-#define GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
+
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
-#include "gtest/gtest.h"
+#include "gtest/gtest-assertion-result.h"
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-port.h"
namespace testing {
@@ -82,8 +84,8 @@ namespace testing {
// Helper function for implementing {EXPECT|ASSERT}_PRED1. Don't use
// this in your code.
template <typename Pred, typename T1>
-AssertionResult AssertPred1Helper(const char *pred_text, const char *e1,
- Pred pred, const T1 &v1) {
+AssertionResult AssertPred1Helper(const char* pred_text, const char* e1,
+ Pred pred, const T1& v1) {
if (pred(v1)) return AssertionSuccess();
return AssertionFailure()
@@ -113,9 +115,9 @@ AssertionResult AssertPred1Helper(const char *pred_text, const char *e1,
// Helper function for implementing {EXPECT|ASSERT}_PRED2. Don't use
// this in your code.
template <typename Pred, typename T1, typename T2>
-AssertionResult AssertPred2Helper(const char *pred_text, const char *e1,
- const char *e2, Pred pred, const T1 &v1,
- const T2 &v2) {
+AssertionResult AssertPred2Helper(const char* pred_text, const char* e1,
+ const char* e2, Pred pred, const T1& v1,
+ const T2& v2) {
if (pred(v1, v2)) return AssertionSuccess();
return AssertionFailure()
@@ -150,9 +152,9 @@ AssertionResult AssertPred2Helper(const char *pred_text, const char *e1,
// Helper function for implementing {EXPECT|ASSERT}_PRED3. Don't use
// this in your code.
template <typename Pred, typename T1, typename T2, typename T3>
-AssertionResult AssertPred3Helper(const char *pred_text, const char *e1,
- const char *e2, const char *e3, Pred pred,
- const T1 &v1, const T2 &v2, const T3 &v3) {
+AssertionResult AssertPred3Helper(const char* pred_text, const char* e1,
+ const char* e2, const char* e3, Pred pred,
+ const T1& v1, const T2& v2, const T3& v3) {
if (pred(v1, v2, v3)) return AssertionSuccess();
return AssertionFailure()
@@ -189,10 +191,10 @@ AssertionResult AssertPred3Helper(const char *pred_text, const char *e1,
// Helper function for implementing {EXPECT|ASSERT}_PRED4. Don't use
// this in your code.
template <typename Pred, typename T1, typename T2, typename T3, typename T4>
-AssertionResult AssertPred4Helper(const char *pred_text, const char *e1,
- const char *e2, const char *e3,
- const char *e4, Pred pred, const T1 &v1,
- const T2 &v2, const T3 &v3, const T4 &v4) {
+AssertionResult AssertPred4Helper(const char* pred_text, const char* e1,
+ const char* e2, const char* e3,
+ const char* e4, Pred pred, const T1& v1,
+ const T2& v2, const T3& v3, const T4& v4) {
if (pred(v1, v2, v3, v4)) return AssertionSuccess();
return AssertionFailure()
@@ -231,11 +233,11 @@ AssertionResult AssertPred4Helper(const char *pred_text, const char *e1,
// this in your code.
template <typename Pred, typename T1, typename T2, typename T3, typename T4,
typename T5>
-AssertionResult AssertPred5Helper(const char *pred_text, const char *e1,
- const char *e2, const char *e3,
- const char *e4, const char *e5, Pred pred,
- const T1 &v1, const T2 &v2, const T3 &v3,
- const T4 &v4, const T5 &v5) {
+AssertionResult AssertPred5Helper(const char* pred_text, const char* e1,
+ const char* e2, const char* e3,
+ const char* e4, const char* e5, Pred pred,
+ const T1& v1, const T2& v2, const T3& v3,
+ const T4& v4, const T5& v5) {
if (pred(v1, v2, v3, v4, v5)) return AssertionSuccess();
return AssertionFailure()
@@ -274,4 +276,4 @@ AssertionResult AssertPred5Helper(const char *pred_text, const char *e1,
} // namespace testing
-#endif // GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_
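
The AssertPredNHelper family above backs the {EXPECT|ASSERT}_PREDn macros; on failure the helper prints each stringified argument alongside its value. For instance:

    #include "gtest/gtest.h"

    bool MutuallyPrime(int m, int n) {
      for (int d = 2; d <= m && d <= n; ++d)
        if (m % d == 0 && n % d == 0) return false;
      return true;
    }

    TEST(PredSketch, TwoArgumentPredicate) {
      EXPECT_PRED2(MutuallyPrime, 3, 5);  // calls AssertPred2Helper
    }
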
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest_prod.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest_prod.h
index 3dc5b238687..1f37dc31c34 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest_prod.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/gtest_prod.h
@@ -27,12 +27,11 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
// Google C++ Testing and Mocking Framework definitions useful in production
-// code. GOOGLETEST_CM0003 DO NOT DELETE
+// code.
-#ifndef GTEST_INCLUDE_GTEST_GTEST_PROD_H_
-#define GTEST_INCLUDE_GTEST_GTEST_PROD_H_
+#ifndef GOOGLETEST_INCLUDE_GTEST_GTEST_PROD_H_
+#define GOOGLETEST_INCLUDE_GTEST_GTEST_PROD_H_
// When you need to test the private or protected members of a class,
// use the FRIEND_TEST macro to declare your tests as friends of the
@@ -58,4 +57,4 @@
#define FRIEND_TEST(test_case_name, test_name) \
friend class test_case_name##_##test_name##_Test
-#endif // GTEST_INCLUDE_GTEST_GTEST_PROD_H_
+#endif // GOOGLETEST_INCLUDE_GTEST_GTEST_PROD_H_
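
FRIEND_TEST in action (Queue and its test are hypothetical): the macro expands to a friend declaration for the class that TEST(QueueTest, EnqueueIncrementsSize) generates.

    #include "gtest/gtest.h"
    #include "gtest/gtest_prod.h"

    class Queue {
     public:
      void Enqueue(int element) { (void)element; ++size_; }

     private:
      FRIEND_TEST(QueueTest, EnqueueIncrementsSize);
      int size_ = 0;
    };

    TEST(QueueTest, EnqueueIncrementsSize) {
      Queue q;
      q.Enqueue(7);
      EXPECT_EQ(q.size_, 1);  // private member is visible via the friendship
    }
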
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/custom/README.md b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/custom/README.md
index ff391fb4e2b..cb49e2c754c 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/custom/README.md
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/custom/README.md
@@ -15,18 +15,6 @@ The custom directory is an injection point for custom user configurations.
The following macros can be defined:
-### Flag related macros:
-
-* `GTEST_FLAG(flag_name)`
-* `GTEST_USE_OWN_FLAGFILE_FLAG_` - Define to 0 when the system provides its
- own flagfile flag parsing.
-* `GTEST_DECLARE_bool_(name)`
-* `GTEST_DECLARE_int32_(name)`
-* `GTEST_DECLARE_string_(name)`
-* `GTEST_DEFINE_bool_(name, default_val, doc)`
-* `GTEST_DEFINE_int32_(name, default_val, doc)`
-* `GTEST_DEFINE_string_(name, default_val, doc)`
-
### Logging:
* `GTEST_LOG_(severity)`
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest-port.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest-port.h
index cd85d956d2d..9b7fb4261aa 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest-port.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest-port.h
@@ -31,7 +31,38 @@
//
// ** Custom implementation starts here **
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_
+// Use a stub Notification class.
+//
+// The built-in Notification class in GoogleTest v1.12.1 uses std::mutex and
+// std::condition_variable. The <mutex> and <condition_variable> headers of
+// mingw32 g++ (GNU 10.0.0) define std::mutex and std::condition_variable only
+// when configured with the posix threads option but don't define them when
+// configured with the win32 threads option. The Notification class is only
+// used in GoogleTest's internal tests. Since we don't build GoogleTest's
+// internal tests, we don't need a working Notification class. Although it's
+// not hard to fix the mingw32 g++ compilation errors by implementing the
+// Notification class using Windows CRITICAL_SECTION and CONDITION_VARIABLE,
+// it's simpler to just use a stub Notification class on all platforms.
+//
+// The default constructor of the stub class is deleted and the declaration of
+// the Notify() method is commented out, so that compilation will fail if any
+// code actually uses the Notification class.
+
+#define GTEST_HAS_NOTIFICATION_ 1
+namespace testing {
+namespace internal {
+class Notification {
+ public:
+ Notification() = delete;
+ Notification(const Notification&) = delete;
+ Notification& operator=(const Notification&) = delete;
+ // void Notify();
+ void WaitForNotification() {}
+};
+} // namespace internal
+} // namespace testing
+
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest-printers.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest-printers.h
index eb4467abcab..b9495d83783 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest-printers.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest-printers.h
@@ -36,7 +36,7 @@
//
// ** Custom implementation starts here **
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest.h
index 4c8e07be23f..afaaf17ba28 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/custom/gtest.h
@@ -31,7 +31,7 @@
//
// ** Custom implementation starts here **
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_H_
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_H_
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_H_
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-death-test-internal.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-death-test-internal.h
index 3e9497d450d..45580ae805c 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-death-test-internal.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-death-test-internal.h
@@ -26,27 +26,31 @@
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+
// The Google C++ Testing and Mocking Framework (Google Test)
//
// This header file defines internal utilities needed for implementing
// death tests. They are subject to change without notice.
-// GOOGLETEST_CM0001 DO NOT DELETE
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
-#include "gtest/gtest-matchers.h"
-#include "gtest/internal/gtest-internal.h"
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
#include <stdio.h>
+
#include <memory>
-namespace testing {
-namespace internal {
+#include "gtest/gtest-matchers.h"
+#include "gtest/internal/gtest-internal.h"
GTEST_DECLARE_string_(internal_run_death_test);
+namespace testing {
+namespace internal {
+
// Names of the flags (needed for parsing Google Test flags).
const char kDeathTestStyleFlag[] = "death_test_style";
const char kDeathTestUseFork[] = "death_test_use_fork";
@@ -80,21 +84,21 @@ class GTEST_API_ DeathTest {
// argument is set. If the death test should be skipped, the pointer
// is set to NULL; otherwise, it is set to the address of a new concrete
// DeathTest object that controls the execution of the current test.
- static bool Create(const char *statement,
- Matcher<const std::string &> matcher, const char *file,
- int line, DeathTest **test);
+ static bool Create(const char* statement, Matcher<const std::string&> matcher,
+ const char* file, int line, DeathTest** test);
DeathTest();
virtual ~DeathTest() {}
// A helper class that aborts a death test when it's deleted.
class ReturnSentinel {
public:
- explicit ReturnSentinel(DeathTest *test) : test_(test) {}
+ explicit ReturnSentinel(DeathTest* test) : test_(test) {}
~ReturnSentinel() { test_->Abort(TEST_ENCOUNTERED_RETURN_STATEMENT); }
private:
- DeathTest *const test_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ReturnSentinel);
+ DeathTest* const test_;
+ ReturnSentinel(const ReturnSentinel&) = delete;
+ ReturnSentinel& operator=(const ReturnSentinel&) = delete;
} GTEST_ATTRIBUTE_UNUSED_;
// An enumeration of possible roles that may be taken when a death
@@ -131,15 +135,16 @@ class GTEST_API_ DeathTest {
// Returns a human-readable outcome message regarding the outcome of
// the last death test.
- static const char *LastMessage();
+ static const char* LastMessage();
- static void set_last_death_test_message(const std::string &message);
+ static void set_last_death_test_message(const std::string& message);
private:
// A string containing a description of the outcome of the last death test.
static std::string last_death_test_message_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(DeathTest);
+ DeathTest(const DeathTest&) = delete;
+ DeathTest& operator=(const DeathTest&) = delete;
};
GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
@@ -148,16 +153,16 @@ GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
class DeathTestFactory {
public:
virtual ~DeathTestFactory() {}
- virtual bool Create(const char *statement,
- Matcher<const std::string &> matcher, const char *file,
- int line, DeathTest **test) = 0;
+ virtual bool Create(const char* statement,
+ Matcher<const std::string&> matcher, const char* file,
+ int line, DeathTest** test) = 0;
};
// A concrete DeathTestFactory implementation for normal use.
class DefaultDeathTestFactory : public DeathTestFactory {
public:
- bool Create(const char *statement, Matcher<const std::string &> matcher,
- const char *file, int line, DeathTest **test) override;
+ bool Create(const char* statement, Matcher<const std::string&> matcher,
+ const char* file, int line, DeathTest** test) override;
};
// Returns true if exit_status describes a process that was terminated
@@ -167,22 +172,22 @@ GTEST_API_ bool ExitedUnsuccessfully(int exit_status);
// A string passed to EXPECT_DEATH (etc.) is caught by one of these overloads
// and interpreted as a regex (rather than an Eq matcher) for legacy
// compatibility.
-inline Matcher<const ::std::string &> MakeDeathTestMatcher(
+inline Matcher<const ::std::string&> MakeDeathTestMatcher(
::testing::internal::RE regex) {
return ContainsRegex(regex.pattern());
}
-inline Matcher<const ::std::string &> MakeDeathTestMatcher(const char *regex) {
+inline Matcher<const ::std::string&> MakeDeathTestMatcher(const char* regex) {
return ContainsRegex(regex);
}
-inline Matcher<const ::std::string &> MakeDeathTestMatcher(
- const ::std::string &regex) {
+inline Matcher<const ::std::string&> MakeDeathTestMatcher(
+ const ::std::string& regex) {
return ContainsRegex(regex);
}
// If a Matcher<const ::std::string&> is passed to EXPECT_DEATH (etc.), it's
// used directly.
-inline Matcher<const ::std::string &> MakeDeathTestMatcher(
- Matcher<const ::std::string &> matcher) {
+inline Matcher<const ::std::string&> MakeDeathTestMatcher(
+ Matcher<const ::std::string&> matcher) {
return matcher;
}
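
Consequently the second argument of EXPECT_DEATH may be a bare regex string or an explicit matcher; both funnel through MakeDeathTestMatcher. A sketch (assumes a platform with death-test support):

    #include <cstdio>
    #include <cstdlib>
    #include "gtest/gtest.h"

    void Crash() {
      std::fprintf(stderr, "fatal: out of range\n");
      std::abort();
    }

    TEST(DeathSketch, RegexOrMatcher) {
      EXPECT_DEATH(Crash(), "out of range");                 // regex overload
      EXPECT_DEATH(Crash(), ::testing::HasSubstr("fatal"));  // matcher overload
    }
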
@@ -192,7 +197,7 @@ inline Matcher<const ::std::string &> MakeDeathTestMatcher(
#define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \
try { \
GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
- } catch (const ::std::exception &gtest_exception) { \
+ } catch (const ::std::exception& gtest_exception) { \
fprintf( \
stderr, \
"\n%s: Caught std::exception-derived exception escaping the " \
@@ -216,7 +221,7 @@ inline Matcher<const ::std::string &> MakeDeathTestMatcher(
#define GTEST_DEATH_TEST_(statement, predicate, regex_or_matcher, fail) \
GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
if (::testing::internal::AlwaysTrue()) { \
- ::testing::internal::DeathTest *gtest_dt; \
+ ::testing::internal::DeathTest* gtest_dt; \
if (!::testing::internal::DeathTest::Create( \
#statement, \
::testing::internal::MakeDeathTestMatcher(regex_or_matcher), \
@@ -238,7 +243,6 @@ inline Matcher<const ::std::string &> MakeDeathTestMatcher(
gtest_dt->Abort(::testing::internal::DeathTest::TEST_DID_NOT_DIE); \
break; \
} \
- default: break; \
} \
} \
} else \
@@ -266,7 +270,7 @@ inline Matcher<const ::std::string &> MakeDeathTestMatcher(
// RUN_ALL_TESTS was called.
class InternalRunDeathTestFlag {
public:
- InternalRunDeathTestFlag(const std::string &a_file, int a_line, int an_index,
+ InternalRunDeathTestFlag(const std::string& a_file, int a_line, int an_index,
int a_write_fd)
: file_(a_file), line_(a_line), index_(an_index), write_fd_(a_write_fd) {}
@@ -274,7 +278,7 @@ class InternalRunDeathTestFlag {
if (write_fd_ >= 0) posix::Close(write_fd_);
}
- const std::string &file() const { return file_; }
+ const std::string& file() const { return file_; }
int line() const { return line_; }
int index() const { return index_; }
int write_fd() const { return write_fd_; }
@@ -285,17 +289,18 @@ class InternalRunDeathTestFlag {
int index_;
int write_fd_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(InternalRunDeathTestFlag);
+ InternalRunDeathTestFlag(const InternalRunDeathTestFlag&) = delete;
+ InternalRunDeathTestFlag& operator=(const InternalRunDeathTestFlag&) = delete;
};
// Returns a newly created InternalRunDeathTestFlag object with fields
// initialized from the GTEST_FLAG(internal_run_death_test) flag if
// the flag is specified; otherwise returns NULL.
-InternalRunDeathTestFlag *ParseInternalRunDeathTestFlag();
+InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag();
#endif // GTEST_HAS_DEATH_TEST
} // namespace internal
} // namespace testing
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-filepath.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-filepath.h
index b228d473420..a2a60a962b8 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-filepath.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-filepath.h
@@ -26,7 +26,7 @@
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+
// Google Test filepath utilities
//
// This header file declares classes and functions used internally by
@@ -35,10 +35,12 @@
// This file is #included in gtest/internal/gtest-internal.h.
// Do not include this header file separately!
-// GOOGLETEST_CM0001 DO NOT DELETE
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
#include "gtest/internal/gtest-string.h"
@@ -62,21 +64,21 @@ namespace internal {
class GTEST_API_ FilePath {
public:
FilePath() : pathname_("") {}
- FilePath(const FilePath &rhs) : pathname_(rhs.pathname_) {}
+ FilePath(const FilePath& rhs) : pathname_(rhs.pathname_) {}
- explicit FilePath(const std::string &pathname) : pathname_(pathname) {
+ explicit FilePath(const std::string& pathname) : pathname_(pathname) {
Normalize();
}
- FilePath &operator=(const FilePath &rhs) {
+ FilePath& operator=(const FilePath& rhs) {
Set(rhs);
return *this;
}
- void Set(const FilePath &rhs) { pathname_ = rhs.pathname_; }
+ void Set(const FilePath& rhs) { pathname_ = rhs.pathname_; }
- const std::string &string() const { return pathname_; }
- const char *c_str() const { return pathname_.c_str(); }
+ const std::string& string() const { return pathname_; }
+ const char* c_str() const { return pathname_.c_str(); }
// Returns the current working directory, or "" if unsuccessful.
static FilePath GetCurrentDir();
@@ -85,15 +87,15 @@ class GTEST_API_ FilePath {
// extension = "xml", returns "dir/test.xml". If number is greater
// than zero (e.g., 12), returns "dir/test_12.xml".
// On Windows platform, uses \ as the separator rather than /.
- static FilePath MakeFileName(const FilePath &directory,
- const FilePath &base_name, int number,
- const char *extension);
+ static FilePath MakeFileName(const FilePath& directory,
+ const FilePath& base_name, int number,
+ const char* extension);
// Given directory = "dir", relative_path = "test.xml",
// returns "dir/test.xml".
// On Windows, uses \ as the separator rather than /.
- static FilePath ConcatPaths(const FilePath &directory,
- const FilePath &relative_path);
+ static FilePath ConcatPaths(const FilePath& directory,
+ const FilePath& relative_path);
// Returns a pathname for a file that does not currently exist. The pathname
// will be directory/base_name.extension or
@@ -103,9 +105,9 @@ class GTEST_API_ FilePath {
// Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'.
// There could be a race condition if two or more processes are calling this
// function at the same time -- they could both pick the same filename.
- static FilePath GenerateUniqueFileName(const FilePath &directory,
- const FilePath &base_name,
- const char *extension);
+ static FilePath GenerateUniqueFileName(const FilePath& directory,
+ const FilePath& base_name,
+ const char* extension);
// Returns true if and only if the path is "".
bool IsEmpty() const { return pathname_.empty(); }
@@ -135,7 +137,7 @@ class GTEST_API_ FilePath {
// Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns
// FilePath("dir/file"). If a case-insensitive extension is not
// found, returns a copy of the original FilePath.
- FilePath RemoveExtension(const char *extension) const;
+ FilePath RemoveExtension(const char* extension) const;
// Creates directories so that path exists. Returns true if successful or if
// the directories already exist; returns false if unable to create
@@ -192,10 +194,10 @@ class GTEST_API_ FilePath {
void Normalize();
- // Returns a pointer to the last occurence of a valid path separator in
+ // Returns a pointer to the last occurrence of a valid path separator in
// the FilePath. On Windows, for example, both '/' and '\' are valid path
// separators. Returns NULL if no path separator was found.
- const char *FindLastPathSeparator() const;
+ const char* FindLastPathSeparator() const;
std::string pathname_;
}; // class FilePath
@@ -205,4 +207,4 @@ class GTEST_API_ FilePath {
GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-internal.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-internal.h
index 9640aba836f..9b04e4c85fa 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-internal.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-internal.h
@@ -26,16 +26,18 @@
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+
// The Google C++ Testing and Mocking Framework (Google Test)
//
// This header file declares functions and macros used internally by
// Google Test. They are subject to change without notice.
-// GOOGLETEST_CM0001 DO NOT DELETE
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
#include "gtest/internal/gtest-port.h"
@@ -53,6 +55,7 @@
#include <ctype.h>
#include <float.h>
#include <string.h>
+
#include <cstdint>
#include <iomanip>
#include <limits>
@@ -91,7 +94,7 @@
#define GTEST_STRINGIFY_(...) GTEST_STRINGIFY_HELPER_(__VA_ARGS__, )
namespace proto2 {
-class Message;
+class MessageLite;
}
namespace testing {
@@ -106,7 +109,7 @@ class TestPartResult; // Result of a test part.
class UnitTest; // A collection of test suites.
template <typename T>
-::std::string PrintToString(const T &value);
+::std::string PrintToString(const T& value);
namespace internal {
@@ -133,12 +136,12 @@ class IgnoredValue {
template <typename T,
typename std::enable_if<!std::is_convertible<T, Sink>::value,
int>::type = 0>
- IgnoredValue(const T & /* ignored */) {} // NOLINT(runtime/explicit)
+ IgnoredValue(const T& /* ignored */) {} // NOLINT(runtime/explicit)
};
// Appends the user-supplied message to the Google-Test-generated message.
-GTEST_API_ std::string AppendUserMessage(const std::string &gtest_msg,
- const Message &user_msg);
+GTEST_API_ std::string AppendUserMessage(const std::string& gtest_msg,
+ const Message& user_msg);
#if GTEST_HAS_EXCEPTIONS
@@ -153,7 +156,7 @@ GTEST_DISABLE_MSC_WARNINGS_PUSH_(
// frameworks know how to extract and print the message inside it.
class GTEST_API_ GoogleTestFailureException : public ::std::runtime_error {
public:
- explicit GoogleTestFailureException(const TestPartResult &failure);
+ explicit GoogleTestFailureException(const TestPartResult& failure);
};
GTEST_DISABLE_MSC_WARNINGS_POP_() // 4275
@@ -168,28 +171,20 @@ namespace edit_distance {
// See http://en.wikipedia.org/wiki/Wagner-Fischer_algorithm
enum EditType { kMatch, kAdd, kRemove, kReplace };
GTEST_API_ std::vector<EditType> CalculateOptimalEdits(
- const std::vector<size_t> &left, const std::vector<size_t> &right);
+ const std::vector<size_t>& left, const std::vector<size_t>& right);
// Same as above, but the input is represented as strings.
GTEST_API_ std::vector<EditType> CalculateOptimalEdits(
- const std::vector<std::string> &left,
- const std::vector<std::string> &right);
+ const std::vector<std::string>& left,
+ const std::vector<std::string>& right);
// Create a diff of the input strings in Unified diff format.
-GTEST_API_ std::string CreateUnifiedDiff(const std::vector<std::string> &left,
- const std::vector<std::string> &right,
+GTEST_API_ std::string CreateUnifiedDiff(const std::vector<std::string>& left,
+ const std::vector<std::string>& right,
size_t context = 2);
} // namespace edit_distance
-// Calculate the diff between 'left' and 'right' and return it in unified diff
-// format.
-// If not null, stores in 'total_line_count' the total number of lines found
-// in left + right.
-GTEST_API_ std::string DiffStrings(const std::string &left,
- const std::string &right,
- size_t *total_line_count);
-
// Constructs and returns the message for an equality assertion
// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure.
//
@@ -205,16 +200,16 @@ GTEST_API_ std::string DiffStrings(const std::string &left,
// The ignoring_case parameter is true if and only if the assertion is a
// *_STRCASEEQ*. When it's true, the string " (ignoring case)" will
// be inserted into the message.
-GTEST_API_ AssertionResult EqFailure(const char *expected_expression,
- const char *actual_expression,
- const std::string &expected_value,
- const std::string &actual_value,
+GTEST_API_ AssertionResult EqFailure(const char* expected_expression,
+ const char* actual_expression,
+ const std::string& expected_value,
+ const std::string& actual_value,
bool ignoring_case);
// Constructs a failure message for Boolean assertions such as EXPECT_TRUE.
GTEST_API_ std::string GetBoolAssertionFailureMessage(
- const AssertionResult &assertion_result, const char *expression_text,
- const char *actual_predicate_value, const char *expected_predicate_value);
+ const AssertionResult& assertion_result, const char* expression_text,
+ const char* actual_predicate_value, const char* expected_predicate_value);
// This template class represents an IEEE floating-point number
// (either single-precision or double-precision, depending on the
@@ -286,7 +281,7 @@ class FloatingPoint {
//
// See the following article for more details on ULP:
// http://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
- static const size_t kMaxUlps = 4;
+ static const uint32_t kMaxUlps = 4;
// Constructs a FloatingPoint from a raw floating-point number.
//
@@ -294,7 +289,7 @@ class FloatingPoint {
// around may change its bits, although the new value is guaranteed
// to be also a NAN. Therefore, don't expect this constructor to
// preserve the bits in x when x is a NAN.
- explicit FloatingPoint(const RawType &x) { u_.value_ = x; }
+ explicit FloatingPoint(const RawType& x) { u_.value_ = x; }
// Static methods
@@ -316,7 +311,7 @@ class FloatingPoint {
// Non-static methods
// Returns the bits that represents this number.
- const Bits &bits() const { return u_.bits_; }
+ const Bits& bits() const { return u_.bits_; }
// Returns the exponent bits of this number.
Bits exponent_bits() const { return kExponentBitMask & u_.bits_; }
@@ -340,7 +335,7 @@ class FloatingPoint {
// - returns false if either number is (or both are) NAN.
// - treats really large numbers as almost equal to infinity.
// - thinks +0.0 and -0.0 are 0 ULPs apart.
- bool AlmostEquals(const FloatingPoint &rhs) const {
+ bool AlmostEquals(const FloatingPoint& rhs) const {
// The IEEE standard says that any comparison operation involving
// a NAN must return false.
if (is_nan() || rhs.is_nan()) return false;
@@ -371,7 +366,7 @@ class FloatingPoint {
//
// Read http://en.wikipedia.org/wiki/Signed_number_representations
// for more details on signed number representations.
- static Bits SignAndMagnitudeToBiased(const Bits &sam) {
+ static Bits SignAndMagnitudeToBiased(const Bits& sam) {
if (kSignBitMask & sam) {
// sam represents a negative number.
return ~sam + 1;
@@ -383,8 +378,8 @@ class FloatingPoint {
// Given two numbers in the sign-and-magnitude representation,
// returns the distance between them as an unsigned number.
- static Bits DistanceBetweenSignAndMagnitudeNumbers(const Bits &sam1,
- const Bits &sam2) {
+ static Bits DistanceBetweenSignAndMagnitudeNumbers(const Bits& sam1,
+ const Bits& sam2) {
const Bits biased1 = SignAndMagnitudeToBiased(sam1);
const Bits biased2 = SignAndMagnitudeToBiased(sam2);
return (biased1 >= biased2) ? (biased1 - biased2) : (biased2 - biased1);
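The biased mapping above is what lets the ULP distance be computed with plain unsigned subtraction. A minimal standalone sketch of the same technique for float follows; the names BitsOf, Biased and UlpDistance are illustrative, not gtest's:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// Reinterpret a float's bits (IEEE sign-and-magnitude) as an integer.
static uint32_t BitsOf(float f) {
  uint32_t b;
  std::memcpy(&b, &f, sizeof b);
  return b;
}

// Same mapping as SignAndMagnitudeToBiased above: negatives map below
// the midpoint, positives above it, so subtraction gives the distance.
static uint32_t Biased(uint32_t sam) {
  const uint32_t kSignBit = 1u << 31;
  return (kSignBit & sam) ? ~sam + 1 : kSignBit | sam;
}

static uint32_t UlpDistance(float a, float b) {
  const uint32_t x = Biased(BitsOf(a));
  const uint32_t y = Biased(BitsOf(b));
  return x >= y ? x - y : y - x;
}

int main() {
  assert(UlpDistance(1.0f, std::nextafterf(1.0f, 2.0f)) == 1);  // adjacent
  assert(UlpDistance(0.0f, -0.0f) == 0);  // +0 and -0 are 0 ULPs apart
}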
@@ -415,7 +410,7 @@ typedef FloatingPoint<double> Double;
// used to hold such IDs. The user should treat TypeId as an opaque
// type: the only operation allowed on TypeId values is to compare
// them for equality using the == operator.
-typedef const void *TypeId;
+typedef const void* TypeId;
template <typename T>
class TypeIdHelper {
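The idiom the surrounding declarations imply, reduced to a standalone sketch (names are illustrative): GetTypeId<T>() returns the address of a static member of a helper template, which is unique per type within a single binary, so no RTTI is needed. Note that gtest documents the caveat that such addresses can differ across shared-library boundaries.

#include <cassert>

typedef const void* TypeId;

// One static per instantiated type; its address serves as the type's ID.
template <typename T>
struct TypeIdHelperSketch {
  static bool dummy_;
};
template <typename T>
bool TypeIdHelperSketch<T>::dummy_ = false;

template <typename T>
TypeId GetTypeIdSketch() {
  return &TypeIdHelperSketch<T>::dummy_;
}

int main() {
  assert(GetTypeIdSketch<int>() == GetTypeIdSketch<int>());
  assert(GetTypeIdSketch<int>() != GetTypeIdSketch<double>());
}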
@@ -456,13 +451,14 @@ class TestFactoryBase {
// Creates a test instance to run. The instance is both created and destroyed
// within TestInfoImpl::Run()
- virtual Test *CreateTest() = 0;
+ virtual Test* CreateTest() = 0;
protected:
TestFactoryBase() {}
private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(TestFactoryBase);
+ TestFactoryBase(const TestFactoryBase&) = delete;
+ TestFactoryBase& operator=(const TestFactoryBase&) = delete;
};
// This class provides an implementation of the TestFactoryBase interface.
@@ -470,7 +466,7 @@ class TestFactoryBase {
template <class TestClass>
class TestFactoryImpl : public TestFactoryBase {
public:
- Test *CreateTest() override { return new TestClass; }
+ Test* CreateTest() override { return new TestClass; }
};
#if GTEST_OS_WINDOWS
@@ -479,9 +475,9 @@ class TestFactoryImpl : public TestFactoryBase {
// {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED}
// We pass a long instead of HRESULT to avoid causing an
// include dependency for the HRESULT type.
-GTEST_API_ AssertionResult IsHRESULTSuccess(const char *expr,
+GTEST_API_ AssertionResult IsHRESULTSuccess(const char* expr,
long hr); // NOLINT
-GTEST_API_ AssertionResult IsHRESULTFailure(const char *expr,
+GTEST_API_ AssertionResult IsHRESULTFailure(const char* expr,
long hr); // NOLINT
#endif // GTEST_OS_WINDOWS
@@ -491,7 +487,7 @@ using SetUpTestSuiteFunc = void (*)();
using TearDownTestSuiteFunc = void (*)();
struct CodeLocation {
- CodeLocation(const std::string &a_file, int a_line)
+ CodeLocation(const std::string& a_file, int a_line)
: file(a_file), line(a_line) {}
std::string file;
@@ -511,16 +507,17 @@ inline SetUpTearDownSuiteFuncType GetNotDefaultOrNull(
template <typename T>
// Note that SuiteApiResolver inherits from T because
-// SetUpTestSuite()/TearDownTestSuite() could be protected. Ths way
+// SetUpTestSuite()/TearDownTestSuite() could be protected. This way
// SuiteApiResolver can access them.
struct SuiteApiResolver : T {
// testing::Test is only forward declared at this point. So we make it a
- // dependend class for the compiler to be OK with it.
+ // dependent class for the compiler to be OK with it.
using Test =
typename std::conditional<sizeof(T) != 0, ::testing::Test, void>::type;
- static SetUpTearDownSuiteFuncType GetSetUpCaseOrSuite(const char *filename,
+ static SetUpTearDownSuiteFuncType GetSetUpCaseOrSuite(const char* filename,
int line_num) {
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
SetUpTearDownSuiteFuncType test_case_fp =
GetNotDefaultOrNull(&T::SetUpTestCase, &Test::SetUpTestCase);
SetUpTearDownSuiteFuncType test_suite_fp =
@@ -532,10 +529,16 @@ struct SuiteApiResolver : T {
<< filename << ":" << line_num;
return test_case_fp != nullptr ? test_case_fp : test_suite_fp;
+#else
+ (void)(filename);
+ (void)(line_num);
+ return &T::SetUpTestSuite;
+#endif
}
- static SetUpTearDownSuiteFuncType GetTearDownCaseOrSuite(const char *filename,
+ static SetUpTearDownSuiteFuncType GetTearDownCaseOrSuite(const char* filename,
int line_num) {
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
SetUpTearDownSuiteFuncType test_case_fp =
GetNotDefaultOrNull(&T::TearDownTestCase, &Test::TearDownTestCase);
SetUpTearDownSuiteFuncType test_suite_fp =
@@ -547,6 +550,11 @@ struct SuiteApiResolver : T {
<< filename << ":" << line_num;
return test_case_fp != nullptr ? test_case_fp : test_suite_fp;
+#else
+ (void)(filename);
+ (void)(line_num);
+ return &T::TearDownTestSuite;
+#endif
}
};
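From the user's side, the resolver above is what makes both the legacy TestCase and the newer TestSuite spellings work; a fixture only needs to define the hooks. A sketch against the public API (the DatabaseTest fixture is hypothetical):

#include "gtest/gtest.h"

class DatabaseTest : public ::testing::Test {
 protected:
  // Picked up via SuiteApiResolver; run once before/after the whole suite.
  static void SetUpTestSuite() { /* e.g. open a shared connection */ }
  static void TearDownTestSuite() { /* e.g. close it */ }
};

TEST_F(DatabaseTest, Connects) { SUCCEED(); }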
@@ -555,11 +563,11 @@ struct SuiteApiResolver : T {
//
// Arguments:
//
-// test_suite_name: name of the test suite
+// test_suite_name: name of the test suite
// name: name of the test
-// type_param the name of the test's type parameter, or NULL if
+// type_param: the name of the test's type parameter, or NULL if
// this is not a typed or a type-parameterized test.
-// value_param text representation of the test's value parameter,
+// value_param: text representation of the test's value parameter,
// or NULL if this is not a value-parameterized test.
// code_location: code location where the test is defined
// fixture_class_id: ID of the test fixture class
@@ -568,18 +576,16 @@ struct SuiteApiResolver : T {
// factory: pointer to the factory that creates a test object.
// The newly created TestInfo instance will assume
// ownership of the factory object.
-GTEST_API_ TestInfo *MakeAndRegisterTestInfo(
- const char *test_suite_name, const char *name, const char *type_param,
- const char *value_param, CodeLocation code_location,
+GTEST_API_ TestInfo* MakeAndRegisterTestInfo(
+ const char* test_suite_name, const char* name, const char* type_param,
+ const char* value_param, CodeLocation code_location,
TypeId fixture_class_id, SetUpTestSuiteFunc set_up_tc,
- TearDownTestSuiteFunc tear_down_tc, TestFactoryBase *factory);
+ TearDownTestSuiteFunc tear_down_tc, TestFactoryBase* factory);
// If *pstr starts with the given prefix, modifies *pstr to be right
// past the prefix and returns true; otherwise leaves *pstr unchanged
// and returns false. None of pstr, *pstr, and prefix can be NULL.
-GTEST_API_ bool SkipPrefix(const char *prefix, const char **pstr);
-
-#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
+GTEST_API_ bool SkipPrefix(const char* prefix, const char** pstr);
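A standalone sketch of the SkipPrefix contract just declared (the helper name is illustrative, not gtest's implementation):

#include <cassert>
#include <cstring>

// On a match, advance *pstr past the prefix and return true;
// otherwise leave *pstr unchanged and return false.
bool SkipPrefixSketch(const char* prefix, const char** pstr) {
  const size_t n = std::strlen(prefix);
  if (std::strncmp(*pstr, prefix, n) != 0) return false;
  *pstr += n;
  return true;
}

int main() {
  const char* s = "Suite.Name";
  assert(SkipPrefixSketch("Suite.", &s) && std::strcmp(s, "Name") == 0);
  assert(!SkipPrefixSketch("Other.", &s));  // s is left unchanged
}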
GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
/* class A needs to have dll-interface to be used by clients of class B */)
@@ -592,8 +598,8 @@ class GTEST_API_ TypedTestSuitePState {
// Adds the given test name to defined_test_names_ and returns true
// if the test suite hasn't been registered; otherwise aborts the
// program.
- bool AddTestName(const char *file, int line, const char *case_name,
- const char *test_name) {
+ bool AddTestName(const char* file, int line, const char* case_name,
+ const char* test_name) {
if (registered_) {
fprintf(stderr,
"%s Test %s must be defined before "
@@ -607,11 +613,11 @@ class GTEST_API_ TypedTestSuitePState {
return true;
}
- bool TestExists(const std::string &test_name) const {
+ bool TestExists(const std::string& test_name) const {
return registered_tests_.count(test_name) > 0;
}
- const CodeLocation &GetCodeLocation(const std::string &test_name) const {
+ const CodeLocation& GetCodeLocation(const std::string& test_name) const {
RegisteredTestsMap::const_iterator it = registered_tests_.find(test_name);
GTEST_CHECK_(it != registered_tests_.end());
return it->second;
@@ -620,9 +626,9 @@ class GTEST_API_ TypedTestSuitePState {
// Verifies that registered_tests match the test names in
// defined_test_names_; returns registered_tests if successful, or
// aborts the program otherwise.
- const char *VerifyRegisteredTestNames(const char *test_suite_name,
- const char *file, int line,
- const char *registered_tests);
+ const char* VerifyRegisteredTestNames(const char* test_suite_name,
+ const char* file, int line,
+ const char* registered_tests);
private:
typedef ::std::map<std::string, CodeLocation> RegisteredTestsMap;
@@ -640,8 +646,8 @@ GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
// Skips to the first non-space char after the first comma in 'str';
// returns NULL if no comma is found in 'str'.
-inline const char *SkipComma(const char *str) {
- const char *comma = strchr(str, ',');
+inline const char* SkipComma(const char* str) {
+ const char* comma = strchr(str, ',');
if (comma == nullptr) {
return nullptr;
}
@@ -652,15 +658,15 @@ inline const char *SkipComma(const char *str) {
// Returns the prefix of 'str' before the first comma in it; returns
// the entire string if it contains no comma.
-inline std::string GetPrefixUntilComma(const char *str) {
- const char *comma = strchr(str, ',');
+inline std::string GetPrefixUntilComma(const char* str) {
+ const char* comma = strchr(str, ',');
return comma == nullptr ? str : std::string(str, comma);
}
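These two helpers tokenize the comma-separated name list that REGISTER_TYPED_TEST_SUITE_P passes around. A usage sketch, assuming the internal header is reachable through gtest/gtest.h (internal APIs are not stable):

#include "gtest/gtest.h"

#include <cassert>
#include <cstring>

int main() {
  using ::testing::internal::GetPrefixUntilComma;
  using ::testing::internal::SkipComma;
  const char* names = "DoesA, DoesB";
  assert(GetPrefixUntilComma(names) == "DoesA");         // before the comma
  assert(std::strcmp(SkipComma(names), "DoesB") == 0);   // past ", "
}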
// Splits a given string on a given delimiter, populating a given
// vector with the fields.
-void SplitString(const ::std::string &str, char delimiter,
- ::std::vector<::std::string> *dest);
+void SplitString(const ::std::string& str, char delimiter,
+ ::std::vector<::std::string>* dest);
// The default argument to the template below for the case when the user does
// not provide a name generator.
@@ -677,11 +683,10 @@ struct NameGeneratorSelector {
};
template <typename NameGenerator>
-void GenerateNamesRecursively(internal::None, std::vector<std::string> *, int) {
-}
+void GenerateNamesRecursively(internal::None, std::vector<std::string>*, int) {}
template <typename NameGenerator, typename Types>
-void GenerateNamesRecursively(Types, std::vector<std::string> *result, int i) {
+void GenerateNamesRecursively(Types, std::vector<std::string>* result, int i) {
result->push_back(NameGenerator::template GetName<typename Types::Head>(i));
GenerateNamesRecursively<NameGenerator>(typename Types::Tail(), result,
i + 1);
@@ -708,9 +713,9 @@ class TypeParameterizedTest {
// specified in INSTANTIATE_TYPED_TEST_SUITE_P(Prefix, TestSuite,
// Types). Valid values for 'index' are [0, N - 1] where N is the
// length of Types.
- static bool Register(const char *prefix, const CodeLocation &code_location,
- const char *case_name, const char *test_names, int index,
- const std::vector<std::string> &type_names =
+ static bool Register(const char* prefix, const CodeLocation& code_location,
+ const char* case_name, const char* test_names, int index,
+ const std::vector<std::string>& type_names =
GenerateNames<DefaultNameGenerator, Types>()) {
typedef typename Types::Head Type;
typedef Fixture<Type> FixtureClass;
@@ -747,19 +752,19 @@ class TypeParameterizedTest {
template <GTEST_TEMPLATE_ Fixture, class TestSel>
class TypeParameterizedTest<Fixture, TestSel, internal::None> {
public:
- static bool Register(const char * /*prefix*/, const CodeLocation &,
- const char * /*case_name*/, const char * /*test_names*/,
+ static bool Register(const char* /*prefix*/, const CodeLocation&,
+ const char* /*case_name*/, const char* /*test_names*/,
int /*index*/,
- const std::vector<std::string> & =
+ const std::vector<std::string>& =
std::vector<std::string>() /*type_names*/) {
return true;
}
};
-GTEST_API_ void RegisterTypeParameterizedTestSuite(const char *test_suite_name,
+GTEST_API_ void RegisterTypeParameterizedTestSuite(const char* test_suite_name,
CodeLocation code_location);
GTEST_API_ void RegisterTypeParameterizedTestSuiteInstantiation(
- const char *case_name);
+ const char* case_name);
// TypeParameterizedTestSuite<Fixture, Tests, Types>::Register()
// registers *all combinations* of 'Tests' and 'Types' with Google
@@ -768,10 +773,10 @@ GTEST_API_ void RegisterTypeParameterizedTestSuiteInstantiation(
template <GTEST_TEMPLATE_ Fixture, typename Tests, typename Types>
class TypeParameterizedTestSuite {
public:
- static bool Register(const char *prefix, CodeLocation code_location,
- const TypedTestSuitePState *state, const char *case_name,
- const char *test_names,
- const std::vector<std::string> &type_names =
+ static bool Register(const char* prefix, CodeLocation code_location,
+ const TypedTestSuitePState* state, const char* case_name,
+ const char* test_names,
+ const std::vector<std::string>& type_names =
GenerateNames<DefaultNameGenerator, Types>()) {
RegisterTypeParameterizedTestSuiteInstantiation(case_name);
std::string test_name =
@@ -784,7 +789,7 @@ class TypeParameterizedTestSuite {
fflush(stderr);
posix::Abort();
}
- const CodeLocation &test_location = state->GetCodeLocation(test_name);
+ const CodeLocation& test_location = state->GetCodeLocation(test_name);
typedef typename Tests::Head Head;
@@ -805,17 +810,15 @@ class TypeParameterizedTestSuite {
template <GTEST_TEMPLATE_ Fixture, typename Types>
class TypeParameterizedTestSuite<Fixture, internal::None, Types> {
public:
- static bool Register(const char * /*prefix*/, const CodeLocation &,
- const TypedTestSuitePState * /*state*/,
- const char * /*case_name*/, const char * /*test_names*/,
- const std::vector<std::string> & =
+ static bool Register(const char* /*prefix*/, const CodeLocation&,
+ const TypedTestSuitePState* /*state*/,
+ const char* /*case_name*/, const char* /*test_names*/,
+ const std::vector<std::string>& =
std::vector<std::string>() /*type_names*/) {
return true;
}
};
-#endif // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
-
// Returns the current OS stack trace as an std::string.
//
// The maximum number of stack frames to be included is specified by
@@ -826,7 +829,7 @@ class TypeParameterizedTestSuite<Fixture, internal::None, Types> {
// For example, if Foo() calls Bar(), which in turn calls
// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in
// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't.
-GTEST_API_ std::string GetCurrentOsStackTraceExceptTop(UnitTest *unit_test,
+GTEST_API_ std::string GetCurrentOsStackTraceExceptTop(UnitTest* unit_test,
int skip_count);
// Helpers for suppressing warnings on unreachable code or constant
@@ -842,17 +845,17 @@ inline bool AlwaysFalse() { return !AlwaysTrue(); }
// variable declared in a conditional expression always being NULL in
// the else branch.
struct GTEST_API_ ConstCharPtr {
- ConstCharPtr(const char *str) : value(str) {}
+ ConstCharPtr(const char* str) : value(str) {}
operator bool() const { return true; }
- const char *value;
+ const char* value;
};
// Helper for declaring std::string within 'if' statement
// in pre C++17 build environment.
struct TrueWithString {
TrueWithString() = default;
- explicit TrueWithString(const char *str) : value(str) {}
- explicit TrueWithString(const std::string &str) : value(str) {}
+ explicit TrueWithString(const char* str) : value(str) {}
+ explicit TrueWithString(const std::string& str) : value(str) {}
explicit operator bool() const { return true; }
std::string value;
};
@@ -876,18 +879,42 @@ class GTEST_API_ Random {
private:
uint32_t state_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(Random);
+ Random(const Random&) = delete;
+ Random& operator=(const Random&) = delete;
};
// Turns const U&, U&, const U, and U all into U.
#define GTEST_REMOVE_REFERENCE_AND_CONST_(T) \
typename std::remove_const<typename std::remove_reference<T>::type>::type
-// IsAProtocolMessage<T>::value is a compile-time bool constant that's
-// true if and only if T is type proto2::Message or a subclass of it.
+// HasDebugStringAndShortDebugString<T>::value is a compile-time bool constant
+// that's true if and only if T has methods DebugString() and ShortDebugString()
+// that return std::string.
+template <typename T>
+class HasDebugStringAndShortDebugString {
+ private:
+ template <typename C>
+ static auto CheckDebugString(C*) -> typename std::is_same<
+ std::string, decltype(std::declval<const C>().DebugString())>::type;
+ template <typename>
+ static std::false_type CheckDebugString(...);
+
+ template <typename C>
+ static auto CheckShortDebugString(C*) -> typename std::is_same<
+ std::string, decltype(std::declval<const C>().ShortDebugString())>::type;
+ template <typename>
+ static std::false_type CheckShortDebugString(...);
+
+ using HasDebugStringType = decltype(CheckDebugString<T>(nullptr));
+ using HasShortDebugStringType = decltype(CheckShortDebugString<T>(nullptr));
+
+ public:
+ static constexpr bool value =
+ HasDebugStringType::value && HasShortDebugStringType::value;
+};
+
template <typename T>
-struct IsAProtocolMessage
- : public std::is_convertible<const T *, const ::proto2::Message *> {};
+constexpr bool HasDebugStringAndShortDebugString<T>::value;
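The same expression-SFINAE shape, reduced to a standalone sketch (names are illustrative): a probe overload whose return type only compiles when the member exists, plus a variadic fallback.

#include <string>
#include <type_traits>
#include <utility>

template <typename T>
class HasBothDebugStrings {
  // Viable only if both methods exist; encodes whether they return string.
  template <typename C>
  static auto Check(C*) -> std::integral_constant<
      bool,
      std::is_same<std::string,
                   decltype(std::declval<const C>().DebugString())>::value &&
          std::is_same<std::string, decltype(std::declval<const C>()
                                                 .ShortDebugString())>::value>;
  static std::false_type Check(...);  // fallback when substitution fails

 public:
  static constexpr bool value =
      decltype(Check(static_cast<T*>(nullptr)))::value;
};

struct ProtoLike {
  std::string DebugString() const { return "{}"; }
  std::string ShortDebugString() const { return "{}"; }
};

static_assert(HasBothDebugStrings<ProtoLike>::value, "detected");
static_assert(!HasBothDebugStrings<int>::value, "not detected");
int main() {}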
// When the compiler sees expression IsContainerTest<C>(0), if C is an
// STL-style container class, the first overload of IsContainerTest
@@ -915,9 +942,9 @@ struct IsAProtocolMessage
// IsContainerTest(...) doesn't work with Visual Age C++ and Sun C++.
typedef int IsContainer;
template <class C,
- class Iterator = decltype(::std::declval<const C &>().begin()),
- class = decltype(::std::declval<const C &>().end()),
- class = decltype(++::std::declval<Iterator &>()),
+ class Iterator = decltype(::std::declval<const C&>().begin()),
+ class = decltype(::std::declval<const C&>().end()),
+ class = decltype(++::std::declval<Iterator&>()),
class = decltype(*::std::declval<Iterator>()),
class = typename C::const_iterator>
IsContainer IsContainerTest(int /* dummy */) {
@@ -938,9 +965,9 @@ template <typename T>
struct IsHashTable {
private:
template <typename U>
- static char test(typename U::hasher *, typename U::reverse_iterator *);
+ static char test(typename U::hasher*, typename U::reverse_iterator*);
template <typename U>
- static int test(typename U::hasher *, ...);
+ static int test(typename U::hasher*, ...);
template <typename U>
static char test(...);
@@ -987,11 +1014,11 @@ struct IsRecursiveContainer : public IsRecursiveContainerImpl<C>::type {};
// 0, ArrayEq() degenerates into comparing a single pair of values.
template <typename T, typename U>
-bool ArrayEq(const T *lhs, size_t size, const U *rhs);
+bool ArrayEq(const T* lhs, size_t size, const U* rhs);
// This generic version is used when k is 0.
template <typename T, typename U>
-inline bool ArrayEq(const T &lhs, const U &rhs) {
+inline bool ArrayEq(const T& lhs, const U& rhs) {
return lhs == rhs;
}
@@ -1005,7 +1032,7 @@ inline bool ArrayEq(const T (&lhs)[N], const U (&rhs)[N]) {
// the previous ArrayEq() function, arrays with different sizes would
// lead to different copies of the template code.
template <typename T, typename U>
-bool ArrayEq(const T *lhs, size_t size, const U *rhs) {
+bool ArrayEq(const T* lhs, size_t size, const U* rhs) {
for (size_t i = 0; i != size; i++) {
if (!internal::ArrayEq(lhs[i], rhs[i])) return false;
}
@@ -1015,7 +1042,7 @@ bool ArrayEq(const T *lhs, size_t size, const U *rhs) {
// Finds the first element in the iterator range [begin, end) that
// equals elem. Element may be a native array type itself.
template <typename Iter, typename Element>
-Iter ArrayAwareFind(Iter begin, Iter end, const Element &elem) {
+Iter ArrayAwareFind(Iter begin, Iter end, const Element& elem) {
for (Iter it = begin; it != end; ++it) {
if (internal::ArrayEq(*it, elem)) return it;
}
@@ -1027,11 +1054,11 @@ Iter ArrayAwareFind(Iter begin, Iter end, const Element &elem) {
// CopyArray() degenerates into copying a single value.
template <typename T, typename U>
-void CopyArray(const T *from, size_t size, U *to);
+void CopyArray(const T* from, size_t size, U* to);
// This generic version is used when k is 0.
template <typename T, typename U>
-inline void CopyArray(const T &from, U *to) {
+inline void CopyArray(const T& from, U* to) {
*to = from;
}
@@ -1045,7 +1072,7 @@ inline void CopyArray(const T (&from)[N], U (*to)[N]) {
// the previous CopyArray() function, arrays with different sizes
// would lead to different copies of the template code.
template <typename T, typename U>
-void CopyArray(const T *from, size_t size, U *to) {
+void CopyArray(const T* from, size_t size, U* to) {
for (size_t i = 0; i != size; i++) {
internal::CopyArray(from[i], to + i);
}
@@ -1071,21 +1098,21 @@ class NativeArray {
public:
// STL-style container typedefs.
typedef Element value_type;
- typedef Element *iterator;
- typedef const Element *const_iterator;
+ typedef Element* iterator;
+ typedef const Element* const_iterator;
// Constructs from a native array. References the source.
- NativeArray(const Element *array, size_t count, RelationToSourceReference) {
+ NativeArray(const Element* array, size_t count, RelationToSourceReference) {
InitRef(array, count);
}
// Constructs from a native array. Copies the source.
- NativeArray(const Element *array, size_t count, RelationToSourceCopy) {
+ NativeArray(const Element* array, size_t count, RelationToSourceCopy) {
InitCopy(array, count);
}
// Copy constructor.
- NativeArray(const NativeArray &rhs) {
+ NativeArray(const NativeArray& rhs) {
(this->*rhs.clone_)(rhs.array_, rhs.size_);
}
@@ -1097,7 +1124,7 @@ class NativeArray {
size_t size() const { return size_; }
const_iterator begin() const { return array_; }
const_iterator end() const { return array_ + size_; }
- bool operator==(const NativeArray &rhs) const {
+ bool operator==(const NativeArray& rhs) const {
return size() == rhs.size() && ArrayEq(begin(), size(), rhs.begin());
}
@@ -1107,8 +1134,8 @@ class NativeArray {
"Type must not be a reference");
// Initializes this object with a copy of the input.
- void InitCopy(const Element *array, size_t a_size) {
- Element *const copy = new Element[a_size];
+ void InitCopy(const Element* array, size_t a_size) {
+ Element* const copy = new Element[a_size];
CopyArray(array, a_size, copy);
array_ = copy;
size_ = a_size;
@@ -1116,17 +1143,15 @@ class NativeArray {
}
// Initializes this object with a reference of the input.
- void InitRef(const Element *array, size_t a_size) {
+ void InitRef(const Element* array, size_t a_size) {
array_ = array;
size_ = a_size;
clone_ = &NativeArray::InitRef;
}
- const Element *array_;
+ const Element* array_;
size_t size_;
- void (NativeArray::*clone_)(const Element *, size_t);
-
- GTEST_DISALLOW_ASSIGN_(NativeArray);
+ void (NativeArray::*clone_)(const Element*, size_t);
};
// Backport of std::index_sequence.
@@ -1150,12 +1175,18 @@ struct DoubleSequence<false, IndexSequence<I...>, sizeofT> {
// Backport of std::make_index_sequence.
// It uses O(ln(N)) instantiation depth.
template <size_t N>
-struct MakeIndexSequence
- : DoubleSequence<N % 2 == 1, typename MakeIndexSequence<N / 2>::type,
+struct MakeIndexSequenceImpl
+ : DoubleSequence<N % 2 == 1, typename MakeIndexSequenceImpl<N / 2>::type,
N / 2>::type {};
template <>
-struct MakeIndexSequence<0> : IndexSequence<> {};
+struct MakeIndexSequenceImpl<0> : IndexSequence<> {};
+
+template <size_t N>
+using MakeIndexSequence = typename MakeIndexSequenceImpl<N>::type;
+
+template <typename... T>
+using IndexSequenceFor = typename MakeIndexSequence<sizeof...(T)>::type;
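MakeIndexSequenceImpl halves N at each step, so instantiation depth is O(log N) instead of O(N). On C++14 and later the standard library provides the same tool; a sketch of the usage pattern the backport enables, spelled with std::index_sequence:

#include <cstddef>
#include <iostream>
#include <tuple>
#include <utility>

// Expand a tuple into a call over its elements, indexed by a sequence.
template <typename Tuple, std::size_t... I>
void PrintEach(const Tuple& t, std::index_sequence<I...>) {
  // Pre-C++17 pack-expansion trick; a fold expression would also work.
  int dummy[] = {(std::cout << std::get<I>(t) << ' ', 0)...};
  (void)dummy;
}

int main() {
  auto t = std::make_tuple(1, 2.5, "three");
  PrintEach(t, std::make_index_sequence<3>{});
  std::cout << '\n';
}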
template <size_t>
struct Ignore {
@@ -1181,6 +1212,8 @@ struct ElemFromList {
static_cast<T (*)()>(nullptr)...));
};
+struct FlatTupleConstructTag {};
+
template <typename... T>
class FlatTuple;
@@ -1191,7 +1224,9 @@ template <typename... T, size_t I>
struct FlatTupleElemBase<FlatTuple<T...>, I> {
using value_type = typename ElemFromList<I, T...>::type;
FlatTupleElemBase() = default;
- explicit FlatTupleElemBase(value_type t) : value(std::move(t)) {}
+ template <typename Arg>
+ explicit FlatTupleElemBase(FlatTupleConstructTag, Arg&& t)
+ : value(std::forward<Arg>(t)) {}
value_type value;
};
@@ -1203,8 +1238,30 @@ struct FlatTupleBase<FlatTuple<T...>, IndexSequence<Idx...>>
: FlatTupleElemBase<FlatTuple<T...>, Idx>... {
using Indices = IndexSequence<Idx...>;
FlatTupleBase() = default;
- explicit FlatTupleBase(T... t)
- : FlatTupleElemBase<FlatTuple<T...>, Idx>(std::move(t))... {}
+ template <typename... Args>
+ explicit FlatTupleBase(FlatTupleConstructTag, Args&&... args)
+ : FlatTupleElemBase<FlatTuple<T...>, Idx>(FlatTupleConstructTag{},
+ std::forward<Args>(args))... {}
+
+ template <size_t I>
+ const typename ElemFromList<I, T...>::type& Get() const {
+ return FlatTupleElemBase<FlatTuple<T...>, I>::value;
+ }
+
+ template <size_t I>
+ typename ElemFromList<I, T...>::type& Get() {
+ return FlatTupleElemBase<FlatTuple<T...>, I>::value;
+ }
+
+ template <typename F>
+ auto Apply(F&& f) -> decltype(std::forward<F>(f)(this->Get<Idx>()...)) {
+ return std::forward<F>(f)(Get<Idx>()...);
+ }
+
+ template <typename F>
+ auto Apply(F&& f) const -> decltype(std::forward<F>(f)(this->Get<Idx>()...)) {
+ return std::forward<F>(f)(Get<Idx>()...);
+ }
};
// Analog to std::tuple but with different tradeoffs.
@@ -1225,17 +1282,12 @@ class FlatTuple
public:
FlatTuple() = default;
- explicit FlatTuple(T... t) : FlatTuple::FlatTupleBase(std::move(t)...) {}
+ template <typename... Args>
+ explicit FlatTuple(FlatTupleConstructTag tag, Args&&... args)
+ : FlatTuple::FlatTupleBase(tag, std::forward<Args>(args)...) {}
- template <size_t I>
- const typename ElemFromList<I, T...>::type &Get() const {
- return static_cast<const FlatTupleElemBase<FlatTuple, I> *>(this)->value;
- }
-
- template <size_t I>
- typename ElemFromList<I, T...>::type &Get() {
- return static_cast<FlatTupleElemBase<FlatTuple, I> *>(this)->value;
- }
+ using FlatTuple::FlatTupleBase::Apply;
+ using FlatTuple::FlatTupleBase::Get;
};
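A usage sketch of the reworked interface (internal, unstable API): the tag argument keeps the perfect-forwarding constructor from hijacking copy construction, and Get/Apply now live in the base.

#include "gtest/gtest.h"  // internal FlatTuple is reachable via this include

#include <cassert>
#include <string>

int main() {
  using ::testing::internal::FlatTuple;
  using ::testing::internal::FlatTupleConstructTag;
  FlatTuple<int, std::string> t(FlatTupleConstructTag{}, 42,
                                std::string("hi"));
  assert(t.Get<0>() == 42);
  t.Apply([](int i, const std::string& s) { assert(i == 42 && s == "hi"); });
}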
// Utility functions to be called with static_assert to induce deprecation
@@ -1268,6 +1320,22 @@ constexpr bool InstantiateTypedTestCase_P_IsDeprecated() { return true; }
} // namespace internal
} // namespace testing
+namespace std {
+// Some standard library implementations use `struct tuple_size` and some use
+// `class tuple_size`. Clang warns about the mismatch.
+// https://reviews.llvm.org/D55466
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wmismatched-tags"
+#endif
+template <typename... Ts>
+struct tuple_size<testing::internal::FlatTuple<Ts...>>
+ : std::integral_constant<size_t, sizeof...(Ts)> {};
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+} // namespace std
+
#define GTEST_MESSAGE_AT_(file, line, message, result_type) \
::testing::internal::AssertHelper(result_type, file, line, message) = \
::testing::Message()
@@ -1290,20 +1358,74 @@ constexpr bool InstantiateTypedTestCase_P_IsDeprecated() { return true; }
// Suppress MSVC warning 4702 (unreachable code) for the code following
// statement if it returns or throws (or doesn't return or throw in some
// situations).
+// NOTE: The "else" is important to keep this expansion to prevent a top-level
+// "else" from attaching to our "if".
#define GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) \
if (::testing::internal::AlwaysTrue()) { \
statement; \
+ } else /* NOLINT */ \
+ static_assert(true, "") // User must have a semicolon after expansion.
+
+#if GTEST_HAS_EXCEPTIONS
+
+namespace testing {
+namespace internal {
+
+class NeverThrown {
+ public:
+ const char* what() const noexcept {
+ return "this exception should never be thrown";
}
+};
+
+} // namespace internal
+} // namespace testing
+
+#if GTEST_HAS_RTTI
+
+#define GTEST_EXCEPTION_TYPE_(e) ::testing::internal::GetTypeName(typeid(e))
+
+#else // GTEST_HAS_RTTI
+
+#define GTEST_EXCEPTION_TYPE_(e) \
+ std::string { "an std::exception-derived error" }
+
+#endif // GTEST_HAS_RTTI
+
+#define GTEST_TEST_THROW_CATCH_STD_EXCEPTION_(statement, expected_exception) \
+ catch (typename std::conditional< \
+ std::is_same<typename std::remove_cv<typename std::remove_reference< \
+ expected_exception>::type>::type, \
+ std::exception>::value, \
+ const ::testing::internal::NeverThrown&, const std::exception&>::type \
+ e) { \
+ gtest_msg.value = "Expected: " #statement \
+ " throws an exception of type " #expected_exception \
+ ".\n Actual: it throws "; \
+ gtest_msg.value += GTEST_EXCEPTION_TYPE_(e); \
+ gtest_msg.value += " with description \""; \
+ gtest_msg.value += e.what(); \
+ gtest_msg.value += "\"."; \
+ goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \
+ }
+
+#else // GTEST_HAS_EXCEPTIONS
+
+#define GTEST_TEST_THROW_CATCH_STD_EXCEPTION_(statement, expected_exception)
+
+#endif // GTEST_HAS_EXCEPTIONS
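What this machinery buys in practice, shown against the public API (the test below intentionally fails so the improved message is produced):

#include "gtest/gtest.h"

#include <stdexcept>

TEST(ThrowDiagnostics, WrongExceptionTypeIsNamed) {
  // With the catch clause above in place, the failure message reports the
  // actual type (via RTTI when available) and its what() text, roughly:
  //   Expected: ... throws an exception of type std::logic_error.
  //     Actual: it throws std::runtime_error with description "boom".
  EXPECT_THROW(throw std::runtime_error("boom"), std::logic_error);
}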
#define GTEST_TEST_THROW_(statement, expected_exception, fail) \
GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
- if (::testing::internal::ConstCharPtr gtest_msg = "") { \
+ if (::testing::internal::TrueWithString gtest_msg{}) { \
bool gtest_caught_expected = false; \
try { \
GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
- } catch (expected_exception const &) { \
+ } catch (expected_exception const&) { \
gtest_caught_expected = true; \
- } catch (...) { \
+ } \
+ GTEST_TEST_THROW_CATCH_STD_EXCEPTION_(statement, expected_exception) \
+ catch (...) { \
gtest_msg.value = "Expected: " #statement \
" throws an exception of type " #expected_exception \
".\n Actual: it throws a different type."; \
@@ -1315,19 +1437,20 @@ constexpr bool InstantiateTypedTestCase_P_IsDeprecated() { return true; }
".\n Actual: it throws nothing."; \
goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \
} \
- } else \
+ } else /*NOLINT*/ \
GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__) \
- : fail(gtest_msg.value)
+ : fail(gtest_msg.value.c_str())
#if GTEST_HAS_EXCEPTIONS
-#define GTEST_TEST_NO_THROW_CATCH_STD_EXCEPTION_() \
- catch (std::exception const &e) { \
- gtest_msg.value = \
- ("it throws std::exception-derived exception with description: \""); \
- gtest_msg.value += e.what(); \
- gtest_msg.value += "\"."; \
- goto GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__); \
+#define GTEST_TEST_NO_THROW_CATCH_STD_EXCEPTION_() \
+ catch (std::exception const& e) { \
+ gtest_msg.value = "it throws "; \
+ gtest_msg.value += GTEST_EXCEPTION_TYPE_(e); \
+ gtest_msg.value += " with description \""; \
+ gtest_msg.value += e.what(); \
+ gtest_msg.value += "\"."; \
+ goto GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__); \
}
#else // GTEST_HAS_EXCEPTIONS
@@ -1374,7 +1497,7 @@ constexpr bool InstantiateTypedTestCase_P_IsDeprecated() { return true; }
// Implements Boolean test assertions such as EXPECT_TRUE. expression can be
// either a boolean expression or an AssertionResult. text is a textual
-// represenation of expression as it was passed into the EXPECT_TRUE.
+// representation of expression as it was passed into the EXPECT_TRUE.
#define GTEST_TEST_BOOLEAN_(expression, text, actual, expected, fail) \
GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
if (const ::testing::AssertionResult gtest_ar_ = \
@@ -1405,37 +1528,43 @@ constexpr bool InstantiateTypedTestCase_P_IsDeprecated() { return true; }
test_suite_name##_##test_name##_Test
// Helper macro for defining tests.
-#define GTEST_TEST_(test_suite_name, test_name, parent_class, parent_id) \
- static_assert(sizeof(GTEST_STRINGIFY_(test_suite_name)) > 1, \
- "test_suite_name must not be empty"); \
- static_assert(sizeof(GTEST_STRINGIFY_(test_name)) > 1, \
- "test_name must not be empty"); \
- class GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) \
- : public parent_class { \
- public: \
- GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)() {} \
- ~GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)() override = default; \
- GTEST_DISALLOW_COPY_AND_ASSIGN_(GTEST_TEST_CLASS_NAME_(test_suite_name, \
- test_name)); \
- GTEST_DISALLOW_MOVE_AND_ASSIGN_(GTEST_TEST_CLASS_NAME_(test_suite_name, \
- test_name)); \
- \
- private: \
- void TestBody() override; \
- static ::testing::TestInfo *const test_info_ GTEST_ATTRIBUTE_UNUSED_; \
- }; \
- \
- ::testing::TestInfo *const GTEST_TEST_CLASS_NAME_(test_suite_name, \
- test_name)::test_info_ = \
- ::testing::internal::MakeAndRegisterTestInfo( \
- #test_suite_name, #test_name, nullptr, nullptr, \
- ::testing::internal::CodeLocation(__FILE__, __LINE__), (parent_id), \
- ::testing::internal::SuiteApiResolver< \
- parent_class>::GetSetUpCaseOrSuite(__FILE__, __LINE__), \
- ::testing::internal::SuiteApiResolver< \
- parent_class>::GetTearDownCaseOrSuite(__FILE__, __LINE__), \
- new ::testing::internal::TestFactoryImpl<GTEST_TEST_CLASS_NAME_( \
- test_suite_name, test_name)>); \
+#define GTEST_TEST_(test_suite_name, test_name, parent_class, parent_id) \
+ static_assert(sizeof(GTEST_STRINGIFY_(test_suite_name)) > 1, \
+ "test_suite_name must not be empty"); \
+ static_assert(sizeof(GTEST_STRINGIFY_(test_name)) > 1, \
+ "test_name must not be empty"); \
+ class GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) \
+ : public parent_class { \
+ public: \
+ GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)() = default; \
+ ~GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)() override = default; \
+ GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) \
+ (const GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) &) = delete; \
+ GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) & operator=( \
+ const GTEST_TEST_CLASS_NAME_(test_suite_name, \
+ test_name) &) = delete; /* NOLINT */ \
+ GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) \
+ (GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) &&) noexcept = delete; \
+ GTEST_TEST_CLASS_NAME_(test_suite_name, test_name) & operator=( \
+ GTEST_TEST_CLASS_NAME_(test_suite_name, \
+ test_name) &&) noexcept = delete; /* NOLINT */ \
+ \
+ private: \
+ void TestBody() override; \
+ static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_; \
+ }; \
+ \
+ ::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_suite_name, \
+ test_name)::test_info_ = \
+ ::testing::internal::MakeAndRegisterTestInfo( \
+ #test_suite_name, #test_name, nullptr, nullptr, \
+ ::testing::internal::CodeLocation(__FILE__, __LINE__), (parent_id), \
+ ::testing::internal::SuiteApiResolver< \
+ parent_class>::GetSetUpCaseOrSuite(__FILE__, __LINE__), \
+ ::testing::internal::SuiteApiResolver< \
+ parent_class>::GetTearDownCaseOrSuite(__FILE__, __LINE__), \
+ new ::testing::internal::TestFactoryImpl<GTEST_TEST_CLASS_NAME_( \
+ test_suite_name, test_name)>); \
void GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)::TestBody()
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-param-util.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-param-util.h
index 0d8fc71ce26..e7af2f904a4 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-param-util.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-param-util.h
@@ -29,10 +29,12 @@
// Type and function utilities for implementing parameterized tests.
-// GOOGLETEST_CM0001 DO NOT DELETE
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
#include <ctype.h>
@@ -45,17 +47,17 @@
#include <utility>
#include <vector>
-#include "gtest/internal/gtest-internal.h"
-#include "gtest/internal/gtest-port.h"
#include "gtest/gtest-printers.h"
#include "gtest/gtest-test-part.h"
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-port.h"
namespace testing {
// Input to a parameterized test name generator, describing a test parameter.
// Consists of the parameter value and the integer parameter index.
template <class ParamType>
struct TestParamInfo {
- TestParamInfo(const ParamType &a_param, size_t an_index)
+ TestParamInfo(const ParamType& a_param, size_t an_index)
: param(a_param), index(an_index) {}
ParamType param;
size_t index;
@@ -65,7 +67,7 @@ struct TestParamInfo {
// testing::PrintToString.
struct PrintToStringParamName {
template <class ParamType>
- std::string operator()(const TestParamInfo<ParamType> &info) const {
+ std::string operator()(const TestParamInfo<ParamType>& info) const {
return PrintToString(info.param);
}
};
@@ -79,7 +81,7 @@ namespace internal {
// fixture class for the same test suite. This may happen when
// TEST_P macro is used to define two tests with the same name
// but in different namespaces.
-GTEST_API_ void ReportInvalidTestSuiteType(const char *test_suite_name,
+GTEST_API_ void ReportInvalidTestSuiteType(const char* test_suite_name,
CodeLocation code_location);
template <typename>
@@ -96,7 +98,7 @@ class ParamIteratorInterface {
// A pointer to the base generator instance.
// Used only for the purposes of iterator comparison
// to make sure that two iterators belong to the same generator.
- virtual const ParamGeneratorInterface<T> *BaseGenerator() const = 0;
+ virtual const ParamGeneratorInterface<T>* BaseGenerator() const = 0;
// Advances iterator to point to the next element
// provided by the generator. The caller is responsible
// for not calling Advance() on an iterator equal to
@@ -104,16 +106,16 @@ class ParamIteratorInterface {
virtual void Advance() = 0;
// Clones the iterator object. Used for implementing copy semantics
// of ParamIterator<T>.
- virtual ParamIteratorInterface *Clone() const = 0;
+ virtual ParamIteratorInterface* Clone() const = 0;
// Dereferences the current iterator and provides (read-only) access
// to the pointed value. It is the caller's responsibility not to call
// Current() on an iterator equal to BaseGenerator()->End().
// Used for implementing ParamGenerator<T>::operator*().
- virtual const T *Current() const = 0;
+ virtual const T* Current() const = 0;
// Determines whether the given iterator and other point to the same
// element in the sequence generated by the generator.
// Used for implementing ParamGenerator<T>::operator==().
- virtual bool Equals(const ParamIteratorInterface &other) const = 0;
+ virtual bool Equals(const ParamIteratorInterface& other) const = 0;
};
// Class iterating over elements provided by an implementation of
@@ -123,39 +125,39 @@ template <typename T>
class ParamIterator {
public:
typedef T value_type;
- typedef const T &reference;
+ typedef const T& reference;
typedef ptrdiff_t difference_type;
// ParamIterator assumes ownership of the impl_ pointer.
- ParamIterator(const ParamIterator &other) : impl_(other.impl_->Clone()) {}
- ParamIterator &operator=(const ParamIterator &other) {
+ ParamIterator(const ParamIterator& other) : impl_(other.impl_->Clone()) {}
+ ParamIterator& operator=(const ParamIterator& other) {
if (this != &other) impl_.reset(other.impl_->Clone());
return *this;
}
- const T &operator*() const { return *impl_->Current(); }
- const T *operator->() const { return impl_->Current(); }
+ const T& operator*() const { return *impl_->Current(); }
+ const T* operator->() const { return impl_->Current(); }
// Prefix version of operator++.
- ParamIterator &operator++() {
+ ParamIterator& operator++() {
impl_->Advance();
return *this;
}
// Postfix version of operator++.
ParamIterator operator++(int /*unused*/) {
- ParamIteratorInterface<T> *clone = impl_->Clone();
+ ParamIteratorInterface<T>* clone = impl_->Clone();
impl_->Advance();
return ParamIterator(clone);
}
- bool operator==(const ParamIterator &other) const {
+ bool operator==(const ParamIterator& other) const {
return impl_.get() == other.impl_.get() || impl_->Equals(*other.impl_);
}
- bool operator!=(const ParamIterator &other) const {
+ bool operator!=(const ParamIterator& other) const {
return !(*this == other);
}
private:
friend class ParamGenerator<T>;
- explicit ParamIterator(ParamIteratorInterface<T> *impl) : impl_(impl) {}
+ explicit ParamIterator(ParamIteratorInterface<T>* impl) : impl_(impl) {}
std::unique_ptr<ParamIteratorInterface<T>> impl_;
};
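These iterator mechanics are what make a ParamGenerator range-iterable; a sketch (ValuesIn is public API, ParamGenerator itself is internal and may change):

#include "gtest/gtest.h"

#include <iostream>
#include <vector>

int main() {
  const std::vector<int> v = {1, 2, 3};
  ::testing::internal::ParamGenerator<int> gen = ::testing::ValuesIn(v);
  for (int x : gen) std::cout << x << '\n';  // prints 1, 2, 3
}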
@@ -169,8 +171,8 @@ class ParamGeneratorInterface {
virtual ~ParamGeneratorInterface() {}
// Generator interface definition
- virtual ParamIteratorInterface<T> *Begin() const = 0;
- virtual ParamIteratorInterface<T> *End() const = 0;
+ virtual ParamIteratorInterface<T>* Begin() const = 0;
+ virtual ParamIteratorInterface<T>* End() const = 0;
};
// Wraps ParamGeneratorInterface<T> and provides general generator syntax
@@ -183,10 +185,10 @@ class ParamGenerator {
public:
typedef ParamIterator<T> iterator;
- explicit ParamGenerator(ParamGeneratorInterface<T> *impl) : impl_(impl) {}
- ParamGenerator(const ParamGenerator &other) : impl_(other.impl_) {}
+ explicit ParamGenerator(ParamGeneratorInterface<T>* impl) : impl_(impl) {}
+ ParamGenerator(const ParamGenerator& other) : impl_(other.impl_) {}
- ParamGenerator &operator=(const ParamGenerator &other) {
+ ParamGenerator& operator=(const ParamGenerator& other) {
impl_ = other.impl_;
return *this;
}
@@ -206,37 +208,39 @@ template <typename T, typename IncrementT>
class RangeGenerator : public ParamGeneratorInterface<T> {
public:
RangeGenerator(T begin, T end, IncrementT step)
- : begin_(begin), end_(end), step_(step),
+ : begin_(begin),
+ end_(end),
+ step_(step),
end_index_(CalculateEndIndex(begin, end, step)) {}
~RangeGenerator() override {}
- ParamIteratorInterface<T> *Begin() const override {
+ ParamIteratorInterface<T>* Begin() const override {
return new Iterator(this, begin_, 0, step_);
}
- ParamIteratorInterface<T> *End() const override {
+ ParamIteratorInterface<T>* End() const override {
return new Iterator(this, end_, end_index_, step_);
}
private:
class Iterator : public ParamIteratorInterface<T> {
public:
- Iterator(const ParamGeneratorInterface<T> *base, T value, int index,
+ Iterator(const ParamGeneratorInterface<T>* base, T value, int index,
IncrementT step)
: base_(base), value_(value), index_(index), step_(step) {}
~Iterator() override {}
- const ParamGeneratorInterface<T> *BaseGenerator() const override {
+ const ParamGeneratorInterface<T>* BaseGenerator() const override {
return base_;
}
void Advance() override {
value_ = static_cast<T>(value_ + step_);
index_++;
}
- ParamIteratorInterface<T> *Clone() const override {
+ ParamIteratorInterface<T>* Clone() const override {
return new Iterator(*this);
}
- const T *Current() const override { return &value_; }
- bool Equals(const ParamIteratorInterface<T> &other) const override {
+ const T* Current() const override { return &value_; }
+ bool Equals(const ParamIteratorInterface<T>& other) const override {
// Having the same base generator guarantees that the other
// iterator is of the same type and we can downcast.
GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
@@ -248,28 +252,31 @@ class RangeGenerator : public ParamGeneratorInterface<T> {
}
private:
- Iterator(const Iterator &other)
- : ParamIteratorInterface<T>(), base_(other.base_), value_(other.value_),
- index_(other.index_), step_(other.step_) {}
+ Iterator(const Iterator& other)
+ : ParamIteratorInterface<T>(),
+ base_(other.base_),
+ value_(other.value_),
+ index_(other.index_),
+ step_(other.step_) {}
// No implementation - assignment is unsupported.
- void operator=(const Iterator &other);
+ void operator=(const Iterator& other);
- const ParamGeneratorInterface<T> *const base_;
+ const ParamGeneratorInterface<T>* const base_;
T value_;
int index_;
const IncrementT step_;
}; // class RangeGenerator::Iterator
- static int CalculateEndIndex(const T &begin, const T &end,
- const IncrementT &step) {
+ static int CalculateEndIndex(const T& begin, const T& end,
+ const IncrementT& step) {
int end_index = 0;
for (T i = begin; i < end; i = static_cast<T>(i + step)) end_index++;
return end_index;
}
// No implementation - assignment is unsupported.
- void operator=(const RangeGenerator &other);
+ void operator=(const RangeGenerator& other);
const T begin_;
const T end_;
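The eager end-index count above is what tells Range(begin, end, step) how many parameters it yields. A public-API sketch (StepTest is a hypothetical fixture):

#include "gtest/gtest.h"

class StepTest : public ::testing::TestWithParam<int> {};

TEST_P(StepTest, IsMultipleOfThree) { EXPECT_EQ(GetParam() % 3, 0); }

// CalculateEndIndex counts 4 steps here, so the parameters are 0, 3, 6, 9.
INSTANTIATE_TEST_SUITE_P(UpToTen, StepTest, ::testing::Range(0, 10, 3));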
@@ -291,10 +298,10 @@ class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface<T> {
: container_(begin, end) {}
~ValuesInIteratorRangeGenerator() override {}
- ParamIteratorInterface<T> *Begin() const override {
+ ParamIteratorInterface<T>* Begin() const override {
return new Iterator(this, container_.begin());
}
- ParamIteratorInterface<T> *End() const override {
+ ParamIteratorInterface<T>* End() const override {
return new Iterator(this, container_.end());
}
@@ -303,19 +310,19 @@ class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface<T> {
class Iterator : public ParamIteratorInterface<T> {
public:
- Iterator(const ParamGeneratorInterface<T> *base,
+ Iterator(const ParamGeneratorInterface<T>* base,
typename ContainerType::const_iterator iterator)
: base_(base), iterator_(iterator) {}
~Iterator() override {}
- const ParamGeneratorInterface<T> *BaseGenerator() const override {
+ const ParamGeneratorInterface<T>* BaseGenerator() const override {
return base_;
}
void Advance() override {
++iterator_;
value_.reset();
}
- ParamIteratorInterface<T> *Clone() const override {
+ ParamIteratorInterface<T>* Clone() const override {
return new Iterator(*this);
}
// We need to use the cached value referenced by iterator_ because *iterator_
@@ -325,11 +332,11 @@ class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface<T> {
// can advance iterator_ beyond the end of the range, and we cannot
// detect that fact. The client code, on the other hand, is
// responsible for not calling Current() on an out-of-range iterator.
- const T *Current() const override {
+ const T* Current() const override {
if (value_.get() == nullptr) value_.reset(new T(*iterator_));
return value_.get();
}
- bool Equals(const ParamIteratorInterface<T> &other) const override {
+ bool Equals(const ParamIteratorInterface<T>& other) const override {
// Having the same base generator guarantees that the other
// iterator is of the same type and we can downcast.
GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
@@ -340,13 +347,14 @@ class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface<T> {
}
private:
- Iterator(const Iterator &other)
+ Iterator(const Iterator& other)
// The explicit constructor call suppresses a false warning
// emitted by gcc when supplied with the -Wextra option.
- : ParamIteratorInterface<T>(), base_(other.base_),
+ : ParamIteratorInterface<T>(),
+ base_(other.base_),
iterator_(other.iterator_) {}
- const ParamGeneratorInterface<T> *const base_;
+ const ParamGeneratorInterface<T>* const base_;
typename ContainerType::const_iterator iterator_;
// A cached value of *iterator_. We keep it here to allow access by
// pointer in the wrapping iterator's operator->().
@@ -357,7 +365,7 @@ class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface<T> {
}; // class ValuesInIteratorRangeGenerator::Iterator
// No implementation - assignment is unsupported.
- void operator=(const ValuesInIteratorRangeGenerator &other);
+ void operator=(const ValuesInIteratorRangeGenerator& other);
const ContainerType container_;
}; // class ValuesInIteratorRangeGenerator
@@ -367,7 +375,7 @@ class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface<T> {
// Default parameterized test name generator, returns a string containing the
// integer test parameter index.
template <class ParamType>
-std::string DefaultParamName(const TestParamInfo<ParamType> &info) {
+std::string DefaultParamName(const TestParamInfo<ParamType>& info) {
Message name_stream;
name_stream << info.index;
return name_stream.GetString();
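So by default a test instantiated from Values(2, 4) is named Prefix/Suite.Test/0 and .../1; passing testing::PrintToStringParamName (defined near the top of this header) as the optional fourth argument swaps the index for the printed value. A sketch (WidthTest is hypothetical):

#include "gtest/gtest.h"

class WidthTest : public ::testing::TestWithParam<int> {};

TEST_P(WidthTest, IsEven) { EXPECT_EQ(GetParam() % 2, 0); }

// Tests are named Named/WidthTest.IsEven/2 and /4 instead of /0 and /1.
INSTANTIATE_TEST_SUITE_P(Named, WidthTest, ::testing::Values(2, 4),
                         ::testing::PrintToStringParamName());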
@@ -378,7 +386,7 @@ void TestNotEmpty() {
static_assert(sizeof(T) == 0, "Empty arguments are not allowed.");
}
template <typename T = int>
-void TestNotEmpty(const T &) {}
+void TestNotEmpty(const T&) {}
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
@@ -390,7 +398,7 @@ class ParameterizedTestFactory : public TestFactoryBase {
typedef typename TestClass::ParamType ParamType;
explicit ParameterizedTestFactory(ParamType parameter)
: parameter_(parameter) {}
- Test *CreateTest() override {
+ Test* CreateTest() override {
TestClass::SetParam(&parameter_);
return new TestClass();
}
@@ -398,7 +406,8 @@ class ParameterizedTestFactory : public TestFactoryBase {
private:
const ParamType parameter_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestFactory);
+ ParameterizedTestFactory(const ParameterizedTestFactory&) = delete;
+ ParameterizedTestFactory& operator=(const ParameterizedTestFactory&) = delete;
};
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
@@ -410,7 +419,7 @@ class TestMetaFactoryBase {
public:
virtual ~TestMetaFactoryBase() {}
- virtual TestFactoryBase *CreateTestFactory(ParamType parameter) = 0;
+ virtual TestFactoryBase* CreateTestFactory(ParamType parameter) = 0;
};
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
@@ -429,12 +438,13 @@ class TestMetaFactory
TestMetaFactory() {}
- TestFactoryBase *CreateTestFactory(ParamType parameter) override {
+ TestFactoryBase* CreateTestFactory(ParamType parameter) override {
return new ParameterizedTestFactory<TestSuite>(parameter);
}
private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(TestMetaFactory);
+ TestMetaFactory(const TestMetaFactory&) = delete;
+ TestMetaFactory& operator=(const TestMetaFactory&) = delete;
};
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
@@ -452,8 +462,8 @@ class ParameterizedTestSuiteInfoBase {
virtual ~ParameterizedTestSuiteInfoBase() {}
// Base part of test suite name for display purposes.
- virtual const std::string &GetTestSuiteName() const = 0;
- // Test case id to verify identity.
+ virtual const std::string& GetTestSuiteName() const = 0;
+ // Test suite id to verify identity.
virtual TypeId GetTestSuiteTypeId() const = 0;
// UnitTest class invokes this method to register tests in this
// test suite right before running them in RUN_ALL_TESTS macro.
@@ -465,18 +475,21 @@ class ParameterizedTestSuiteInfoBase {
ParameterizedTestSuiteInfoBase() {}
private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestSuiteInfoBase);
+ ParameterizedTestSuiteInfoBase(const ParameterizedTestSuiteInfoBase&) =
+ delete;
+ ParameterizedTestSuiteInfoBase& operator=(
+ const ParameterizedTestSuiteInfoBase&) = delete;
};
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Report the name of a test suite as safe to ignore
// as the side effect of construction of this type.
-struct MarkAsIgnored {
- explicit MarkAsIgnored(const char *test_suite);
+struct GTEST_API_ MarkAsIgnored {
+ explicit MarkAsIgnored(const char* test_suite);
};
-GTEST_API_ void InsertSyntheticTestCase(const std::string &name,
+GTEST_API_ void InsertSyntheticTestCase(const std::string& name,
CodeLocation location, bool has_test_p);
// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
@@ -495,17 +508,17 @@ class ParameterizedTestSuiteInfo : public ParameterizedTestSuiteInfoBase {
using ParamType = typename TestSuite::ParamType;
// A function that returns an instance of appropriate generator type.
typedef ParamGenerator<ParamType>(GeneratorCreationFunc)();
- using ParamNameGeneratorFunc = std::string(const TestParamInfo<ParamType> &);
+ using ParamNameGeneratorFunc = std::string(const TestParamInfo<ParamType>&);
- explicit ParameterizedTestSuiteInfo(const char *name,
+ explicit ParameterizedTestSuiteInfo(const char* name,
CodeLocation code_location)
: test_suite_name_(name), code_location_(code_location) {}
- // Test case base name for display purposes.
- const std::string &GetTestSuiteName() const override {
+ // Test suite base name for display purposes.
+ const std::string& GetTestSuiteName() const override {
return test_suite_name_;
}
- // Test case id to verify identity.
+ // Test suite id to verify identity.
TypeId GetTestSuiteTypeId() const override { return GetTypeId<TestSuite>(); }
// TEST_P macro uses AddTestPattern() to record information
// about a single test in a LocalTestInfo structure.
@@ -513,17 +526,18 @@ class ParameterizedTestSuiteInfo : public ParameterizedTestSuiteInfoBase {
// prefix). test_base_name is the name of an individual test without
// parameter index. For the test SequenceA/FooTest.DoBar/1 FooTest is
// test suite base name and DoBar is test base name.
- void AddTestPattern(const char *test_suite_name, const char *test_base_name,
- TestMetaFactoryBase<ParamType> *meta_factory) {
- tests_.push_back(std::shared_ptr<TestInfo>(
- new TestInfo(test_suite_name, test_base_name, meta_factory)));
+ void AddTestPattern(const char* test_suite_name, const char* test_base_name,
+ TestMetaFactoryBase<ParamType>* meta_factory,
+ CodeLocation code_location) {
+ tests_.push_back(std::shared_ptr<TestInfo>(new TestInfo(
+ test_suite_name, test_base_name, meta_factory, code_location)));
}
// INSTANTIATE_TEST_SUITE_P macro uses AddTestSuiteInstantiation() to record
// information about a generator.
- int AddTestSuiteInstantiation(const std::string &instantiation_name,
- GeneratorCreationFunc *func,
- ParamNameGeneratorFunc *name_func,
- const char *file, int line) {
+ int AddTestSuiteInstantiation(const std::string& instantiation_name,
+ GeneratorCreationFunc* func,
+ ParamNameGeneratorFunc* name_func,
+ const char* file, int line) {
instantiations_.push_back(
InstantiationInfo(instantiation_name, func, name_func, file, line));
return 0; // Return value used only to run this method in namespace scope.
@@ -542,10 +556,10 @@ class ParameterizedTestSuiteInfo : public ParameterizedTestSuiteInfoBase {
for (typename InstantiationContainer::iterator gen_it =
instantiations_.begin();
gen_it != instantiations_.end(); ++gen_it) {
- const std::string &instantiation_name = gen_it->name;
+ const std::string& instantiation_name = gen_it->name;
ParamGenerator<ParamType> generator((*gen_it->generator)());
- ParamNameGeneratorFunc *name_func = gen_it->name_func;
- const char *file = gen_it->file;
+ ParamNameGeneratorFunc* name_func = gen_it->name_func;
+ const char* file = gen_it->file;
int line = gen_it->line;
std::string test_suite_name;
@@ -582,7 +596,7 @@ class ParameterizedTestSuiteInfo : public ParameterizedTestSuiteInfoBase {
MakeAndRegisterTestInfo(
test_suite_name.c_str(), test_name_stream.GetString().c_str(),
nullptr, // No type parameter.
- PrintToString(*param_it).c_str(), code_location_,
+ PrintToString(*param_it).c_str(), test_info->code_location,
GetTestSuiteTypeId(),
SuiteApiResolver<TestSuite>::GetSetUpCaseOrSuite(file, line),
SuiteApiResolver<TestSuite>::GetTearDownCaseOrSuite(file, line),
@@ -602,43 +616,49 @@ class ParameterizedTestSuiteInfo : public ParameterizedTestSuiteInfoBase {
// LocalTestInfo structure keeps information about a single test registered
// with TEST_P macro.
struct TestInfo {
- TestInfo(const char *a_test_suite_base_name, const char *a_test_base_name,
- TestMetaFactoryBase<ParamType> *a_test_meta_factory)
+ TestInfo(const char* a_test_suite_base_name, const char* a_test_base_name,
+ TestMetaFactoryBase<ParamType>* a_test_meta_factory,
+ CodeLocation a_code_location)
: test_suite_base_name(a_test_suite_base_name),
test_base_name(a_test_base_name),
- test_meta_factory(a_test_meta_factory) {}
+ test_meta_factory(a_test_meta_factory),
+ code_location(a_code_location) {}
const std::string test_suite_base_name;
const std::string test_base_name;
const std::unique_ptr<TestMetaFactoryBase<ParamType>> test_meta_factory;
+ const CodeLocation code_location;
};
using TestInfoContainer = ::std::vector<std::shared_ptr<TestInfo>>;
// Records data received from INSTANTIATE_TEST_SUITE_P macros:
// <Instantiation name, Sequence generator creation function,
// Name generator function, Source file, Source line>
struct InstantiationInfo {
- InstantiationInfo(const std::string &name_in,
- GeneratorCreationFunc *generator_in,
- ParamNameGeneratorFunc *name_func_in, const char *file_in,
+ InstantiationInfo(const std::string& name_in,
+ GeneratorCreationFunc* generator_in,
+ ParamNameGeneratorFunc* name_func_in, const char* file_in,
int line_in)
- : name(name_in), generator(generator_in), name_func(name_func_in),
- file(file_in), line(line_in) {}
+ : name(name_in),
+ generator(generator_in),
+ name_func(name_func_in),
+ file(file_in),
+ line(line_in) {}
std::string name;
- GeneratorCreationFunc *generator;
- ParamNameGeneratorFunc *name_func;
- const char *file;
+ GeneratorCreationFunc* generator;
+ ParamNameGeneratorFunc* name_func;
+ const char* file;
int line;
};
typedef ::std::vector<InstantiationInfo> InstantiationContainer;
- static bool IsValidParamName(const std::string &name) {
+ static bool IsValidParamName(const std::string& name) {
// Check for empty string
if (name.empty()) return false;
// Check for invalid characters
for (std::string::size_type index = 0; index < name.size(); ++index) {
- if (!isalnum(name[index]) && name[index] != '_') return false;
+ if (!IsAlNum(name[index]) && name[index] != '_') return false;
}
return true;
@@ -649,7 +669,9 @@ class ParameterizedTestSuiteInfo : public ParameterizedTestSuiteInfoBase {
TestInfoContainer tests_;
InstantiationContainer instantiations_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestSuiteInfo);
+ ParameterizedTestSuiteInfo(const ParameterizedTestSuiteInfo&) = delete;
+ ParameterizedTestSuiteInfo& operator=(const ParameterizedTestSuiteInfo&) =
+ delete;
}; // class ParameterizedTestSuiteInfo
// Legacy API is deprecated but still available
@@ -668,7 +690,7 @@ class ParameterizedTestSuiteRegistry {
public:
ParameterizedTestSuiteRegistry() {}
~ParameterizedTestSuiteRegistry() {
- for (auto &test_suite_info : test_suite_infos_) {
+ for (auto& test_suite_info : test_suite_infos_) {
delete test_suite_info;
}
}
@@ -676,10 +698,10 @@ class ParameterizedTestSuiteRegistry {
// Looks up or creates and returns a structure containing information about
// tests and instantiations of a particular test suite.
template <class TestSuite>
- ParameterizedTestSuiteInfo<TestSuite> *GetTestSuitePatternHolder(
- const char *test_suite_name, CodeLocation code_location) {
- ParameterizedTestSuiteInfo<TestSuite> *typed_test_info = nullptr;
- for (auto &test_suite_info : test_suite_infos_) {
+ ParameterizedTestSuiteInfo<TestSuite>* GetTestSuitePatternHolder(
+ const char* test_suite_name, CodeLocation code_location) {
+ ParameterizedTestSuiteInfo<TestSuite>* typed_test_info = nullptr;
+ for (auto& test_suite_info : test_suite_infos_) {
if (test_suite_info->GetTestSuiteName() == test_suite_name) {
if (test_suite_info->GetTestSuiteTypeId() != GetTypeId<TestSuite>()) {
// Complain about incorrect usage of Google Test facilities
@@ -705,27 +727,29 @@ class ParameterizedTestSuiteRegistry {
return typed_test_info;
}
void RegisterTests() {
- for (auto &test_suite_info : test_suite_infos_) {
+ for (auto& test_suite_info : test_suite_infos_) {
test_suite_info->RegisterTests();
}
}
// Legacy API is deprecated but still available
#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
template <class TestCase>
- ParameterizedTestCaseInfo<TestCase> *GetTestCasePatternHolder(
- const char *test_case_name, CodeLocation code_location) {
+ ParameterizedTestCaseInfo<TestCase>* GetTestCasePatternHolder(
+ const char* test_case_name, CodeLocation code_location) {
return GetTestSuitePatternHolder<TestCase>(test_case_name, code_location);
}
#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
private:
- using TestSuiteInfoContainer =
- ::std::vector<ParameterizedTestSuiteInfoBase *>;
+ using TestSuiteInfoContainer = ::std::vector<ParameterizedTestSuiteInfoBase*>;
TestSuiteInfoContainer test_suite_infos_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestSuiteRegistry);
+ ParameterizedTestSuiteRegistry(const ParameterizedTestSuiteRegistry&) =
+ delete;
+ ParameterizedTestSuiteRegistry& operator=(
+ const ParameterizedTestSuiteRegistry&) = delete;
};
// Keep track of which type-parameterized test suites are defined and
@@ -734,11 +758,11 @@ class ParameterizedTestSuiteRegistry {
class TypeParameterizedTestSuiteRegistry {
public:
// Add a suite definition
- void RegisterTestSuite(const char *test_suite_name,
+ void RegisterTestSuite(const char* test_suite_name,
CodeLocation code_location);
// Add an instantiation of a suite.
- void RegisterInstantiation(const char *test_suite_name);
+ void RegisterInstantiation(const char* test_suite_name);
// For each suite reported as defined but not instantiated,
// emit a test that reports that fact (configurably, as an error).
@@ -762,15 +786,20 @@ class TypeParameterizedTestSuiteRegistry {
// include/gtest/gtest-param-test.h.
template <class Container>
internal::ParamGenerator<typename Container::value_type> ValuesIn(
- const Container &container);
+ const Container& container);
namespace internal {
// Used in the Values() function to provide polymorphic capabilities.
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4100)
+#endif
+
template <typename... Ts>
class ValueArray {
public:
- ValueArray(Ts... v) : v_{ std::move(v)... } {}
+ explicit ValueArray(Ts... v) : v_(FlatTupleConstructTag{}, std::move(v)...) {}
template <typename T>
operator ParamGenerator<T>() const { // NOLINT
@@ -780,26 +809,30 @@ class ValueArray {
private:
template <typename T, size_t... I>
std::vector<T> MakeVector(IndexSequence<I...>) const {
- return std::vector<T>{ static_cast<T>(v_.template Get<I>())... };
+ return std::vector<T>{static_cast<T>(v_.template Get<I>())...};
}
FlatTuple<Ts...> v_;
};
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
template <typename... T>
class CartesianProductGenerator
: public ParamGeneratorInterface<::std::tuple<T...>> {
public:
typedef ::std::tuple<T...> ParamType;
- CartesianProductGenerator(const std::tuple<ParamGenerator<T>...> &g)
+ CartesianProductGenerator(const std::tuple<ParamGenerator<T>...>& g)
: generators_(g) {}
~CartesianProductGenerator() override {}
- ParamIteratorInterface<ParamType> *Begin() const override {
+ ParamIteratorInterface<ParamType>* Begin() const override {
return new Iterator(this, generators_, false);
}
- ParamIteratorInterface<ParamType> *End() const override {
+ ParamIteratorInterface<ParamType>* End() const override {
return new Iterator(this, generators_, true);
}
@@ -810,17 +843,18 @@ class CartesianProductGenerator
class IteratorImpl<IndexSequence<I...>>
: public ParamIteratorInterface<ParamType> {
public:
- IteratorImpl(const ParamGeneratorInterface<ParamType> *base,
- const std::tuple<ParamGenerator<T>...> &generators,
+ IteratorImpl(const ParamGeneratorInterface<ParamType>* base,
+ const std::tuple<ParamGenerator<T>...>& generators,
bool is_end)
- : base_(base), begin_(std::get<I>(generators).begin()...),
+ : base_(base),
+ begin_(std::get<I>(generators).begin()...),
end_(std::get<I>(generators).end()...),
current_(is_end ? end_ : begin_) {
ComputeCurrentValue();
}
~IteratorImpl() override {}
- const ParamGeneratorInterface<ParamType> *BaseGenerator() const override {
+ const ParamGeneratorInterface<ParamType>* BaseGenerator() const override {
return base_;
}
// Advance should not be called on beyond-of-range iterators
@@ -833,19 +867,19 @@ class CartesianProductGenerator
AdvanceIfEnd<sizeof...(T) - 1>();
ComputeCurrentValue();
}
- ParamIteratorInterface<ParamType> *Clone() const override {
+ ParamIteratorInterface<ParamType>* Clone() const override {
return new IteratorImpl(*this);
}
- const ParamType *Current() const override { return current_value_.get(); }
+ const ParamType* Current() const override { return current_value_.get(); }
- bool Equals(const ParamIteratorInterface<ParamType> &other) const override {
+ bool Equals(const ParamIteratorInterface<ParamType>& other) const override {
// Having the same base generator guarantees that the other
// iterator is of the same type and we can downcast.
GTEST_CHECK_(BaseGenerator() == other.BaseGenerator())
<< "The program attempted to compare iterators "
<< "from different generators." << std::endl;
- const IteratorImpl *typed_other =
+ const IteratorImpl* typed_other =
CheckedDowncastToActualType<const IteratorImpl>(&other);
// We must report iterators equal if they both point beyond their
@@ -854,9 +888,9 @@ class CartesianProductGenerator
if (AtEnd() && typed_other->AtEnd()) return true;
bool same = true;
- bool dummy[] = { (same = same &&
- std::get<I>(current_) ==
- std::get<I>(typed_other->current_))... };
+ bool dummy[] = {
+ (same = same && std::get<I>(current_) ==
+ std::get<I>(typed_other->current_))...};
(void)dummy;
return same;
}
@@ -884,13 +918,13 @@ class CartesianProductGenerator
}
bool AtEnd() const {
bool at_end = false;
- bool dummy[] = { (at_end = at_end || std::get<I>(current_) ==
- std::get<I>(end_))... };
+ bool dummy[] = {
+ (at_end = at_end || std::get<I>(current_) == std::get<I>(end_))...};
(void)dummy;
return at_end;
}
- const ParamGeneratorInterface<ParamType> *const base_;
+ const ParamGeneratorInterface<ParamType>* const base_;
std::tuple<typename ParamGenerator<T>::iterator...> begin_;
std::tuple<typename ParamGenerator<T>::iterator...> end_;
std::tuple<typename ParamGenerator<T>::iterator...> current_;
@@ -905,7 +939,7 @@ class CartesianProductGenerator
template <class... Gen>
class CartesianProductHolder {
public:
- CartesianProductHolder(const Gen &... g) : generators_(g...) {}
+ CartesianProductHolder(const Gen&... g) : generators_(g...) {}
template <typename... T>
operator ParamGenerator<::std::tuple<T...>>() const {
return ParamGenerator<::std::tuple<T...>>(
@@ -919,4 +953,4 @@ class CartesianProductHolder {
} // namespace internal
} // namespace testing
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_
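
A pattern worth calling out in this file: every GTEST_DISALLOW_COPY_AND_ASSIGN_(Type) use is replaced by explicitly deleted special members, because the macro itself is removed from gtest-port.h further down. A minimal sketch of the idiom (Widget is a made-up name):

// Before: GTEST_DISALLOW_COPY_AND_ASSIGN_(Widget); hid the two declarations.
// After: the deleted members are spelled out where tools and readers see them.
class Widget {
 public:
  Widget() = default;

 private:
  Widget(const Widget&) = delete;
  Widget& operator=(const Widget&) = delete;
};

int main() {
  Widget w;
  // Widget w2 = w;  // does not compile: the copy constructor is deleted.
  (void)w;
}
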
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port-arch.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port-arch.h
index f803a19be37..f025db76ad3 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port-arch.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port-arch.h
@@ -26,14 +26,14 @@
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+
// The Google C++ Testing and Mocking Framework (Google Test)
//
// This header file defines the GTEST_OS_* macro.
// It is separate from gtest-port.h so that custom/gtest-port.h can include it.
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
// Determines the platform on which Google Test is compiled.
#ifdef __CYGWIN__
@@ -68,6 +68,7 @@
#define GTEST_OS_OS2 1
#elif defined __APPLE__
#define GTEST_OS_MAC 1
+#include <TargetConditionals.h>
#if TARGET_OS_IPHONE
#define GTEST_OS_IOS 1
#endif
@@ -77,6 +78,8 @@
#define GTEST_OS_FREEBSD 1
#elif defined __Fuchsia__
#define GTEST_OS_FUCHSIA 1
+#elif defined(__GNU__)
+#define GTEST_OS_GNU_HURD 1
#elif defined(__GLIBC__) && defined(__FreeBSD_kernel__)
#define GTEST_OS_GNU_KFREEBSD 1
#elif defined __linux__
@@ -106,6 +109,8 @@
#define GTEST_OS_ESP8266 1
#elif defined ESP32
#define GTEST_OS_ESP32 1
+#elif defined(__XTENSA__)
+#define GTEST_OS_XTENSA 1
#endif // __CYGWIN__
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
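
The #include <TargetConditionals.h> added under __APPLE__ above is load-bearing: TARGET_OS_IPHONE is defined by that SDK header, not by the compiler, so testing it without the include would silently evaluate to 0 even on iOS. A sketch of the detection order (DEMO_OS is an illustrative macro, not part of gtest):

#include <iostream>

#if defined(__APPLE__)
// Without this include, TARGET_OS_IPHONE below would be undefined (hence 0).
#include <TargetConditionals.h>
#if TARGET_OS_IPHONE
#define DEMO_OS "iOS"
#else
#define DEMO_OS "macOS"
#endif
#elif defined(__linux__)
#define DEMO_OS "Linux"
#else
#define DEMO_OS "other"
#endif

int main() { std::cout << "Detected: " << DEMO_OS << "\n"; }
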
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port.h
index 083da569fe9..0003d276589 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-port.h
@@ -26,7 +26,7 @@
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+
// Low-level types and utilities for porting Google Test to various
// platforms. All macros ending with _ and symbols defined in an
// internal namespace are subject to change without notice. Code
@@ -38,10 +38,12 @@
// files are expected to #include this. Therefore, it cannot #include
// any other Google Test header.
-// GOOGLETEST_CM0001 DO NOT DELETE
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
// Environment-describing macros
// -----------------------------
@@ -116,6 +118,7 @@
// GTEST_OS_DRAGONFLY - DragonFlyBSD
// GTEST_OS_FREEBSD - FreeBSD
// GTEST_OS_FUCHSIA - Fuchsia
+// GTEST_OS_GNU_HURD - GNU/Hurd
// GTEST_OS_GNU_KFREEBSD - GNU/kFreeBSD
// GTEST_OS_HAIKU - Haiku
// GTEST_OS_HPUX - HP-UX
@@ -167,7 +170,7 @@
// GTEST_HAS_TYPED_TEST - typed tests
// GTEST_HAS_TYPED_TEST_P - type-parameterized tests
// GTEST_IS_THREADSAFE - Google Test is thread-safe.
-// GOOGLETEST_CM0007 DO NOT DELETE
+// GTEST_USES_RE2 - the RE2 regular expression library is used
// GTEST_USES_POSIX_RE - enhanced POSIX regex is used. Do not confuse with
// GTEST_HAS_POSIX_RE (see above) which users can
// define themselves.
@@ -190,29 +193,36 @@
// GTEST_AMBIGUOUS_ELSE_BLOCKER_ - for disabling a gcc warning.
// GTEST_ATTRIBUTE_UNUSED_ - declares that a class' instances or a
// variable don't have to be used.
-// GTEST_DISALLOW_ASSIGN_ - disables copy operator=.
-// GTEST_DISALLOW_COPY_AND_ASSIGN_ - disables copy ctor and operator=.
-// GTEST_DISALLOW_MOVE_ASSIGN_ - disables move operator=.
-// GTEST_DISALLOW_MOVE_AND_ASSIGN_ - disables move ctor and operator=.
// GTEST_MUST_USE_RESULT_ - declares that a function's result must be used.
// GTEST_INTENTIONAL_CONST_COND_PUSH_ - start code section where MSVC C4127 is
// suppressed (constant conditional).
// GTEST_INTENTIONAL_CONST_COND_POP_ - finish code section where MSVC C4127
// is suppressed.
+// GTEST_INTERNAL_HAS_ANY - for enabling UniversalPrinter<std::any> or
+// UniversalPrinter<absl::any> specializations.
+// GTEST_INTERNAL_HAS_OPTIONAL - for enabling UniversalPrinter<std::optional>
+// or UniversalPrinter<absl::optional>
+// specializations.
// GTEST_INTERNAL_HAS_STRING_VIEW - for enabling Matcher<std::string_view> or
// Matcher<absl::string_view>
// specializations.
+// GTEST_INTERNAL_HAS_VARIANT - for enabling UniversalPrinter<std::variant> or
+// UniversalPrinter<absl::variant>
+// specializations.
//
// Synchronization:
// Mutex, MutexLock, ThreadLocal, GetThreadCount()
// - synchronization primitives.
//
// Regular expressions:
-// RE - a simple regular expression class using the POSIX
-// Extended Regular Expression syntax on UNIX-like platforms
-// GOOGLETEST_CM0008 DO NOT DELETE
-// or a reduced regular exception syntax on other
-// platforms, including Windows.
+// RE - a simple regular expression class using
+// 1) the RE2 syntax on all platforms when built with RE2
+// and Abseil as dependencies
+// 2) the POSIX Extended Regular Expression syntax on
+// UNIX-like platforms, or
+// 3) a reduced regular expression syntax on other platforms,
+// including Windows.
// Logging:
// GTEST_LOG_() - logs messages at the specified severity level.
// LogToStderr() - directs all log messages to stderr.
@@ -232,8 +242,6 @@
// BiggestInt - the biggest signed integer type.
//
// Command-line utilities:
-// GTEST_DECLARE_*() - declares a flag.
-// GTEST_DEFINE_*() - defines a flag.
// GetInjectableArgvs() - returns the command line as a vector of strings.
//
// Environment variable utilities:
@@ -252,13 +260,23 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+
+#include <cerrno>
+// #include <condition_variable> // Guarded by GTEST_IS_THREADSAFE below
#include <cstdint>
+#include <iostream>
#include <limits>
+#include <locale>
+#include <memory>
+#include <string>
+// #include <mutex> // Guarded by GTEST_IS_THREADSAFE below
+#include <tuple>
#include <type_traits>
+#include <vector>
#ifndef _WIN32_WCE
-#include <sys/types.h>
#include <sys/stat.h>
+#include <sys/types.h>
#endif // !_WIN32_WCE
#if defined __APPLE__
@@ -266,15 +284,15 @@
#include <TargetConditionals.h>
#endif
-#include <iostream> // NOLINT
-#include <memory>
-#include <string> // NOLINT
-#include <tuple>
-#include <vector> // NOLINT
-
#include "gtest/internal/custom/gtest-port.h"
#include "gtest/internal/gtest-port-arch.h"
+#if GTEST_HAS_ABSL
+#include "absl/flags/declare.h"
+#include "absl/flags/flag.h"
+#include "absl/flags/reflection.h"
+#endif
+
#if !defined(GTEST_DEV_EMAIL_)
#define GTEST_DEV_EMAIL_ "googletestframework@@googlegroups.com"
#define GTEST_FLAG_PREFIX_ "gtest_"
@@ -343,12 +361,16 @@ typedef struct _CRITICAL_SECTION GTEST_CRITICAL_SECTION;
// WindowsTypesTest.CRITICAL_SECTIONIs_RTL_CRITICAL_SECTION.
typedef struct _RTL_CRITICAL_SECTION GTEST_CRITICAL_SECTION;
#endif
+#elif GTEST_OS_XTENSA
+#include <unistd.h>
+// Xtensa toolchains define strcasecmp in the string.h header instead of
+// strings.h. string.h is already included.
#else
// This assumes that non-Windows OSes provide unistd.h. For OSes where this
// is not the case, we need to include headers that provide the functions
// mentioned above.
-#include <unistd.h>
#include <strings.h>
+#include <unistd.h>
#endif // GTEST_OS_WINDOWS
#if GTEST_OS_LINUX_ANDROID
@@ -363,36 +385,23 @@ typedef struct _RTL_CRITICAL_SECTION GTEST_CRITICAL_SECTION;
// On Android, <regex.h> is only available starting with Gingerbread.
#define GTEST_HAS_POSIX_RE (__ANDROID_API__ >= 9)
#else
-#define GTEST_HAS_POSIX_RE (!GTEST_OS_WINDOWS)
+#define GTEST_HAS_POSIX_RE (!GTEST_OS_WINDOWS && !GTEST_OS_XTENSA)
#endif
#endif
-#if GTEST_USES_PCRE
-// The appropriate headers have already been included.
-
+// Select the regular expression implementation.
+#if GTEST_HAS_ABSL
+// When using Abseil, RE2 is required.
+#include "absl/strings/string_view.h"
+#include "re2/re2.h"
+#define GTEST_USES_RE2 1
#elif GTEST_HAS_POSIX_RE
-
-// On some platforms, <regex.h> needs someone to define size_t, and
-// won't compile otherwise. We can #include it here as we already
-// included <stdlib.h>, which is guaranteed to define size_t through
-// <stddef.h>.
#include <regex.h> // NOLINT
-
#define GTEST_USES_POSIX_RE 1
-
-#elif GTEST_OS_WINDOWS
-
-// <regex.h> is not available on Windows. Use our own simple regex
-// implementation instead.
-#define GTEST_USES_SIMPLE_RE 1
-
#else
-
-// <regex.h> may not be available on this platform. Use our own
-// simple regex implementation instead.
+// Use our own simple regex implementation.
#define GTEST_USES_SIMPLE_RE 1
-
-#endif // GTEST_USES_PCRE
+#endif
#ifndef GTEST_HAS_EXCEPTIONS
// The user didn't tell us whether exceptions are enabled, so we need
@@ -448,7 +457,7 @@ typedef struct _RTL_CRITICAL_SECTION GTEST_CRITICAL_SECTION;
// no support for it at least as recent as Froyo (2.2).
#define GTEST_HAS_STD_WSTRING \
(!(GTEST_OS_LINUX_ANDROID || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS || \
- GTEST_OS_HAIKU || GTEST_OS_ESP32 || GTEST_OS_ESP8266))
+ GTEST_OS_HAIKU || GTEST_OS_ESP32 || GTEST_OS_ESP8266 || GTEST_OS_XTENSA))
#endif // GTEST_HAS_STD_WSTRING
@@ -526,7 +535,7 @@ typedef struct _RTL_CRITICAL_SECTION GTEST_CRITICAL_SECTION;
(GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_HPUX || GTEST_OS_QNX || \
GTEST_OS_FREEBSD || GTEST_OS_NACL || GTEST_OS_NETBSD || GTEST_OS_FUCHSIA || \
GTEST_OS_DRAGONFLY || GTEST_OS_GNU_KFREEBSD || GTEST_OS_OPENBSD || \
- GTEST_OS_HAIKU)
+ GTEST_OS_HAIKU || GTEST_OS_GNU_HURD)
#endif // GTEST_HAS_PTHREAD
#if GTEST_HAS_PTHREAD
@@ -571,7 +580,7 @@ typedef struct _RTL_CRITICAL_SECTION GTEST_CRITICAL_SECTION;
// By default, we assume that stream redirection is supported on all
// platforms except known mobile ones.
#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || \
- GTEST_OS_WINDOWS_RT || GTEST_OS_ESP8266
+ GTEST_OS_WINDOWS_RT || GTEST_OS_ESP8266 || GTEST_OS_XTENSA
#define GTEST_HAS_STREAM_REDIRECTION 0
#else
#define GTEST_HAS_STREAM_REDIRECTION 1
@@ -585,7 +594,8 @@ typedef struct _RTL_CRITICAL_SECTION GTEST_CRITICAL_SECTION;
(GTEST_OS_WINDOWS_DESKTOP && _MSC_VER) || GTEST_OS_WINDOWS_MINGW || \
GTEST_OS_AIX || GTEST_OS_HPUX || GTEST_OS_OPENBSD || GTEST_OS_QNX || \
GTEST_OS_FREEBSD || GTEST_OS_NETBSD || GTEST_OS_FUCHSIA || \
- GTEST_OS_DRAGONFLY || GTEST_OS_GNU_KFREEBSD || GTEST_OS_HAIKU)
+ GTEST_OS_DRAGONFLY || GTEST_OS_GNU_KFREEBSD || GTEST_OS_HAIKU || \
+ GTEST_OS_GNU_HURD)
#define GTEST_HAS_DEATH_TEST 1
#endif
@@ -605,7 +615,8 @@ typedef struct _RTL_CRITICAL_SECTION GTEST_CRITICAL_SECTION;
// Determines whether test results can be streamed to a socket.
#if GTEST_OS_LINUX || GTEST_OS_GNU_KFREEBSD || GTEST_OS_DRAGONFLY || \
- GTEST_OS_FREEBSD || GTEST_OS_NETBSD || GTEST_OS_OPENBSD
+ GTEST_OS_FREEBSD || GTEST_OS_NETBSD || GTEST_OS_OPENBSD || \
+ GTEST_OS_GNU_HURD
#define GTEST_CAN_STREAM_RESULTS_ 1
#endif
@@ -657,8 +668,8 @@ typedef struct _RTL_CRITICAL_SECTION GTEST_CRITICAL_SECTION;
// matches the selected implementation. See
// https://sourceforge.net/p/mingw-w64/wiki2/gnu%20printf/.
#define GTEST_ATTRIBUTE_PRINTF_(string_index, first_to_check) \
- __attribute__( \
- (__format__(__MINGW_PRINTF_FORMAT, string_index, first_to_check)))
+ __attribute__(( \
+ __format__(__MINGW_PRINTF_FORMAT, string_index, first_to_check)))
#else
#define GTEST_ATTRIBUTE_PRINTF_(string_index, first_to_check) \
__attribute__((__format__(__printf__, string_index, first_to_check)))
@@ -667,27 +678,6 @@ typedef struct _RTL_CRITICAL_SECTION GTEST_CRITICAL_SECTION;
#define GTEST_ATTRIBUTE_PRINTF_(string_index, first_to_check)
#endif
-// A macro to disallow copy operator=
-// This should be used in the private: declarations for a class.
-#define GTEST_DISALLOW_ASSIGN_(type) type &operator=(type const &) = delete
-
-// A macro to disallow copy constructor and operator=
-// This should be used in the private: declarations for a class.
-#define GTEST_DISALLOW_COPY_AND_ASSIGN_(type) \
- type(type const &) = delete; \
- GTEST_DISALLOW_ASSIGN_(type)
-
-// A macro to disallow move operator=
-// This should be used in the private: declarations for a class.
-#define GTEST_DISALLOW_MOVE_ASSIGN_(type) \
- type &operator=(type &&) noexcept = delete
-
-// A macro to disallow move constructor and operator=
-// This should be used in the private: declarations for a class.
-#define GTEST_DISALLOW_MOVE_AND_ASSIGN_(type) \
- type(type &&) noexcept = delete; \
- GTEST_DISALLOW_MOVE_ASSIGN_(type)
-
// Tell the compiler to warn about unused return values for functions declared
// with this macro. The macro should be used on function declarations
// following the argument list:
@@ -736,6 +726,12 @@ typedef struct _RTL_CRITICAL_SECTION GTEST_CRITICAL_SECTION;
#endif // GTEST_IS_THREADSAFE
+#if GTEST_IS_THREADSAFE
+// Some platforms don't support including these threading related headers.
+#include <condition_variable> // NOLINT
+#include <mutex> // NOLINT
+#endif // GTEST_IS_THREADSAFE
+
// GTEST_API_ qualifies all symbols that must be exported. The definitions below
// are guarded by #ifndef to give embedders a chance to define GTEST_API_ in
// gtest/internal/custom/gtest-port.h
@@ -768,6 +764,20 @@ typedef struct _RTL_CRITICAL_SECTION GTEST_CRITICAL_SECTION;
#define GTEST_NO_INLINE_
#endif
+#if defined(__clang__)
+// Nested ifs to avoid triggering MSVC warning.
+#if __has_attribute(disable_tail_calls)
+// Ask the compiler not to perform tail call optimization inside
+// the marked function.
+#define GTEST_NO_TAIL_CALL_ __attribute__((disable_tail_calls))
+#endif
+#elif __GNUC__
+#define GTEST_NO_TAIL_CALL_ \
+ __attribute__((optimize("no-optimize-sibling-calls")))
+#else
+#define GTEST_NO_TAIL_CALL_
+#endif
+
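
The new GTEST_NO_TAIL_CALL_ macro asks the compiler to keep the marked function as a distinct frame (clang's disable_tail_calls attribute, or gcc's optimize("no-optimize-sibling-calls")), which matters when a function must stay visible in backtraces. A sketch applying the same nested-#if structure outside gtest (NO_TAIL_CALL is an illustrative name; on other compilers it expands to nothing):

#include <iostream>

#if defined(__clang__)
// Nested #ifs, as above, so non-clang preprocessors never see
// __has_attribute.
#if __has_attribute(disable_tail_calls)
#define NO_TAIL_CALL __attribute__((disable_tail_calls))
#endif
#elif defined(__GNUC__)
#define NO_TAIL_CALL __attribute__((optimize("no-optimize-sibling-calls")))
#endif
#ifndef NO_TAIL_CALL
#define NO_TAIL_CALL
#endif

int Leaf() { return 1; }

// Without the attribute, the call to Leaf() may become a jump and this
// frame can vanish from stack traces.
NO_TAIL_CALL int Caller() { return Leaf(); }

int main() { std::cout << Caller() << "\n"; }
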
// _LIBCPP_VERSION is defined by the libc++ library from the LLVM project.
#if !defined(GTEST_HAS_CXXABI_H_)
#if defined(__GLIBCXX__) || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER))
@@ -843,25 +853,37 @@ namespace internal {
// Secret object, which is what we want.
class Secret;
-// The GTEST_COMPILE_ASSERT_ is a legacy macro used to verify that a compile
-// time expression is true (in new code, use static_assert instead). For
-// example, you could use it to verify the size of a static array:
-//
-// GTEST_COMPILE_ASSERT_(GTEST_ARRAY_SIZE_(names) == NUM_NAMES,
-// names_incorrect_size);
-//
-// The second argument to the macro must be a valid C++ identifier. If the
-// expression is false, compiler will issue an error containing this identifier.
-#define GTEST_COMPILE_ASSERT_(expr, msg) static_assert(expr, #msg)
-
// A helper for suppressing warnings on constant condition. It just
// returns 'condition'.
GTEST_API_ bool IsTrue(bool condition);
// Defines RE.
-#if GTEST_USES_PCRE
-// if used, PCRE is injected by custom/gtest-port.h
+#if GTEST_USES_RE2
+
+// This is almost `using RE = ::RE2`, except it is copy-constructible, and it
+// needs to disambiguate the `std::string`, `absl::string_view`, and `const
+// char*` constructors.
+class GTEST_API_ RE {
+ public:
+ RE(absl::string_view regex) : regex_(regex) {} // NOLINT
+ RE(const char* regex) : RE(absl::string_view(regex)) {} // NOLINT
+ RE(const std::string& regex) : RE(absl::string_view(regex)) {} // NOLINT
+ RE(const RE& other) : RE(other.pattern()) {}
+
+ const std::string& pattern() const { return regex_.pattern(); }
+
+ static bool FullMatch(absl::string_view str, const RE& re) {
+ return RE2::FullMatch(str, re.regex_);
+ }
+ static bool PartialMatch(absl::string_view str, const RE& re) {
+ return RE2::PartialMatch(str, re.regex_);
+ }
+
+ private:
+ RE2 regex_;
+};
+
#elif GTEST_USES_POSIX_RE || GTEST_USES_SIMPLE_RE
// A simple C++ wrapper for <regex.h>. It uses the POSIX Extended
@@ -870,34 +892,34 @@ class GTEST_API_ RE {
public:
// A copy constructor is required by the Standard to initialize object
// references from r-values.
- RE(const RE &other) { Init(other.pattern()); }
+ RE(const RE& other) { Init(other.pattern()); }
// Constructs an RE from a string.
- RE(const ::std::string &regex) { Init(regex.c_str()); } // NOLINT
+ RE(const ::std::string& regex) { Init(regex.c_str()); } // NOLINT
- RE(const char *regex) { Init(regex); } // NOLINT
+ RE(const char* regex) { Init(regex); } // NOLINT
~RE();
// Returns the string representation of the regex.
- const char *pattern() const { return pattern_; }
+ const char* pattern() const { return pattern_; }
// FullMatch(str, re) returns true if and only if regular expression re
// matches the entire str.
// PartialMatch(str, re) returns true if and only if regular expression re
// matches a substring of str (including str itself).
- static bool FullMatch(const ::std::string &str, const RE &re) {
+ static bool FullMatch(const ::std::string& str, const RE& re) {
return FullMatch(str.c_str(), re);
}
- static bool PartialMatch(const ::std::string &str, const RE &re) {
+ static bool PartialMatch(const ::std::string& str, const RE& re) {
return PartialMatch(str.c_str(), re);
}
- static bool FullMatch(const char *str, const RE &re);
- static bool PartialMatch(const char *str, const RE &re);
+ static bool FullMatch(const char* str, const RE& re);
+ static bool PartialMatch(const char* str, const RE& re);
private:
- void Init(const char *regex);
- const char *pattern_;
+ void Init(const char* regex);
+ const char* pattern_;
bool is_valid_;
#if GTEST_USES_POSIX_RE
@@ -907,23 +929,21 @@ class GTEST_API_ RE {
#else // GTEST_USES_SIMPLE_RE
- const char *full_pattern_; // For FullMatch();
+ const char* full_pattern_; // For FullMatch();
#endif
-
- GTEST_DISALLOW_ASSIGN_(RE);
};
-#endif // GTEST_USES_PCRE
+#endif // ::testing::internal::RE implementation
// Formats a source file path and a line number as they would appear
// in an error message from the compiler used to compile this code.
-GTEST_API_ ::std::string FormatFileLocation(const char *file, int line);
+GTEST_API_ ::std::string FormatFileLocation(const char* file, int line);
// Formats a file location for compiler-independent XML output.
// Although this function is not platform dependent, we put it next to
// FormatFileLocation in order to contrast the two functions.
-GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(const char *file,
+GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(const char* file,
int line);
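
Whichever backend is selected above (RE2 with Abseil, POSIX <regex.h>, or the built-in simple matcher), the resulting ::testing::internal::RE class keeps the same static-interface contract. A usage sketch against that contract (internal API, subject to change; requires linking against gtest):

#include "gtest/internal/gtest-port.h"

#include <iostream>

int main() {
  using ::testing::internal::RE;
  const RE re("a.*b");
  // FullMatch: the entire string must match the pattern.
  std::cout << RE::FullMatch("axxb", re) << "\n";       // 1
  std::cout << RE::FullMatch("xaxxb", re) << "\n";      // 0
  // PartialMatch: a substring match is enough.
  std::cout << RE::PartialMatch("xaxxbx", re) << "\n";  // 1
}
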
// Defines logging utilities:
@@ -939,17 +959,18 @@ enum GTestLogSeverity { GTEST_INFO, GTEST_WARNING, GTEST_ERROR, GTEST_FATAL };
// scope.
class GTEST_API_ GTestLog {
public:
- GTestLog(GTestLogSeverity severity, const char *file, int line);
+ GTestLog(GTestLogSeverity severity, const char* file, int line);
// Flushes the buffers and, if severity is GTEST_FATAL, aborts the program.
~GTestLog();
- ::std::ostream &GetStream() { return ::std::cerr; }
+ ::std::ostream& GetStream() { return ::std::cerr; }
private:
const GTestLogSeverity severity_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestLog);
+ GTestLog(const GTestLog&) = delete;
+ GTestLog& operator=(const GTestLog&) = delete;
};
#if !defined(GTEST_LOG_)
@@ -969,7 +990,7 @@ inline void FlushInfoLog() { fflush(nullptr); }
//
// GTEST_CHECK_ is an all-mode assert. It aborts the program if the condition
// is not satisfied.
-// Synopsys:
+// Synopsis:
// GTEST_CHECK_(boolean_condition);
// or
// GTEST_CHECK_(boolean_condition) << "Additional message";
@@ -1009,11 +1030,11 @@ inline void FlushInfoLog() { fflush(nullptr); }
// standard, and necessary so that "T" can always bind to "const T&".
template <typename T>
struct ConstRef {
- typedef const T &type;
+ typedef const T& type;
};
template <typename T>
-struct ConstRef<T &> {
- typedef T &type;
+struct ConstRef<T&> {
+ typedef T& type;
};
// The argument T must depend on some template parameters.
@@ -1027,7 +1048,7 @@ struct ConstRef<T &> {
// const Foo*). When you use ImplicitCast_, the compiler checks that
// the cast is safe. Such explicit ImplicitCast_s are necessary in
// surprisingly many situations where C++ demands an exact type match
-// instead of an argument type convertable to a target type.
+// instead of an argument type convertible to a target type.
//
// The syntax for using ImplicitCast_ is the same as for static_cast:
//
@@ -1067,7 +1088,7 @@ inline To ImplicitCast_(To x) {
// similar functions users may have (e.g., down_cast). The internal
// namespace alone is not enough because the function can be found by ADL.
template <typename To, typename From> // use like this: DownCast_<T*>(foo);
-inline To DownCast_(From *f) { // so we only accept pointers
+inline To DownCast_(From* f) { // so we only accept pointers
// Ensures that To is a sub-type of From *. This test is here only
// for compile-time type checking, and has no overhead in an
// optimized build at run-time, as it will be optimized away
@@ -1076,7 +1097,7 @@ inline To DownCast_(From *f) { // so we only accept pointers
if (false) {
GTEST_INTENTIONAL_CONST_COND_POP_()
const To to = nullptr;
- ::testing::internal::ImplicitCast_<From *>(to);
+ ::testing::internal::ImplicitCast_<From*>(to);
}
#if GTEST_HAS_RTTI
@@ -1092,17 +1113,17 @@ inline To DownCast_(From *f) { // so we only accept pointers
// When RTTI is available, the function performs a runtime
// check to enforce this.
template <class Derived, class Base>
-Derived *CheckedDowncastToActualType(Base *base) {
+Derived* CheckedDowncastToActualType(Base* base) {
#if GTEST_HAS_RTTI
GTEST_CHECK_(typeid(*base) == typeid(Derived));
#endif
#if GTEST_HAS_DOWNCAST_
- return ::down_cast<Derived *>(base);
+ return ::down_cast<Derived*>(base);
#elif GTEST_HAS_RTTI
- return dynamic_cast<Derived *>(base); // NOLINT
+ return dynamic_cast<Derived*>(base); // NOLINT
#else
- return static_cast<Derived *>(base); // Poor man's downcast.
+ return static_cast<Derived*>(base); // Poor man's downcast.
#endif
}
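
The pointer-spacing changes in CheckedDowncastToActualType are cosmetic; its behavior stays the same: assert the exact dynamic type (via RTTI where available), then downcast. A standalone sketch of that pattern in plain C++ (checked_downcast is an illustrative name, with std::abort standing in for GTEST_CHECK_):

#include <cstdlib>
#include <iostream>
#include <typeinfo>

struct Base { virtual ~Base() = default; };
struct Derived : Base { int payload = 42; };

// Verify the exact dynamic type, then perform the cheap static downcast.
template <class To, class From>
To* checked_downcast(From* from) {
  if (typeid(*from) != typeid(To)) std::abort();  // gtest uses GTEST_CHECK_
  return static_cast<To*>(from);
}

int main() {
  Derived d;
  Base* b = &d;
  std::cout << checked_downcast<Derived>(b)->payload << "\n";  // 42
}
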
@@ -1121,10 +1142,10 @@ GTEST_API_ std::string GetCapturedStderr();
#endif // GTEST_HAS_STREAM_REDIRECTION
// Returns the size (in bytes) of a file.
-GTEST_API_ size_t GetFileSize(FILE *file);
+GTEST_API_ size_t GetFileSize(FILE* file);
// Reads the entire content of a file as a string.
-GTEST_API_ std::string ReadEntireFile(FILE *file);
+GTEST_API_ std::string ReadEntireFile(FILE* file);
// All command line arguments.
GTEST_API_ std::vector<std::string> GetArgvs();
@@ -1133,76 +1154,16 @@ GTEST_API_ std::vector<std::string> GetArgvs();
std::vector<std::string> GetInjectableArgvs();
// Deprecated: pass the args vector by value instead.
-void SetInjectableArgvs(const std::vector<std::string> *new_argvs);
-void SetInjectableArgvs(const std::vector<std::string> &new_argvs);
+void SetInjectableArgvs(const std::vector<std::string>* new_argvs);
+void SetInjectableArgvs(const std::vector<std::string>& new_argvs);
void ClearInjectableArgvs();
#endif // GTEST_HAS_DEATH_TEST
// Defines synchronization primitives.
#if GTEST_IS_THREADSAFE
-#if GTEST_HAS_PTHREAD
-// Sleeps for (roughly) n milliseconds. This function is only for testing
-// Google Test's own constructs. Don't use it in user tests, either
-// directly or indirectly.
-inline void SleepMilliseconds(int n) {
- const timespec time = {
- 0, // 0 seconds.
- n * 1000L * 1000L, // And n ms.
- };
- nanosleep(&time, nullptr);
-}
-#endif // GTEST_HAS_PTHREAD
-
-#if GTEST_HAS_NOTIFICATION_
-// Notification has already been imported into the namespace.
-// Nothing to do here.
-
-#elif GTEST_HAS_PTHREAD
-// Allows a controller thread to pause execution of newly created
-// threads until notified. Instances of this class must be created
-// and destroyed in the controller thread.
-//
-// This class is only for testing Google Test's own constructs. Do not
-// use it in user tests, either directly or indirectly.
-class Notification {
- public:
- Notification() : notified_(false) {
- GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, nullptr));
- }
- ~Notification() { pthread_mutex_destroy(&mutex_); }
-
- // Notifies all threads created with this notification to start. Must
- // be called from the controller thread.
- void Notify() {
- pthread_mutex_lock(&mutex_);
- notified_ = true;
- pthread_mutex_unlock(&mutex_);
- }
-
- // Blocks until the controller thread notifies. Must be called from a test
- // thread.
- void WaitForNotification() {
- for (;;) {
- pthread_mutex_lock(&mutex_);
- const bool notified = notified_;
- pthread_mutex_unlock(&mutex_);
- if (notified) break;
- SleepMilliseconds(10);
- }
- }
-
- private:
- pthread_mutex_t mutex_;
- bool notified_;
-
- GTEST_DISALLOW_COPY_AND_ASSIGN_(Notification);
-};
-
-#elif GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
-
-GTEST_API_ void SleepMilliseconds(int n);
+#if GTEST_OS_WINDOWS
// Provides leak-safe Windows kernel handle ownership.
// Used in death tests and in threading support.
class GTEST_API_ AutoHandle {
@@ -1212,7 +1173,7 @@ class GTEST_API_ AutoHandle {
// undesirable because it defines a lot of symbols and macros that tend to
// conflict with client code. This assumption is verified by
// WindowsTypesTest.HANDLEIsVoidStar.
- typedef void *Handle;
+ typedef void* Handle;
AutoHandle();
explicit AutoHandle(Handle handle);
@@ -1229,8 +1190,18 @@ class GTEST_API_ AutoHandle {
Handle handle_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(AutoHandle);
+ AutoHandle(const AutoHandle&) = delete;
+ AutoHandle& operator=(const AutoHandle&) = delete;
};
+#endif
+
+#if GTEST_HAS_NOTIFICATION_
+// Notification has already been imported into the namespace.
+// Nothing to do here.
+
+#else
+GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
+/* class A needs to have dll-interface to be used by clients of class B */)
// Allows a controller thread to pause execution of newly created
// threads until notified. Instances of this class must be created
@@ -1238,17 +1209,34 @@ class GTEST_API_ AutoHandle {
//
// This class is only for testing Google Test's own constructs. Do not
// use it in user tests, either directly or indirectly.
+// TODO(b/203539622): Replace unconditionally with absl::Notification.
class GTEST_API_ Notification {
public:
- Notification();
- void Notify();
- void WaitForNotification();
+ Notification() : notified_(false) {}
+ Notification(const Notification&) = delete;
+ Notification& operator=(const Notification&) = delete;
- private:
- AutoHandle event_;
+ // Notifies all threads created with this notification to start. Must
+ // be called from the controller thread.
+ void Notify() {
+ std::lock_guard<std::mutex> lock(mu_);
+ notified_ = true;
+ cv_.notify_all();
+ }
+
+ // Blocks until the controller thread notifies. Must be called from a test
+ // thread.
+ void WaitForNotification() {
+ std::unique_lock<std::mutex> lock(mu_);
+ cv_.wait(lock, [this]() { return notified_; });
+ }
- GTEST_DISALLOW_COPY_AND_ASSIGN_(Notification);
+ private:
+ std::mutex mu_;
+ std::condition_variable cv_;
+ bool notified_;
};
+GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
#endif // GTEST_HAS_NOTIFICATION_
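
The rewritten Notification above drops the separate pthread and Windows-event implementations in favor of one portable version over std::mutex and std::condition_variable. A self-contained sketch of the same construct and its intended controller/worker use:

#include <condition_variable>
#include <iostream>
#include <mutex>
#include <thread>

class Notification {
 public:
  // Flip the flag under the mutex and wake every waiter.
  void Notify() {
    std::lock_guard<std::mutex> lock(mu_);
    notified_ = true;
    cv_.notify_all();
  }
  // Block until Notify() has been called; the predicate guards against
  // spurious wakeups.
  void WaitForNotification() {
    std::unique_lock<std::mutex> lock(mu_);
    cv_.wait(lock, [this] { return notified_; });
  }

 private:
  std::mutex mu_;
  std::condition_variable cv_;
  bool notified_ = false;
};

int main() {
  Notification start;
  std::thread worker([&] {
    start.WaitForNotification();  // parked until the controller notifies
    std::cout << "worker released\n";
  });
  start.Notify();
  worker.join();
}
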
// On MinGW, we can have both GTEST_OS_WINDOWS and GTEST_HAS_PTHREAD
@@ -1273,8 +1261,8 @@ class ThreadWithParamBase {
// example, SunStudio) treat them as different types. Since class methods
// cannot be defined with C-linkage we need to define a free C-function to
// pass into pthread_create().
-extern "C" inline void *ThreadFuncWithCLinkage(void *thread) {
- static_cast<ThreadWithParamBase *>(thread)->Run();
+extern "C" inline void* ThreadFuncWithCLinkage(void* thread) {
+ static_cast<ThreadWithParamBase*>(thread)->Run();
return nullptr;
}
@@ -1295,10 +1283,12 @@ class ThreadWithParam : public ThreadWithParamBase {
public:
typedef void UserThreadFunc(T);
- ThreadWithParam(UserThreadFunc *func, T param, Notification *thread_can_start)
- : func_(func), param_(param), thread_can_start_(thread_can_start),
+ ThreadWithParam(UserThreadFunc* func, T param, Notification* thread_can_start)
+ : func_(func),
+ param_(param),
+ thread_can_start_(thread_can_start),
finished_(false) {
- ThreadWithParamBase *const base = this;
+ ThreadWithParamBase* const base = this;
// The thread can be created only after all fields except thread_
// have been initialized.
GTEST_CHECK_POSIX_SUCCESS_(
@@ -1319,16 +1309,17 @@ class ThreadWithParam : public ThreadWithParamBase {
}
private:
- UserThreadFunc *const func_; // User-supplied thread function.
+ UserThreadFunc* const func_; // User-supplied thread function.
const T param_; // User-supplied parameter to the thread function.
// When non-NULL, used to block execution until the controller thread
// notifies.
- Notification *const thread_can_start_;
+ Notification* const thread_can_start_;
bool finished_; // true if and only if we know that the thread function has
// finished.
pthread_t thread_; // The native thread object.
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParam);
+ ThreadWithParam(const ThreadWithParam&) = delete;
+ ThreadWithParam& operator=(const ThreadWithParam&) = delete;
};
#endif // !GTEST_OS_WINDOWS && GTEST_HAS_PTHREAD ||
// GTEST_HAS_MUTEX_AND_THREAD_LOCAL_
@@ -1389,9 +1380,10 @@ class GTEST_API_ Mutex {
// by the linker.
MutexType type_;
long critical_section_init_phase_; // NOLINT
- GTEST_CRITICAL_SECTION *critical_section_;
+ GTEST_CRITICAL_SECTION* critical_section_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(Mutex);
+ Mutex(const Mutex&) = delete;
+ Mutex& operator=(const Mutex&) = delete;
};
#define GTEST_DECLARE_STATIC_MUTEX_(mutex) \
@@ -1407,14 +1399,15 @@ class GTEST_API_ Mutex {
// "MutexLock l(&mu)". Hence the typedef trick below.
class GTestMutexLock {
public:
- explicit GTestMutexLock(Mutex *mutex) : mutex_(mutex) { mutex_->Lock(); }
+ explicit GTestMutexLock(Mutex* mutex) : mutex_(mutex) { mutex_->Lock(); }
~GTestMutexLock() { mutex_->Unlock(); }
private:
- Mutex *const mutex_;
+ Mutex* const mutex_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestMutexLock);
+ GTestMutexLock(const GTestMutexLock&) = delete;
+ GTestMutexLock& operator=(const GTestMutexLock&) = delete;
};
typedef GTestMutexLock MutexLock;
@@ -1434,14 +1427,15 @@ class ThreadLocalBase {
// this ThreadLocal<T>'s constructor and returns it. It is the caller's
// responsibility not to call this when the ThreadLocal<T> instance already
// has a value on the current thread.
- virtual ThreadLocalValueHolderBase *NewValueForCurrentThread() const = 0;
+ virtual ThreadLocalValueHolderBase* NewValueForCurrentThread() const = 0;
protected:
ThreadLocalBase() {}
virtual ~ThreadLocalBase() {}
private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocalBase);
+ ThreadLocalBase(const ThreadLocalBase&) = delete;
+ ThreadLocalBase& operator=(const ThreadLocalBase&) = delete;
};
// Maps a thread to a set of ThreadLocals that have values instantiated on that
@@ -1451,12 +1445,12 @@ class GTEST_API_ ThreadLocalRegistry {
public:
// Registers thread_local_instance as having value on the current thread.
// Returns a value that can be used to identify the thread from other threads.
- static ThreadLocalValueHolderBase *GetValueOnCurrentThread(
- const ThreadLocalBase *thread_local_instance);
+ static ThreadLocalValueHolderBase* GetValueOnCurrentThread(
+ const ThreadLocalBase* thread_local_instance);
// Invoked when a ThreadLocal instance is destroyed.
static void OnThreadLocalDestroyed(
- const ThreadLocalBase *thread_local_instance);
+ const ThreadLocalBase* thread_local_instance);
};
class GTEST_API_ ThreadWithParamBase {
@@ -1470,7 +1464,7 @@ class GTEST_API_ ThreadWithParamBase {
virtual void Run() = 0;
};
- ThreadWithParamBase(Runnable *runnable, Notification *thread_can_start);
+ ThreadWithParamBase(Runnable* runnable, Notification* thread_can_start);
virtual ~ThreadWithParamBase();
private:
@@ -1483,25 +1477,27 @@ class ThreadWithParam : public ThreadWithParamBase {
public:
typedef void UserThreadFunc(T);
- ThreadWithParam(UserThreadFunc *func, T param, Notification *thread_can_start)
+ ThreadWithParam(UserThreadFunc* func, T param, Notification* thread_can_start)
: ThreadWithParamBase(new RunnableImpl(func, param), thread_can_start) {}
virtual ~ThreadWithParam() {}
private:
class RunnableImpl : public Runnable {
public:
- RunnableImpl(UserThreadFunc *func, T param) : func_(func), param_(param) {}
+ RunnableImpl(UserThreadFunc* func, T param) : func_(func), param_(param) {}
virtual ~RunnableImpl() {}
virtual void Run() { func_(param_); }
private:
- UserThreadFunc *const func_;
+ UserThreadFunc* const func_;
const T param_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(RunnableImpl);
+ RunnableImpl(const RunnableImpl&) = delete;
+ RunnableImpl& operator=(const RunnableImpl&) = delete;
};
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParam);
+ ThreadWithParam(const ThreadWithParam&) = delete;
+ ThreadWithParam& operator=(const ThreadWithParam&) = delete;
};
// Implements thread-local storage on Windows systems.
@@ -1535,15 +1531,15 @@ template <typename T>
class ThreadLocal : public ThreadLocalBase {
public:
ThreadLocal() : default_factory_(new DefaultValueHolderFactory()) {}
- explicit ThreadLocal(const T &value)
+ explicit ThreadLocal(const T& value)
: default_factory_(new InstanceValueHolderFactory(value)) {}
- ~ThreadLocal() { ThreadLocalRegistry::OnThreadLocalDestroyed(this); }
+ ~ThreadLocal() override { ThreadLocalRegistry::OnThreadLocalDestroyed(this); }
- T *pointer() { return GetOrCreateValue(); }
- const T *pointer() const { return GetOrCreateValue(); }
- const T &get() const { return *pointer(); }
- void set(const T &value) { *pointer() = value; }
+ T* pointer() { return GetOrCreateValue(); }
+ const T* pointer() const { return GetOrCreateValue(); }
+ const T& get() const { return *pointer(); }
+ void set(const T& value) { *pointer() = value; }
private:
// Holds a value of T. Can be deleted via its base class without the caller
@@ -1551,22 +1547,23 @@ class ThreadLocal : public ThreadLocalBase {
class ValueHolder : public ThreadLocalValueHolderBase {
public:
ValueHolder() : value_() {}
- explicit ValueHolder(const T &value) : value_(value) {}
+ explicit ValueHolder(const T& value) : value_(value) {}
- T *pointer() { return &value_; }
+ T* pointer() { return &value_; }
private:
T value_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolder);
+ ValueHolder(const ValueHolder&) = delete;
+ ValueHolder& operator=(const ValueHolder&) = delete;
};
- T *GetOrCreateValue() const {
- return static_cast<ValueHolder *>(
+ T* GetOrCreateValue() const {
+ return static_cast<ValueHolder*>(
ThreadLocalRegistry::GetValueOnCurrentThread(this))
->pointer();
}
- virtual ThreadLocalValueHolderBase *NewValueForCurrentThread() const {
+ ThreadLocalValueHolderBase* NewValueForCurrentThread() const override {
return default_factory_->MakeNewHolder();
}
@@ -1574,37 +1571,43 @@ class ThreadLocal : public ThreadLocalBase {
public:
ValueHolderFactory() {}
virtual ~ValueHolderFactory() {}
- virtual ValueHolder *MakeNewHolder() const = 0;
+ virtual ValueHolder* MakeNewHolder() const = 0;
private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolderFactory);
+ ValueHolderFactory(const ValueHolderFactory&) = delete;
+ ValueHolderFactory& operator=(const ValueHolderFactory&) = delete;
};
class DefaultValueHolderFactory : public ValueHolderFactory {
public:
DefaultValueHolderFactory() {}
- ValueHolder *MakeNewHolder() const override { return new ValueHolder(); }
+ ValueHolder* MakeNewHolder() const override { return new ValueHolder(); }
private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultValueHolderFactory);
+ DefaultValueHolderFactory(const DefaultValueHolderFactory&) = delete;
+ DefaultValueHolderFactory& operator=(const DefaultValueHolderFactory&) =
+ delete;
};
class InstanceValueHolderFactory : public ValueHolderFactory {
public:
- explicit InstanceValueHolderFactory(const T &value) : value_(value) {}
- ValueHolder *MakeNewHolder() const override {
+ explicit InstanceValueHolderFactory(const T& value) : value_(value) {}
+ ValueHolder* MakeNewHolder() const override {
return new ValueHolder(value_);
}
private:
const T value_; // The value for each thread.
- GTEST_DISALLOW_COPY_AND_ASSIGN_(InstanceValueHolderFactory);
+ InstanceValueHolderFactory(const InstanceValueHolderFactory&) = delete;
+ InstanceValueHolderFactory& operator=(const InstanceValueHolderFactory&) =
+ delete;
};
std::unique_ptr<ValueHolderFactory> default_factory_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocal);
+ ThreadLocal(const ThreadLocal&) = delete;
+ ThreadLocal& operator=(const ThreadLocal&) = delete;
};
#elif GTEST_HAS_PTHREAD
@@ -1664,7 +1667,7 @@ class MutexBase {
// This allows initialization to work whether pthread_t is a scalar or struct.
// The flag -Wmissing-field-initializers must not be specified for this to work.
#define GTEST_DEFINE_STATIC_MUTEX_(mutex) \
- ::testing::internal::MutexBase mutex = { PTHREAD_MUTEX_INITIALIZER, false, 0 }
+ ::testing::internal::MutexBase mutex = {PTHREAD_MUTEX_INITIALIZER, false, 0}
// The Mutex class can only be used for mutexes created at runtime. It
// shares its API with MutexBase otherwise.
@@ -1677,7 +1680,8 @@ class Mutex : public MutexBase {
~Mutex() { GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_destroy(&mutex_)); }
private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(Mutex);
+ Mutex(const Mutex&) = delete;
+ Mutex& operator=(const Mutex&) = delete;
};
// We cannot name this class MutexLock because the ctor declaration would
@@ -1687,14 +1691,15 @@ class Mutex : public MutexBase {
// "MutexLock l(&mu)". Hence the typedef trick below.
class GTestMutexLock {
public:
- explicit GTestMutexLock(MutexBase *mutex) : mutex_(mutex) { mutex_->Lock(); }
+ explicit GTestMutexLock(MutexBase* mutex) : mutex_(mutex) { mutex_->Lock(); }
~GTestMutexLock() { mutex_->Unlock(); }
private:
- MutexBase *const mutex_;
+ MutexBase* const mutex_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestMutexLock);
+ GTestMutexLock(const GTestMutexLock&) = delete;
+ GTestMutexLock& operator=(const GTestMutexLock&) = delete;
};
typedef GTestMutexLock MutexLock;
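
GTestMutexLock itself is unchanged apart from pointer style; it remains a plain RAII guard. A sketch of the scoped-locking pattern it implements, using std::mutex in place of gtest's MutexBase:

#include <iostream>
#include <mutex>

std::mutex mu;
int counter = 0;

void Increment() {
  // Equivalent of "MutexLock l(&mu)": lock on construction, unlock
  // automatically when the guard goes out of scope, even on exceptions.
  std::lock_guard<std::mutex> lock(mu);
  ++counter;
}

int main() {
  Increment();
  std::cout << counter << "\n";  // 1
}
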
@@ -1712,8 +1717,8 @@ class ThreadLocalValueHolderBase {
// Called by pthread to delete thread-local data stored by
// pthread_setspecific().
-extern "C" inline void DeleteThreadLocalValue(void *value_holder) {
- delete static_cast<ThreadLocalValueHolderBase *>(value_holder);
+extern "C" inline void DeleteThreadLocalValue(void* value_holder) {
+ delete static_cast<ThreadLocalValueHolderBase*>(value_holder);
}
// Implements thread-local storage on pthreads-based systems.
@@ -1722,7 +1727,7 @@ class GTEST_API_ ThreadLocal {
public:
ThreadLocal()
: key_(CreateKey()), default_factory_(new DefaultValueHolderFactory()) {}
- explicit ThreadLocal(const T &value)
+ explicit ThreadLocal(const T& value)
: key_(CreateKey()),
default_factory_(new InstanceValueHolderFactory(value)) {}
@@ -1735,23 +1740,24 @@ class GTEST_API_ ThreadLocal {
GTEST_CHECK_POSIX_SUCCESS_(pthread_key_delete(key_));
}
- T *pointer() { return GetOrCreateValue(); }
- const T *pointer() const { return GetOrCreateValue(); }
- const T &get() const { return *pointer(); }
- void set(const T &value) { *pointer() = value; }
+ T* pointer() { return GetOrCreateValue(); }
+ const T* pointer() const { return GetOrCreateValue(); }
+ const T& get() const { return *pointer(); }
+ void set(const T& value) { *pointer() = value; }
private:
// Holds a value of type T.
class ValueHolder : public ThreadLocalValueHolderBase {
public:
ValueHolder() : value_() {}
- explicit ValueHolder(const T &value) : value_(value) {}
+ explicit ValueHolder(const T& value) : value_(value) {}
- T *pointer() { return &value_; }
+ T* pointer() { return &value_; }
private:
T value_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolder);
+ ValueHolder(const ValueHolder&) = delete;
+ ValueHolder& operator=(const ValueHolder&) = delete;
};
static pthread_key_t CreateKey() {
@@ -1763,15 +1769,15 @@ class GTEST_API_ ThreadLocal {
return key;
}
- T *GetOrCreateValue() const {
- ThreadLocalValueHolderBase *const holder =
- static_cast<ThreadLocalValueHolderBase *>(pthread_getspecific(key_));
+ T* GetOrCreateValue() const {
+ ThreadLocalValueHolderBase* const holder =
+ static_cast<ThreadLocalValueHolderBase*>(pthread_getspecific(key_));
if (holder != nullptr) {
return CheckedDowncastToActualType<ValueHolder>(holder)->pointer();
}
- ValueHolder *const new_holder = default_factory_->MakeNewHolder();
- ThreadLocalValueHolderBase *const holder_base = new_holder;
+ ValueHolder* const new_holder = default_factory_->MakeNewHolder();
+ ThreadLocalValueHolderBase* const holder_base = new_holder;
GTEST_CHECK_POSIX_SUCCESS_(pthread_setspecific(key_, holder_base));
return new_holder->pointer();
}
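
GetOrCreateValue above is the classic lazily-initialized pthread TLS pattern: probe the slot with pthread_getspecific, and on first use allocate a holder and register it with pthread_setspecific so the destructor callback reclaims it at thread exit. A minimal POSIX-only sketch (an int value standing in for gtest's ValueHolder):

#include <pthread.h>

#include <cstdio>

static pthread_key_t key;

static void DeleteValue(void* p) { delete static_cast<int*>(p); }

int* GetOrCreateValue() {
  if (void* existing = pthread_getspecific(key))
    return static_cast<int*>(existing);
  int* fresh = new int(0);          // first touch on this thread
  pthread_setspecific(key, fresh);  // DeleteValue runs at thread exit
  return fresh;
}

int main() {
  pthread_key_create(&key, DeleteValue);
  *GetOrCreateValue() = 7;
  std::printf("%d\n", *GetOrCreateValue());  // 7
  DeleteValue(pthread_getspecific(key));     // main thread: clean up manually
  pthread_key_delete(key);
}
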
@@ -1780,39 +1786,45 @@ class GTEST_API_ ThreadLocal {
public:
ValueHolderFactory() {}
virtual ~ValueHolderFactory() {}
- virtual ValueHolder *MakeNewHolder() const = 0;
+ virtual ValueHolder* MakeNewHolder() const = 0;
private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolderFactory);
+ ValueHolderFactory(const ValueHolderFactory&) = delete;
+ ValueHolderFactory& operator=(const ValueHolderFactory&) = delete;
};
class DefaultValueHolderFactory : public ValueHolderFactory {
public:
DefaultValueHolderFactory() {}
- ValueHolder *MakeNewHolder() const override { return new ValueHolder(); }
+ ValueHolder* MakeNewHolder() const override { return new ValueHolder(); }
private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultValueHolderFactory);
+ DefaultValueHolderFactory(const DefaultValueHolderFactory&) = delete;
+ DefaultValueHolderFactory& operator=(const DefaultValueHolderFactory&) =
+ delete;
};
class InstanceValueHolderFactory : public ValueHolderFactory {
public:
- explicit InstanceValueHolderFactory(const T &value) : value_(value) {}
- ValueHolder *MakeNewHolder() const override {
+ explicit InstanceValueHolderFactory(const T& value) : value_(value) {}
+ ValueHolder* MakeNewHolder() const override {
return new ValueHolder(value_);
}
private:
const T value_; // The value for each thread.
- GTEST_DISALLOW_COPY_AND_ASSIGN_(InstanceValueHolderFactory);
+ InstanceValueHolderFactory(const InstanceValueHolderFactory&) = delete;
+ InstanceValueHolderFactory& operator=(const InstanceValueHolderFactory&) =
+ delete;
};
// A key pthreads uses for looking up per-thread values.
const pthread_key_t key_;
std::unique_ptr<ValueHolderFactory> default_factory_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocal);
+ ThreadLocal(const ThreadLocal&) = delete;
+ ThreadLocal& operator=(const ThreadLocal&) = delete;
};
#endif // GTEST_HAS_MUTEX_AND_THREAD_LOCAL_
@@ -1844,7 +1856,7 @@ class Mutex {
// "MutexLock l(&mu)". Hence the typedef trick below.
class GTestMutexLock {
public:
- explicit GTestMutexLock(Mutex *) {} // NOLINT
+ explicit GTestMutexLock(Mutex*) {} // NOLINT
};
typedef GTestMutexLock MutexLock;
@@ -1853,11 +1865,11 @@ template <typename T>
class GTEST_API_ ThreadLocal {
public:
ThreadLocal() : value_() {}
- explicit ThreadLocal(const T &value) : value_(value) {}
- T *pointer() { return &value_; }
- const T *pointer() const { return &value_; }
- const T &get() const { return value_; }
- void set(const T &value) { value_ = value; }
+ explicit ThreadLocal(const T& value) : value_(value) {}
+ T* pointer() { return &value_; }
+ const T* pointer() const { return &value_; }
+ const T& get() const { return value_; }
+ void set(const T& value) { value_ = value; }
private:
T value_;
@@ -1905,6 +1917,19 @@ inline bool IsUpper(char ch) {
inline bool IsXDigit(char ch) {
return isxdigit(static_cast<unsigned char>(ch)) != 0;
}
+#ifdef __cpp_char8_t
+inline bool IsXDigit(char8_t ch) {
+ return isxdigit(static_cast<unsigned char>(ch)) != 0;
+}
+#endif
+inline bool IsXDigit(char16_t ch) {
+ const unsigned char low_byte = static_cast<unsigned char>(ch);
+ return ch == low_byte && isxdigit(low_byte) != 0;
+}
+inline bool IsXDigit(char32_t ch) {
+ const unsigned char low_byte = static_cast<unsigned char>(ch);
+ return ch == low_byte && isxdigit(low_byte) != 0;
+}
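
Note the guard in the wide overloads above: isxdigit only accepts values representable as unsigned char (or EOF), so anything outside that range, such as a fullwidth digit, is rejected up front instead of triggering undefined behavior. A quick standalone check of the low-byte guard (IsXDigit16 is an illustrative name):

#include <cctype>
#include <iostream>

bool IsXDigit16(char16_t ch) {
  const unsigned char low_byte = static_cast<unsigned char>(ch);
  // Only consult isxdigit if the value round-trips through one byte.
  return ch == low_byte && std::isxdigit(low_byte) != 0;
}

int main() {
  std::cout << IsXDigit16(u'a') << "\n";       // 1: ASCII hex digit
  std::cout << IsXDigit16(u'g') << "\n";       // 0: not a hex digit
  std::cout << IsXDigit16(u'\uFF10') << "\n";  // 0: fullwidth '0' (> 0xFF)
}
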
inline bool IsXDigit(wchar_t ch) {
const unsigned char low_byte = static_cast<unsigned char>(ch);
return ch == low_byte && isxdigit(low_byte) != 0;
@@ -1938,66 +1963,78 @@ namespace posix {
typedef struct _stat StatStruct;
#ifdef __BORLANDC__
-inline int IsATTY(int fd) { return isatty(fd); }
-inline int StrCaseCmp(const char *s1, const char *s2) {
+inline int DoIsATTY(int fd) { return isatty(fd); }
+inline int StrCaseCmp(const char* s1, const char* s2) {
return stricmp(s1, s2);
}
-inline char *StrDup(const char *src) { return strdup(src); }
+inline char* StrDup(const char* src) { return strdup(src); }
#else // !__BORLANDC__
-#if GTEST_OS_WINDOWS_MOBILE
-inline int IsATTY(int /* fd */) { return 0; }
+#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_ZOS || GTEST_OS_IOS || \
+ GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT || defined(ESP_PLATFORM)
+inline int DoIsATTY(int /* fd */) { return 0; }
#else
-inline int IsATTY(int fd) { return _isatty(fd); }
+inline int DoIsATTY(int fd) { return _isatty(fd); }
#endif // GTEST_OS_WINDOWS_MOBILE
-inline int StrCaseCmp(const char *s1, const char *s2) {
+inline int StrCaseCmp(const char* s1, const char* s2) {
return _stricmp(s1, s2);
}
-inline char *StrDup(const char *src) { return _strdup(src); }
+inline char* StrDup(const char* src) { return _strdup(src); }
#endif // __BORLANDC__
#if GTEST_OS_WINDOWS_MOBILE
-inline int FileNo(FILE *file) { return reinterpret_cast<int>(_fileno(file)); }
+inline int FileNo(FILE* file) { return reinterpret_cast<int>(_fileno(file)); }
// Stat(), RmDir(), and IsDir() are not needed on Windows CE at this
// time and thus not defined there.
#else
-inline int FileNo(FILE *file) { return _fileno(file); }
-inline int Stat(const char *path, StatStruct *buf) { return _stat(path, buf); }
-inline int RmDir(const char *dir) { return _rmdir(dir); }
-inline bool IsDir(const StatStruct &st) { return (_S_IFDIR & st.st_mode) != 0; }
+inline int FileNo(FILE* file) { return _fileno(file); }
+inline int Stat(const char* path, StatStruct* buf) { return _stat(path, buf); }
+inline int RmDir(const char* dir) { return _rmdir(dir); }
+inline bool IsDir(const StatStruct& st) { return (_S_IFDIR & st.st_mode) != 0; }
#endif // GTEST_OS_WINDOWS_MOBILE
#elif GTEST_OS_ESP8266
typedef struct stat StatStruct;
-inline int FileNo(FILE *file) { return fileno(file); }
-inline int IsATTY(int fd) { return isatty(fd); }
-inline int Stat(const char *path, StatStruct *buf) {
+inline int FileNo(FILE* file) { return fileno(file); }
+inline int DoIsATTY(int fd) { return isatty(fd); }
+inline int Stat(const char* path, StatStruct* buf) {
// stat function not implemented on ESP8266
return 0;
}
-inline int StrCaseCmp(const char *s1, const char *s2) {
+inline int StrCaseCmp(const char* s1, const char* s2) {
return strcasecmp(s1, s2);
}
-inline char *StrDup(const char *src) { return strdup(src); }
-inline int RmDir(const char *dir) { return rmdir(dir); }
-inline bool IsDir(const StatStruct &st) { return S_ISDIR(st.st_mode); }
+inline char* StrDup(const char* src) { return strdup(src); }
+inline int RmDir(const char* dir) { return rmdir(dir); }
+inline bool IsDir(const StatStruct& st) { return S_ISDIR(st.st_mode); }
#else
typedef struct stat StatStruct;
-inline int FileNo(FILE *file) { return fileno(file); }
-inline int IsATTY(int fd) { return isatty(fd); }
-inline int Stat(const char *path, StatStruct *buf) { return stat(path, buf); }
-inline int StrCaseCmp(const char *s1, const char *s2) {
+inline int FileNo(FILE* file) { return fileno(file); }
+inline int DoIsATTY(int fd) { return isatty(fd); }
+inline int Stat(const char* path, StatStruct* buf) { return stat(path, buf); }
+inline int StrCaseCmp(const char* s1, const char* s2) {
return strcasecmp(s1, s2);
}
-inline char *StrDup(const char *src) { return strdup(src); }
-inline int RmDir(const char *dir) { return rmdir(dir); }
-inline bool IsDir(const StatStruct &st) { return S_ISDIR(st.st_mode); }
+inline char* StrDup(const char* src) { return strdup(src); }
+inline int RmDir(const char* dir) { return rmdir(dir); }
+inline bool IsDir(const StatStruct& st) { return S_ISDIR(st.st_mode); }
#endif // GTEST_OS_WINDOWS
+inline int IsATTY(int fd) {
+ // DoIsATTY might change errno (for example to ENOTTY when stdout is
+ // redirected to a file on Linux), which is unexpected, so save the previous
+ // value and restore it after the call.
+ int savedErrno = errno;
+ int isAttyValue = DoIsATTY(fd);
+ errno = savedErrno;
+
+ return isAttyValue;
+}
+
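
The IsATTY wrapper added above exists because isatty(3) can set errno (ENOTTY, for instance, when stdout is redirected to a file) even though the caller never asked for error reporting; saving and restoring errno keeps the probe side-effect-free. The same pattern in a standalone POSIX-only sketch (IsATTYPreservingErrno is an illustrative name):

#include <unistd.h>

#include <cerrno>
#include <cstdio>

int IsATTYPreservingErrno(int fd) {
  const int saved_errno = errno;
  const int result = isatty(fd);  // may clobber errno, e.g. with ENOTTY
  errno = saved_errno;            // restore the caller's view
  return result;
}

int main() {
  errno = 0;
  const int tty = IsATTYPreservingErrno(STDOUT_FILENO);
  std::printf("isatty=%d errno=%d\n", tty, errno);  // errno stays 0
}
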
// Functions deprecated by MSVC 8.0.
GTEST_DISABLE_MSC_DEPRECATED_PUSH_()
@@ -2006,39 +2043,48 @@ GTEST_DISABLE_MSC_DEPRECATED_PUSH_()
// StrError() aren't needed on Windows CE at this time and thus not
// defined there.
-#if !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
-inline int ChDir(const char *dir) { return chdir(dir); }
+#if !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_WINDOWS_PHONE && \
+ !GTEST_OS_WINDOWS_RT && !GTEST_OS_ESP8266 && !GTEST_OS_XTENSA
+inline int ChDir(const char* dir) { return chdir(dir); }
#endif
-inline FILE *FOpen(const char *path, const char *mode) {
+inline FILE* FOpen(const char* path, const char* mode) {
+#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MINGW
+ struct wchar_codecvt : public std::codecvt<wchar_t, char, std::mbstate_t> {};
+ std::wstring_convert<wchar_codecvt> converter;
+ std::wstring wide_path = converter.from_bytes(path);
+ std::wstring wide_mode = converter.from_bytes(mode);
+ return _wfopen(wide_path.c_str(), wide_mode.c_str());
+#else // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MINGW
return fopen(path, mode);
+#endif // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MINGW
}
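On Windows, the new FOpen branch widens the UTF-8 path and mode before calling _wfopen. The conversion step alone, as a sketch (std::wstring_convert is deprecated since C++17 but still available; Utf8ToWide is an illustrative name):

#include <codecvt>
#include <locale>
#include <string>

std::wstring Utf8ToWide(const std::string& utf8) {
  // std::codecvt has a protected destructor, hence the derived struct,
  // exactly as in the patch above.
  struct wchar_codecvt : std::codecvt<wchar_t, char, std::mbstate_t> {};
  std::wstring_convert<wchar_codecvt> converter;
  return converter.from_bytes(utf8);  // throws std::range_error on bad input
}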
#if !GTEST_OS_WINDOWS_MOBILE
-inline FILE *FReopen(const char *path, const char *mode, FILE *stream) {
+inline FILE* FReopen(const char* path, const char* mode, FILE* stream) {
return freopen(path, mode, stream);
}
-inline FILE *FDOpen(int fd, const char *mode) { return fdopen(fd, mode); }
+inline FILE* FDOpen(int fd, const char* mode) { return fdopen(fd, mode); }
#endif
-inline int FClose(FILE *fp) { return fclose(fp); }
+inline int FClose(FILE* fp) { return fclose(fp); }
#if !GTEST_OS_WINDOWS_MOBILE
-inline int Read(int fd, void *buf, unsigned int count) {
+inline int Read(int fd, void* buf, unsigned int count) {
return static_cast<int>(read(fd, buf, count));
}
-inline int Write(int fd, const void *buf, unsigned int count) {
+inline int Write(int fd, const void* buf, unsigned int count) {
return static_cast<int>(write(fd, buf, count));
}
inline int Close(int fd) { return close(fd); }
-inline const char *StrError(int errnum) { return strerror(errnum); }
+inline const char* StrError(int errnum) { return strerror(errnum); }
#endif
-inline const char *GetEnv(const char *name) {
+inline const char* GetEnv(const char* name) {
#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || \
- GTEST_OS_WINDOWS_RT || GTEST_OS_ESP8266
+ GTEST_OS_WINDOWS_RT || GTEST_OS_ESP8266 || GTEST_OS_XTENSA
// We are on an embedded platform, which has no environment variables.
static_cast<void>(name); // To prevent 'unused argument' warning.
return nullptr;
#elif defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9)
// Environment variables which we programmatically clear will be set to the
// empty string rather than unset (NULL). Handle that case.
- const char *const env = getenv(name);
+ const char* const env = getenv(name);
return (env != nullptr && env[0] != '\0') ? env : nullptr;
#else
return getenv(name);
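The Borland/Solaris branch above maps a programmatically cleared (empty) variable back to "unset". As a standalone helper under that same convention (GetEnvOrNull is an illustrative name):

#include <cstdlib>

const char* GetEnvOrNull(const char* name) {
  const char* const env = std::getenv(name);
  // Treat an empty value the same as an unset variable.
  return (env != nullptr && env[0] != '\0') ? env : nullptr;
}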
@@ -2053,9 +2099,7 @@ GTEST_DISABLE_MSC_DEPRECATED_POP_()
// imitation of standard behaviour.
[[noreturn]] void Abort();
#else
-[[noreturn]] inline void Abort() {
- abort();
-}
+[[noreturn]] inline void Abort() { abort(); }
#endif // GTEST_OS_WINDOWS_MOBILE
} // namespace posix
@@ -2133,32 +2177,79 @@ using TimeInMillis = int64_t; // Represents time in milliseconds.
// Macro for referencing flags.
#if !defined(GTEST_FLAG)
+#define GTEST_FLAG_NAME_(name) gtest_##name
#define GTEST_FLAG(name) FLAGS_gtest_##name
#endif // !defined(GTEST_FLAG)
-#if !defined(GTEST_USE_OWN_FLAGFILE_FLAG_)
-#define GTEST_USE_OWN_FLAGFILE_FLAG_ 1
-#endif // !defined(GTEST_USE_OWN_FLAGFILE_FLAG_)
+// Pick a command line flags implementation.
+#if GTEST_HAS_ABSL
-#if !defined(GTEST_DECLARE_bool_)
-#define GTEST_FLAG_SAVER_ ::testing::internal::GTestFlagSaver
+// Macros for defining flags.
+#define GTEST_DEFINE_bool_(name, default_val, doc) \
+ ABSL_FLAG(bool, GTEST_FLAG_NAME_(name), default_val, doc)
+#define GTEST_DEFINE_int32_(name, default_val, doc) \
+ ABSL_FLAG(int32_t, GTEST_FLAG_NAME_(name), default_val, doc)
+#define GTEST_DEFINE_string_(name, default_val, doc) \
+ ABSL_FLAG(std::string, GTEST_FLAG_NAME_(name), default_val, doc)
// Macros for declaring flags.
-#define GTEST_DECLARE_bool_(name) GTEST_API_ extern bool GTEST_FLAG(name)
+#define GTEST_DECLARE_bool_(name) \
+ ABSL_DECLARE_FLAG(bool, GTEST_FLAG_NAME_(name))
#define GTEST_DECLARE_int32_(name) \
- GTEST_API_ extern std::int32_t GTEST_FLAG(name)
+ ABSL_DECLARE_FLAG(int32_t, GTEST_FLAG_NAME_(name))
#define GTEST_DECLARE_string_(name) \
- GTEST_API_ extern ::std::string GTEST_FLAG(name)
+ ABSL_DECLARE_FLAG(std::string, GTEST_FLAG_NAME_(name))
+
+#define GTEST_FLAG_SAVER_ ::absl::FlagSaver
+
+#define GTEST_FLAG_GET(name) ::absl::GetFlag(GTEST_FLAG(name))
+#define GTEST_FLAG_SET(name, value) \
+ (void)(::absl::SetFlag(&GTEST_FLAG(name), value))
+#define GTEST_USE_OWN_FLAGFILE_FLAG_ 0
+
+#else // GTEST_HAS_ABSL
// Macros for defining flags.
-#define GTEST_DEFINE_bool_(name, default_val, doc) \
- GTEST_API_ bool GTEST_FLAG(name) = (default_val)
-#define GTEST_DEFINE_int32_(name, default_val, doc) \
- GTEST_API_ std::int32_t GTEST_FLAG(name) = (default_val)
-#define GTEST_DEFINE_string_(name, default_val, doc) \
- GTEST_API_ ::std::string GTEST_FLAG(name) = (default_val)
+#define GTEST_DEFINE_bool_(name, default_val, doc) \
+ namespace testing { \
+ GTEST_API_ bool GTEST_FLAG(name) = (default_val); \
+ } \
+ static_assert(true, "no-op to require trailing semicolon")
+#define GTEST_DEFINE_int32_(name, default_val, doc) \
+ namespace testing { \
+ GTEST_API_ std::int32_t GTEST_FLAG(name) = (default_val); \
+ } \
+ static_assert(true, "no-op to require trailing semicolon")
+#define GTEST_DEFINE_string_(name, default_val, doc) \
+ namespace testing { \
+ GTEST_API_ ::std::string GTEST_FLAG(name) = (default_val); \
+ } \
+ static_assert(true, "no-op to require trailing semicolon")
+
+// Macros for declaring flags.
+#define GTEST_DECLARE_bool_(name) \
+ namespace testing { \
+ GTEST_API_ extern bool GTEST_FLAG(name); \
+ } \
+ static_assert(true, "no-op to require trailing semicolon")
+#define GTEST_DECLARE_int32_(name) \
+ namespace testing { \
+ GTEST_API_ extern std::int32_t GTEST_FLAG(name); \
+ } \
+ static_assert(true, "no-op to require trailing semicolon")
+#define GTEST_DECLARE_string_(name) \
+ namespace testing { \
+ GTEST_API_ extern ::std::string GTEST_FLAG(name); \
+ } \
+ static_assert(true, "no-op to require trailing semicolon")
+
+#define GTEST_FLAG_SAVER_ ::testing::internal::GTestFlagSaver
-#endif // !defined(GTEST_DECLARE_bool_)
+#define GTEST_FLAG_GET(name) ::testing::GTEST_FLAG(name)
+#define GTEST_FLAG_SET(name, value) (void)(::testing::GTEST_FLAG(name) = value)
+#define GTEST_USE_OWN_FLAGFILE_FLAG_ 1
+
+#endif // GTEST_HAS_ABSL
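To see what the two branches produce, here is a hand-expanded sketch of GTEST_DEFINE_bool_(fail_fast, false, "doc") plus a matching read; this is an illustration, not generated preprocessor output:

// With GTEST_HAS_ABSL:
//   ABSL_FLAG(bool, gtest_fail_fast, false, "doc");
//   bool v = ::absl::GetFlag(FLAGS_gtest_fail_fast);   // GTEST_FLAG_GET(fail_fast)
//
// Without absl:
//   namespace testing {
//   GTEST_API_ bool FLAGS_gtest_fail_fast = (false);
//   }
//   static_assert(true, "no-op to require trailing semicolon");
//   bool v = ::testing::FLAGS_gtest_fail_fast;         // GTEST_FLAG_GET(fail_fast)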
// Thread annotations
#if !defined(GTEST_EXCLUSIVE_LOCK_REQUIRED_)
@@ -2169,14 +2260,15 @@ using TimeInMillis = int64_t; // Represents time in milliseconds.
// Parses 'str' for a 32-bit signed integer. If successful, writes the result
// to *value and returns true; otherwise leaves *value unchanged and returns
// false.
-bool ParseInt32(const Message &src_text, const char *str, int32_t *value);
+GTEST_API_ bool ParseInt32(const Message& src_text, const char* str,
+ int32_t* value);
// Parses a bool/int32_t/string from the environment variable
// corresponding to the given Google Test flag.
-bool BoolFromGTestEnv(const char *flag, bool default_val);
-GTEST_API_ int32_t Int32FromGTestEnv(const char *flag, int32_t default_val);
+bool BoolFromGTestEnv(const char* flag, bool default_val);
+GTEST_API_ int32_t Int32FromGTestEnv(const char* flag, int32_t default_val);
std::string OutputFlagAlsoCheckEnvVar();
-const char *StringFromGTestEnv(const char *flag, const char *default_val);
+const char* StringFromGTestEnv(const char* flag, const char* default_val);
} // namespace internal
} // namespace testing
@@ -2202,6 +2294,66 @@ const char *StringFromGTestEnv(const char *flag, const char *default_val);
#endif // !defined(GTEST_INTERNAL_DEPRECATED)
#if GTEST_HAS_ABSL
+// Always use absl::any for UniversalPrinter<> specializations if googletest
+// is built with absl support.
+#define GTEST_INTERNAL_HAS_ANY 1
+#include "absl/types/any.h"
+namespace testing {
+namespace internal {
+using Any = ::absl::any;
+} // namespace internal
+} // namespace testing
+#else
+#ifdef __has_include
+#if __has_include(<any>) && __cplusplus >= 201703L
+// Otherwise for C++17 and higher use std::any for UniversalPrinter<>
+// specializations.
+#define GTEST_INTERNAL_HAS_ANY 1
+#include <any>
+namespace testing {
+namespace internal {
+using Any = ::std::any;
+} // namespace internal
+} // namespace testing
+// The case where absl is configured NOT to alias std::any is not
+// supported.
+#endif // __has_include(<any>) && __cplusplus >= 201703L
+#endif // __has_include
+#endif // GTEST_HAS_ABSL
+
+#if GTEST_HAS_ABSL
+// Always use absl::optional for UniversalPrinter<> specializations if
+// googletest is built with absl support.
+#define GTEST_INTERNAL_HAS_OPTIONAL 1
+#include "absl/types/optional.h"
+namespace testing {
+namespace internal {
+template <typename T>
+using Optional = ::absl::optional<T>;
+inline ::absl::nullopt_t Nullopt() { return ::absl::nullopt; }
+} // namespace internal
+} // namespace testing
+#else
+#ifdef __has_include
+#if __has_include(<optional>) && __cplusplus >= 201703L
+// Otherwise for C++17 and higher use std::optional for UniversalPrinter<>
+// specializations.
+#define GTEST_INTERNAL_HAS_OPTIONAL 1
+#include <optional>
+namespace testing {
+namespace internal {
+template <typename T>
+using Optional = ::std::optional<T>;
+inline ::std::nullopt_t Nullopt() { return ::std::nullopt; }
+} // namespace internal
+} // namespace testing
+// The case where absl is configured NOT to alias std::optional is not
+// supported.
+#endif // __has_include(<optional>) && __cplusplus >= 201703L
+#endif // __has_include
+#endif // GTEST_HAS_ABSL
+
+#if GTEST_HAS_ABSL
// Always use absl::string_view for Matcher<> specializations if googletest
// is built with absl support.
#define GTEST_INTERNAL_HAS_STRING_VIEW 1
@@ -2223,10 +2375,39 @@ namespace internal {
using StringView = ::std::string_view;
} // namespace internal
} // namespace testing
- // The case where absl is configured NOT to alias std::string_view is not
- // supported.
+// The case where absl is configured NOT to alias std::string_view is not
+// supported.
#endif // __has_include(<string_view>) && __cplusplus >= 201703L
#endif // __has_include
#endif // GTEST_HAS_ABSL
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
+#if GTEST_HAS_ABSL
+// Always use absl::variant for UniversalPrinter<> specializations if googletest
+// is built with absl support.
+#define GTEST_INTERNAL_HAS_VARIANT 1
+#include "absl/types/variant.h"
+namespace testing {
+namespace internal {
+template <typename... T>
+using Variant = ::absl::variant<T...>;
+} // namespace internal
+} // namespace testing
+#else
+#ifdef __has_include
+#if __has_include(<variant>) && __cplusplus >= 201703L
+// Otherwise for C++17 and higher use std::variant for UniversalPrinter<>
+// specializations.
+#define GTEST_INTERNAL_HAS_VARIANT 1
+#include <variant>
+namespace testing {
+namespace internal {
+template <typename... T>
+using Variant = ::std::variant<T...>;
+} // namespace internal
+} // namespace testing
+// The case where absl is configured NOT to alias std::variant is not supported.
+#endif // __has_include(<variant>) && __cplusplus >= 201703L
+#endif // __has_include
+#endif // GTEST_HAS_ABSL
+
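Taken together, the any/optional/variant blocks above give printer code one spelling per vocabulary type. A usage sketch, assuming a build where the corresponding GTEST_INTERNAL_HAS_* macros ended up defined:

#include <string>

void AliasSketch() {
  testing::internal::Optional<int> maybe = testing::internal::Nullopt();
  maybe = 42;
  testing::internal::Variant<int, std::string> either = std::string("hi");
  testing::internal::Any anything = 3.14;  // holds any copyable type
}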
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-string.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-string.h
index f1f933097d5..cca2e1f2ad9 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-string.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-string.h
@@ -26,7 +26,7 @@
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+
// The Google C++ Testing and Mocking Framework (Google Test)
//
// This header file declares the String class and functions used internally by
@@ -36,10 +36,12 @@
// This header file is #included by gtest-internal.h.
// It should not be #included by other files.
-// GOOGLETEST_CM0001 DO NOT DELETE
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
#ifdef __BORLANDC__
// string.h is not guaranteed to provide strcpy on C++ Builder.
@@ -47,6 +49,7 @@
#endif
#include <string.h>
+
#include <cstdint>
#include <string>
@@ -67,7 +70,7 @@ class GTEST_API_ String {
//
// This is different from strdup() in string.h, which allocates
// memory using malloc().
- static const char *CloneCString(const char *c_str);
+ static const char* CloneCString(const char* c_str);
#if GTEST_OS_WINDOWS_MOBILE
// Windows CE does not have the 'ANSI' versions of Win32 APIs. To be
@@ -82,7 +85,7 @@ class GTEST_API_ String {
// The wide string is created using the ANSI codepage (CP_ACP) to
// match the behaviour of the ANSI versions of Win32 calls and the
// C runtime.
- static LPCWSTR AnsiToUtf16(const char *c_str);
+ static LPCWSTR AnsiToUtf16(const char* c_str);
// Creates an ANSI string from the given wide string, allocating
// memory using new. The caller is responsible for deleting the return
@@ -92,7 +95,7 @@ class GTEST_API_ String {
// The returned string is created using the ANSI codepage (CP_ACP) to
// match the behaviour of the ANSI versions of Win32 calls and the
// C runtime.
- static const char *Utf16ToAnsi(LPCWSTR utf16_str);
+ static const char* Utf16ToAnsi(LPCWSTR utf16_str);
#endif
// Compares two C strings. Returns true if and only if they have the same
@@ -101,13 +104,13 @@ class GTEST_API_ String {
// Unlike strcmp(), this function can handle NULL argument(s). A
// NULL C string is considered different to any non-NULL C string,
// including the empty string.
- static bool CStringEquals(const char *lhs, const char *rhs);
+ static bool CStringEquals(const char* lhs, const char* rhs);
// Converts a wide C string to a String using the UTF-8 encoding.
// NULL will be converted to "(null)". If an error occurred during
// the conversion, "(failed to convert from wide string)" is
// returned.
- static std::string ShowWideCString(const wchar_t *wide_c_str);
+ static std::string ShowWideCString(const wchar_t* wide_c_str);
// Compares two wide C strings. Returns true if and only if they have the
// same content.
@@ -115,7 +118,7 @@ class GTEST_API_ String {
// Unlike wcscmp(), this function can handle NULL argument(s). A
// NULL C string is considered different to any non-NULL C string,
// including the empty string.
- static bool WideCStringEquals(const wchar_t *lhs, const wchar_t *rhs);
+ static bool WideCStringEquals(const wchar_t* lhs, const wchar_t* rhs);
// Compares two C strings, ignoring case. Returns true if and only if
// they have the same content.
@@ -123,7 +126,7 @@ class GTEST_API_ String {
// Unlike strcasecmp(), this function can handle NULL argument(s).
// A NULL C string is considered different to any non-NULL C string,
// including the empty string.
- static bool CaseInsensitiveCStringEquals(const char *lhs, const char *rhs);
+ static bool CaseInsensitiveCStringEquals(const char* lhs, const char* rhs);
// Compares two wide C strings, ignoring case. Returns true if and only if
// they have the same content.
@@ -137,17 +140,20 @@ class GTEST_API_ String {
// which compares according to LC_CTYPE category of the current locale.
// On MacOS X, it uses towlower, which also uses LC_CTYPE category of the
// current locale.
- static bool CaseInsensitiveWideCStringEquals(const wchar_t *lhs,
- const wchar_t *rhs);
+ static bool CaseInsensitiveWideCStringEquals(const wchar_t* lhs,
+ const wchar_t* rhs);
// Returns true if and only if the given string ends with the given suffix,
// ignoring case. Any string is considered to end with an empty suffix.
- static bool EndsWithCaseInsensitive(const std::string &str,
- const std::string &suffix);
+ static bool EndsWithCaseInsensitive(const std::string& str,
+ const std::string& suffix);
// Formats an int value as "%02d".
static std::string FormatIntWidth2(int value); // "%02d" for width == 2
+  // Formats an int value to the given width with leading zeros.
+ static std::string FormatIntWidthN(int value, int width);
+
// Formats an int value as "%X".
static std::string FormatHexInt(int value);
@@ -163,9 +169,9 @@ class GTEST_API_ String {
// Gets the content of the stringstream's buffer as an std::string. Each '\0'
// character in the buffer is replaced with "\\0".
-GTEST_API_ std::string StringStreamToString(::std::stringstream *stream);
+GTEST_API_ std::string StringStreamToString(::std::stringstream* stream);
} // namespace internal
} // namespace testing
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-type-util.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-type-util.h
index 3b3a651dc0c..6bc02a7de30 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-type-util.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/include/gtest/internal/gtest-type-util.h
@@ -30,10 +30,12 @@
// Type utilities needed for implementing typed and type-parameterized
// tests.
-// GOOGLETEST_CM0001 DO NOT DELETE
+// IWYU pragma: private, include "gtest/gtest.h"
+// IWYU pragma: friend gtest/.*
+// IWYU pragma: friend gmock/.*
-#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
-#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
+#ifndef GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
+#define GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
#include "gtest/internal/gtest-port.h"
@@ -64,14 +66,10 @@ inline std::string CanonicalizeForStdLibVersioning(std::string s) {
return s;
}
-// GetTypeName<T>() returns a human-readable name of type T.
-// NB: This function is also used in Google Mock, so don't move it inside of
-// the typed-test-only section below.
-template <typename T>
-std::string GetTypeName() {
#if GTEST_HAS_RTTI
-
- const char *const name = typeid(T).name();
+// GetTypeName(const std::type_info&) returns a human-readable name of the type.
+inline std::string GetTypeName(const std::type_info& type) {
+ const char* const name = type.name();
#if GTEST_HAS_CXXABI_H_ || defined(__HP_aCC)
int status = 0;
// gcc's implementation of typeid(T).name() mangles the type name,
@@ -79,23 +77,29 @@ std::string GetTypeName() {
#if GTEST_HAS_CXXABI_H_
using abi::__cxa_demangle;
#endif // GTEST_HAS_CXXABI_H_
- char *const readable_name = __cxa_demangle(name, nullptr, nullptr, &status);
+ char* const readable_name = __cxa_demangle(name, nullptr, nullptr, &status);
const std::string name_str(status == 0 ? readable_name : name);
free(readable_name);
return CanonicalizeForStdLibVersioning(name_str);
#else
return name;
#endif // GTEST_HAS_CXXABI_H_ || __HP_aCC
+}
+#endif // GTEST_HAS_RTTI
+// GetTypeName<T>() returns a human-readable name of type T if RTTI is
+// enabled; otherwise it returns a dummy type name.
+// NB: This function is also used in Google Mock, so don't move it inside of
+// the typed-test-only section below.
+template <typename T>
+std::string GetTypeName() {
+#if GTEST_HAS_RTTI
+ return GetTypeName(typeid(T));
#else
-
return "<type>";
-
#endif // GTEST_HAS_RTTI
}
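The RTTI branch above leans on abi::__cxa_demangle. The demangling step in isolation (GCC/Clang with <cxxabi.h>; Demangle is an illustrative name):

#include <cxxabi.h>
#include <cstdlib>
#include <string>
#include <typeinfo>

std::string Demangle(const std::type_info& type) {
  int status = 0;
  char* const readable =
      abi::__cxa_demangle(type.name(), nullptr, nullptr, &status);
  std::string result(status == 0 ? readable : type.name());
  std::free(readable);  // free(nullptr) is a no-op, so failure is safe too
  return result;
}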
-#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
-
// A unique type indicating an empty node
struct None {};
@@ -172,8 +176,6 @@ struct GenerateTypeList {
using type = typename proxy::type;
};
-#endif // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
-
} // namespace internal
template <typename... Ts>
@@ -181,4 +183,4 @@ using Types = internal::ProxyTypeList<Ts...>;
} // namespace testing
-#endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
+#endif // GOOGLETEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-all.cc b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-all.cc
index ad292905cf3..2a70ed88c78 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-all.cc
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-all.cc
@@ -38,7 +38,7 @@
#include "gtest/gtest.h"
// The following lines pull in the real gtest *.cc files.
-#include "src/gtest.cc"
+#include "src/gtest-assertion-result.cc"
#include "src/gtest-death-test.cc"
#include "src/gtest-filepath.cc"
#include "src/gtest-matchers.cc"
@@ -46,3 +46,4 @@
#include "src/gtest-printers.cc"
#include "src/gtest-test-part.cc"
#include "src/gtest-typed-test.cc"
+#include "src/gtest.cc"
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-assertion-result.cc b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-assertion-result.cc
new file mode 100644
index 00000000000..f1c0b10dc9e
--- /dev/null
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-assertion-result.cc
@@ -0,0 +1,77 @@
+// Copyright 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The Google C++ Testing and Mocking Framework (Google Test)
+//
+// This file defines the AssertionResult type.
+
+#include "gtest/gtest-assertion-result.h"
+
+#include <string>
+#include <utility>
+
+#include "gtest/gtest-message.h"
+
+namespace testing {
+
+// AssertionResult constructors.
+// Used in EXPECT_TRUE/FALSE(assertion_result).
+AssertionResult::AssertionResult(const AssertionResult& other)
+ : success_(other.success_),
+ message_(other.message_.get() != nullptr
+ ? new ::std::string(*other.message_)
+ : static_cast< ::std::string*>(nullptr)) {}
+
+// Swaps two AssertionResults.
+void AssertionResult::swap(AssertionResult& other) {
+ using std::swap;
+ swap(success_, other.success_);
+ swap(message_, other.message_);
+}
+
+// Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE.
+AssertionResult AssertionResult::operator!() const {
+ AssertionResult negation(!success_);
+ if (message_.get() != nullptr) negation << *message_;
+ return negation;
+}
+
+// Makes a successful assertion result.
+AssertionResult AssertionSuccess() { return AssertionResult(true); }
+
+// Makes a failed assertion result.
+AssertionResult AssertionFailure() { return AssertionResult(false); }
+
+// Makes a failed assertion result with the given failure message.
+// Deprecated; use AssertionFailure() << message.
+AssertionResult AssertionFailure(const Message& message) {
+ return AssertionFailure() << message;
+}
+
+} // namespace testing
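For context, the factories this new file defines are the documented googletest pattern for predicate assertions:

#include "gtest/gtest.h"

testing::AssertionResult IsEven(int n) {
  if (n % 2 == 0) return testing::AssertionSuccess();
  return testing::AssertionFailure() << n << " is odd";
}

TEST(Numbers, EvenCheck) {
  EXPECT_TRUE(IsEven(4));
  // EXPECT_TRUE(IsEven(3)) would fail and print "3 is odd".
}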
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-death-test.cc b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-death-test.cc
index c38551cda14..e6abc6278ae 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-death-test.cc
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-death-test.cc
@@ -32,10 +32,11 @@
#include "gtest/gtest-death-test.h"
+#include <functional>
#include <utility>
-#include "gtest/internal/gtest-port.h"
#include "gtest/internal/custom/gtest.h"
+#include "gtest/internal/gtest-port.h"
#if GTEST_HAS_DEATH_TEST
@@ -95,9 +96,12 @@ namespace testing {
// used internally at Google, is "threadsafe".
static const char kDefaultDeathTestStyle[] = GTEST_DEFAULT_DEATH_TEST_STYLE;
+} // namespace testing
+
GTEST_DEFINE_string_(
death_test_style,
- internal::StringFromGTestEnv("death_test_style", kDefaultDeathTestStyle),
+ testing::internal::StringFromGTestEnv("death_test_style",
+ testing::kDefaultDeathTestStyle),
"Indicates how to run a death test in a forked child process: "
"\"threadsafe\" (child process re-executes the test binary "
"from the beginning, running only the specific death test) or "
@@ -106,7 +110,7 @@ GTEST_DEFINE_string_(
GTEST_DEFINE_bool_(
death_test_use_fork,
- internal::BoolFromGTestEnv("death_test_use_fork", false),
+ testing::internal::BoolFromGTestEnv("death_test_use_fork", false),
"Instructs to use fork()/_exit() instead of clone() in death tests. "
"Ignored and always uses fork() on POSIX systems where clone() is not "
"implemented. Useful when running under valgrind or similar tools if "
@@ -116,7 +120,6 @@ GTEST_DEFINE_bool_(
"work in 99% of the cases. Once valgrind is fixed, this flag will "
"most likely be removed.");
-namespace internal {
GTEST_DEFINE_string_(
internal_run_death_test, "",
"Indicates the file, line number, temporal index of "
@@ -125,7 +128,8 @@ GTEST_DEFINE_string_(
"the '|' characters. This flag is specified if and only if the "
"current process is a sub-process launched for running a thread-safe "
"death test. FOR INTERNAL USE ONLY.");
-} // namespace internal
+
+namespace testing {
#if GTEST_HAS_DEATH_TEST
@@ -147,12 +151,12 @@ bool InDeathTestChild() {
// On Windows and Fuchsia, death tests are thread-safe regardless of the value
// of the death_test_style flag.
- return !GTEST_FLAG(internal_run_death_test).empty();
+ return !GTEST_FLAG_GET(internal_run_death_test).empty();
#else
- if (GTEST_FLAG(death_test_style) == "threadsafe")
- return !GTEST_FLAG(internal_run_death_test).empty();
+ if (GTEST_FLAG_GET(death_test_style) == "threadsafe")
+ return !GTEST_FLAG_GET(internal_run_death_test).empty();
else
return g_in_fast_death_test_child;
#endif
@@ -245,7 +249,7 @@ static std::string DeathTestThreadWarning(size_t thread_count) {
msg << "detected " << thread_count << " threads.";
}
msg << " See "
- "https://github.com/google/googletest/blob/master/googletest/docs/"
+ "https://github.com/google/googletest/blob/master/docs/"
"advanced.md#death-tests-and-threads"
<< " for more explanation and suggested solutions, especially if"
<< " this is the last message you see before your test times out.";
@@ -280,14 +284,14 @@ enum DeathTestOutcome { IN_PROGRESS, DIED, LIVED, RETURNED, THREW };
// message is propagated back to the parent process. Otherwise, the
// message is simply printed to stderr. In either case, the program
// then exits with status 1.
-static void DeathTestAbort(const std::string &message) {
+static void DeathTestAbort(const std::string& message) {
// On a POSIX system, this function may be called from a threadsafe-style
// death test child process, which operates on a very small stack. Use
// the heap for any additional non-minuscule memory requirements.
- const InternalRunDeathTestFlag *const flag =
+ const InternalRunDeathTestFlag* const flag =
GetUnitTestImpl()->internal_run_death_test_flag();
if (flag != nullptr) {
- FILE *parent = posix::FDOpen(flag->write_fd(), "w");
+ FILE* parent = posix::FDOpen(flag->write_fd(), "w");
fputc(kDeathTestInternalError, parent);
fprintf(parent, "%s", message.c_str());
fflush(parent);
@@ -365,7 +369,7 @@ static void FailFromInternalError(int fd) {
// Death test constructor. Increments the running death test count
// for the current test.
DeathTest::DeathTest() {
- TestInfo *const info = GetUnitTestImpl()->current_test_info();
+ TestInfo* const info = GetUnitTestImpl()->current_test_info();
if (info == nullptr) {
DeathTestAbort(
"Cannot run a death test outside of a TEST or "
@@ -375,18 +379,18 @@ DeathTest::DeathTest() {
// Creates and returns a death test by dispatching to the current
// death test factory.
-bool DeathTest::Create(const char *statement,
- Matcher<const std::string &> matcher, const char *file,
- int line, DeathTest **test) {
+bool DeathTest::Create(const char* statement,
+ Matcher<const std::string&> matcher, const char* file,
+ int line, DeathTest** test) {
return GetUnitTestImpl()->death_test_factory()->Create(
statement, std::move(matcher), file, line, test);
}
-const char *DeathTest::LastMessage() {
+const char* DeathTest::LastMessage() {
return last_death_test_message_.c_str();
}
-void DeathTest::set_last_death_test_message(const std::string &message) {
+void DeathTest::set_last_death_test_message(const std::string& message) {
last_death_test_message_ = message;
}
@@ -395,9 +399,14 @@ std::string DeathTest::last_death_test_message_;
// Provides cross platform implementation for some death functionality.
class DeathTestImpl : public DeathTest {
protected:
- DeathTestImpl(const char *a_statement, Matcher<const std::string &> matcher)
- : statement_(a_statement), matcher_(std::move(matcher)), spawned_(false),
- status_(-1), outcome_(IN_PROGRESS), read_fd_(-1), write_fd_(-1) {}
+ DeathTestImpl(const char* a_statement, Matcher<const std::string&> matcher)
+ : statement_(a_statement),
+ matcher_(std::move(matcher)),
+ spawned_(false),
+ status_(-1),
+ outcome_(IN_PROGRESS),
+ read_fd_(-1),
+ write_fd_(-1) {}
// read_fd_ is expected to be closed and cleared by a derived class.
~DeathTestImpl() override { GTEST_DEATH_TEST_CHECK_(read_fd_ == -1); }
@@ -405,7 +414,7 @@ class DeathTestImpl : public DeathTest {
void Abort(AbortReason reason) override;
bool Passed(bool status_ok) override;
- const char *statement() const { return statement_; }
+ const char* statement() const { return statement_; }
bool spawned() const { return spawned_; }
void set_spawned(bool is_spawned) { spawned_ = is_spawned; }
int status() const { return status_; }
@@ -429,9 +438,9 @@ class DeathTestImpl : public DeathTest {
private:
// The textual content of the code this object is testing. This class
// doesn't own this string and should not attempt to delete it.
- const char *const statement_;
+ const char* const statement_;
// A matcher that's expected to match the stderr output by the child process.
- Matcher<const std::string &> matcher_;
+ Matcher<const std::string&> matcher_;
// True if the death test child process has been successfully spawned.
bool spawned_;
// The exit status of the child process.
@@ -468,9 +477,15 @@ void DeathTestImpl::ReadAndInterpretStatusByte() {
set_outcome(DIED);
} else if (bytes_read == 1) {
switch (flag) {
- case kDeathTestReturned: set_outcome(RETURNED); break;
- case kDeathTestThrew: set_outcome(THREW); break;
- case kDeathTestLived: set_outcome(LIVED); break;
+ case kDeathTestReturned:
+ set_outcome(RETURNED);
+ break;
+ case kDeathTestThrew:
+ set_outcome(THREW);
+ break;
+ case kDeathTestLived:
+ set_outcome(LIVED);
+ break;
case kDeathTestInternalError:
FailFromInternalError(read_fd()); // Does not return.
break;
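For reference, the single status byte interpreted by this switch is one of four constants defined earlier in gtest-death-test.cc:

// kDeathTestLived         = 'L'  statement ran to completion; test fails
// kDeathTestReturned      = 'R'  statement returned early; test fails
// kDeathTestThrew         = 'T'  statement threw an exception; test fails
// kDeathTestInternalError = 'I'  framework error in the child, not a verdict
// Reading no byte at all means the child really died (outcome DIED).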
@@ -497,11 +512,9 @@ void DeathTestImpl::Abort(AbortReason reason) {
// The parent process considers the death test to be a failure if
// it finds any data in our pipe. So, here we write a single flag byte
// to the pipe, then exit.
- const char status_ch = reason == TEST_DID_NOT_DIE
- ? kDeathTestLived
- : reason == TEST_THREW_EXCEPTION
- ? kDeathTestThrew
- : kDeathTestReturned;
+ const char status_ch = reason == TEST_DID_NOT_DIE ? kDeathTestLived
+ : reason == TEST_THREW_EXCEPTION ? kDeathTestThrew
+ : kDeathTestReturned;
GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Write(write_fd(), &status_ch, 1));
// We are leaking the descriptor here because on some platforms (i.e.,
@@ -518,7 +531,7 @@ void DeathTestImpl::Abort(AbortReason reason) {
// Returns an indented copy of stderr output for a death test.
// This makes distinguishing death test output lines from regular log lines
// much easier.
-static ::std::string FormatDeathTestOutput(const ::std::string &output) {
+static ::std::string FormatDeathTestOutput(const ::std::string& output) {
::std::string ret;
for (size_t at = 0;;) {
const size_t line_end = output.find('\n', at);
@@ -639,10 +652,10 @@ bool DeathTestImpl::Passed(bool status_ok) {
//
class WindowsDeathTest : public DeathTestImpl {
public:
- WindowsDeathTest(const char *a_statement,
- Matcher<const std::string &> matcher, const char *file,
- int line)
- : DeathTestImpl(a_statement, std::move(matcher)), file_(file),
+ WindowsDeathTest(const char* a_statement, Matcher<const std::string&> matcher,
+ const char* file, int line)
+ : DeathTestImpl(a_statement, std::move(matcher)),
+ file_(file),
line_(line) {}
// All of these virtual functions are inherited from DeathTest.
@@ -651,7 +664,7 @@ class WindowsDeathTest : public DeathTestImpl {
private:
// The name of the file in which the death test is located.
- const char *const file_;
+ const char* const file_;
// The line number on which the death test is located.
const int line_;
// Handle to the write end of the pipe to the child process.
@@ -673,13 +686,15 @@ int WindowsDeathTest::Wait() {
// Wait until the child either signals that it has acquired the write end
// of the pipe or it dies.
- const HANDLE wait_handles[2] = { child_handle_.Get(), event_handle_.Get() };
+ const HANDLE wait_handles[2] = {child_handle_.Get(), event_handle_.Get()};
switch (::WaitForMultipleObjects(2, wait_handles,
FALSE, // Waits for any of the handles.
INFINITE)) {
case WAIT_OBJECT_0:
- case WAIT_OBJECT_0 + 1: break;
- default: GTEST_DEATH_TEST_CHECK_(false); // Should not get here.
+ case WAIT_OBJECT_0 + 1:
+ break;
+ default:
+ GTEST_DEATH_TEST_CHECK_(false); // Should not get here.
}
// The child has acquired the write end of the pipe or exited.
@@ -709,10 +724,10 @@ int WindowsDeathTest::Wait() {
// --gtest_internal_run_death_test flags such that it knows to run the
// current death test only.
DeathTest::TestRole WindowsDeathTest::AssumeRole() {
- const UnitTestImpl *const impl = GetUnitTestImpl();
- const InternalRunDeathTestFlag *const flag =
+ const UnitTestImpl* const impl = GetUnitTestImpl();
+ const InternalRunDeathTestFlag* const flag =
impl->internal_run_death_test_flag();
- const TestInfo *const info = impl->current_test_info();
+ const TestInfo* const info = impl->current_test_info();
const int death_test_index = info->result()->death_test_count();
if (flag != nullptr) {
@@ -724,8 +739,8 @@ DeathTest::TestRole WindowsDeathTest::AssumeRole() {
// WindowsDeathTest uses an anonymous pipe to communicate results of
// a death test.
- SECURITY_ATTRIBUTES handles_are_inheritable = { sizeof(SECURITY_ATTRIBUTES),
- nullptr, TRUE };
+ SECURITY_ATTRIBUTES handles_are_inheritable = {sizeof(SECURITY_ATTRIBUTES),
+ nullptr, TRUE};
HANDLE read_handle, write_handle;
GTEST_DEATH_TEST_CHECK_(::CreatePipe(&read_handle, &write_handle,
&handles_are_inheritable,
@@ -741,12 +756,12 @@ DeathTest::TestRole WindowsDeathTest::AssumeRole() {
nullptr)); // The event is unnamed.
GTEST_DEATH_TEST_CHECK_(event_handle_.Get() != nullptr);
const std::string filter_flag = std::string("--") + GTEST_FLAG_PREFIX_ +
- kFilterFlag + "=" + info->test_suite_name() +
- "." + info->name();
+ "filter=" + info->test_suite_name() + "." +
+ info->name();
const std::string internal_flag =
- std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag + "=" +
- file_ + "|" + StreamableToString(line_) + "|" +
- StreamableToString(death_test_index) + "|" +
+ std::string("--") + GTEST_FLAG_PREFIX_ +
+ "internal_run_death_test=" + file_ + "|" + StreamableToString(line_) +
+ "|" + StreamableToString(death_test_index) + "|" +
StreamableToString(static_cast<unsigned int>(::GetCurrentProcessId())) +
// size_t has the same width as pointers on both 32-bit and 64-bit
// Windows platforms.
@@ -779,9 +794,9 @@ DeathTest::TestRole WindowsDeathTest::AssumeRole() {
PROCESS_INFORMATION process_info;
GTEST_DEATH_TEST_CHECK_(
::CreateProcessA(
- executable_path, const_cast<char *>(command_line.c_str()),
- nullptr, // Retuned process handle is not inheritable.
- nullptr, // Retuned thread handle is not inheritable.
+ executable_path, const_cast<char*>(command_line.c_str()),
+ nullptr, // Returned process handle is not inheritable.
+ nullptr, // Returned thread handle is not inheritable.
TRUE, // Child inherits all inheritable handles (for write_handle_).
0x0, // Default creation flags.
nullptr, // Inherit the parent's environment.
@@ -797,10 +812,10 @@ DeathTest::TestRole WindowsDeathTest::AssumeRole() {
class FuchsiaDeathTest : public DeathTestImpl {
public:
- FuchsiaDeathTest(const char *a_statement,
- Matcher<const std::string &> matcher, const char *file,
- int line)
- : DeathTestImpl(a_statement, std::move(matcher)), file_(file),
+ FuchsiaDeathTest(const char* a_statement, Matcher<const std::string&> matcher,
+ const char* file, int line)
+ : DeathTestImpl(a_statement, std::move(matcher)),
+ file_(file),
line_(line) {}
// All of these virtual functions are inherited from DeathTest.
@@ -810,7 +825,7 @@ class FuchsiaDeathTest : public DeathTestImpl {
private:
// The name of the file in which the death test is located.
- const char *const file_;
+ const char* const file_;
// The line number on which the death test is located.
const int line_;
// The stderr data captured by the child process.
@@ -827,28 +842,28 @@ class Arguments {
Arguments() { args_.push_back(nullptr); }
~Arguments() {
- for (std::vector<char *>::iterator i = args_.begin(); i != args_.end();
+ for (std::vector<char*>::iterator i = args_.begin(); i != args_.end();
++i) {
free(*i);
}
}
- void AddArgument(const char *argument) {
+ void AddArgument(const char* argument) {
args_.insert(args_.end() - 1, posix::StrDup(argument));
}
template <typename Str>
- void AddArguments(const ::std::vector<Str> &arguments) {
+ void AddArguments(const ::std::vector<Str>& arguments) {
for (typename ::std::vector<Str>::const_iterator i = arguments.begin();
i != arguments.end(); ++i) {
args_.insert(args_.end() - 1, posix::StrDup(i->c_str()));
}
}
- char *const *Argv() { return &args_[0]; }
+ char* const* Argv() { return &args_[0]; }
- int size() { return args_.size() - 1; }
+ int size() { return static_cast<int>(args_.size()) - 1; }
private:
- std::vector<char *> args_;
+ std::vector<char*> args_;
};
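The trailing nullptr that Arguments maintains is what exec*-style interfaces (and fdio_spawn on Fuchsia) require as an argv terminator. Usage sketch with illustrative values:

Arguments args;
args.AddArgument("/path/to/test_binary");
args.AddArgument("--gtest_filter=MyDeathTest.Dies");
char* const* argv = args.Argv();  // {binary, filter flag, nullptr}
int argc = args.size();           // 2; the terminator is not counted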
// Waits for the child in a death test to exit, returning its exit
@@ -868,19 +883,18 @@ int FuchsiaDeathTest::Wait() {
GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
// Register to wait for the child process to terminate.
- status_zx = child_process_.wait_async(
- port, kProcessKey, ZX_PROCESS_TERMINATED, ZX_WAIT_ASYNC_ONCE);
+ status_zx =
+ child_process_.wait_async(port, kProcessKey, ZX_PROCESS_TERMINATED, 0);
GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
// Register to wait for the socket to be readable or closed.
status_zx = stderr_socket_.wait_async(
- port, kSocketKey, ZX_SOCKET_READABLE | ZX_SOCKET_PEER_CLOSED,
- ZX_WAIT_ASYNC_ONCE);
+ port, kSocketKey, ZX_SOCKET_READABLE | ZX_SOCKET_PEER_CLOSED, 0);
GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
// Register to wait for an exception.
- status_zx = exception_channel_.wait_async(
- port, kExceptionKey, ZX_CHANNEL_READABLE, ZX_WAIT_ASYNC_ONCE);
+ status_zx = exception_channel_.wait_async(port, kExceptionKey,
+ ZX_CHANNEL_READABLE, 0);
GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
bool process_terminated = false;
@@ -920,8 +934,7 @@ int FuchsiaDeathTest::Wait() {
} else {
GTEST_DEATH_TEST_CHECK_(status_zx == ZX_ERR_SHOULD_WAIT);
status_zx = stderr_socket_.wait_async(
- port, kSocketKey, ZX_SOCKET_READABLE | ZX_SOCKET_PEER_CLOSED,
- ZX_WAIT_ASYNC_ONCE);
+ port, kSocketKey, ZX_SOCKET_READABLE | ZX_SOCKET_PEER_CLOSED, 0);
GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
}
} else {
@@ -938,8 +951,8 @@ int FuchsiaDeathTest::Wait() {
nullptr, nullptr);
GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
- GTEST_DEATH_TEST_CHECK_(buffer.exited);
- set_status(buffer.return_code);
+ GTEST_DEATH_TEST_CHECK_(buffer.flags & ZX_INFO_PROCESS_FLAG_EXITED);
+ set_status(static_cast<int>(buffer.return_code));
return status();
}
@@ -949,10 +962,10 @@ int FuchsiaDeathTest::Wait() {
// --gtest_internal_run_death_test flags such that it knows to run the
// current death test only.
DeathTest::TestRole FuchsiaDeathTest::AssumeRole() {
- const UnitTestImpl *const impl = GetUnitTestImpl();
- const InternalRunDeathTestFlag *const flag =
+ const UnitTestImpl* const impl = GetUnitTestImpl();
+ const InternalRunDeathTestFlag* const flag =
impl->internal_run_death_test_flag();
- const TestInfo *const info = impl->current_test_info();
+ const TestInfo* const info = impl->current_test_info();
const int death_test_index = info->result()->death_test_count();
if (flag != nullptr) {
@@ -967,8 +980,8 @@ DeathTest::TestRole FuchsiaDeathTest::AssumeRole() {
// Build the child process command line.
const std::string filter_flag = std::string("--") + GTEST_FLAG_PREFIX_ +
- kFilterFlag + "=" + info->test_suite_name() +
- "." + info->name();
+ "filter=" + info->test_suite_name() + "." +
+ info->name();
const std::string internal_flag = std::string("--") + GTEST_FLAG_PREFIX_ +
kInternalRunDeathTestFlag + "=" + file_ +
"|" + StreamableToString(line_) + "|" +
@@ -988,7 +1001,7 @@ DeathTest::TestRole FuchsiaDeathTest::AssumeRole() {
// Set the pipe handle for the child.
fdio_spawn_action_t spawn_actions[2] = {};
- fdio_spawn_action_t *add_handle_action = &spawn_actions[0];
+ fdio_spawn_action_t* add_handle_action = &spawn_actions[0];
add_handle_action->action = FDIO_SPAWN_ACTION_ADD_HANDLE;
add_handle_action->h.id = PA_HND(PA_FD, kFuchsiaReadPipeFd);
add_handle_action->h.handle = child_pipe_handle;
@@ -1005,7 +1018,7 @@ DeathTest::TestRole FuchsiaDeathTest::AssumeRole() {
// Make the stderr socket nonblocking.
GTEST_DEATH_TEST_CHECK_(fcntl(stderr_producer_fd, F_SETFL, 0) == 0);
- fdio_spawn_action_t *add_stderr_action = &spawn_actions[1];
+ fdio_spawn_action_t* add_stderr_action = &spawn_actions[1];
add_stderr_action->action = FDIO_SPAWN_ACTION_CLONE_FD;
add_stderr_action->fd.local_fd = stderr_producer_fd;
add_stderr_action->fd.target_fd = STDERR_FILENO;
@@ -1046,7 +1059,7 @@ std::string FuchsiaDeathTest::GetErrorLogs() { return captured_stderr_; }
// left undefined.
class ForkingDeathTest : public DeathTestImpl {
public:
- ForkingDeathTest(const char *statement, Matcher<const std::string &> matcher);
+ ForkingDeathTest(const char* statement, Matcher<const std::string&> matcher);
// All of these virtual functions are inherited from DeathTest.
int Wait() override;
@@ -1060,8 +1073,8 @@ class ForkingDeathTest : public DeathTestImpl {
};
// Constructs a ForkingDeathTest.
-ForkingDeathTest::ForkingDeathTest(const char *a_statement,
- Matcher<const std::string &> matcher)
+ForkingDeathTest::ForkingDeathTest(const char* a_statement,
+ Matcher<const std::string&> matcher)
: DeathTestImpl(a_statement, std::move(matcher)), child_pid_(-1) {}
// Waits for the child in a death test to exit, returning its exit
@@ -1082,7 +1095,7 @@ int ForkingDeathTest::Wait() {
// in the child process.
class NoExecDeathTest : public ForkingDeathTest {
public:
- NoExecDeathTest(const char *a_statement, Matcher<const std::string &> matcher)
+ NoExecDeathTest(const char* a_statement, Matcher<const std::string&> matcher)
: ForkingDeathTest(a_statement, std::move(matcher)) {}
TestRole AssumeRole() override;
};
@@ -1137,9 +1150,10 @@ DeathTest::TestRole NoExecDeathTest::AssumeRole() {
// only this specific death test to be run.
class ExecDeathTest : public ForkingDeathTest {
public:
- ExecDeathTest(const char *a_statement, Matcher<const std::string &> matcher,
- const char *file, int line)
- : ForkingDeathTest(a_statement, std::move(matcher)), file_(file),
+ ExecDeathTest(const char* a_statement, Matcher<const std::string&> matcher,
+ const char* file, int line)
+ : ForkingDeathTest(a_statement, std::move(matcher)),
+ file_(file),
line_(line) {}
TestRole AssumeRole() override;
@@ -1154,7 +1168,7 @@ class ExecDeathTest : public ForkingDeathTest {
return args;
}
// The name of the file in which the death test is located.
- const char *const file_;
+ const char* const file_;
// The line number on which the death test is located.
const int line_;
};
@@ -1165,61 +1179,49 @@ class Arguments {
Arguments() { args_.push_back(nullptr); }
~Arguments() {
- for (std::vector<char *>::iterator i = args_.begin(); i != args_.end();
+ for (std::vector<char*>::iterator i = args_.begin(); i != args_.end();
++i) {
free(*i);
}
}
- void AddArgument(const char *argument) {
+ void AddArgument(const char* argument) {
args_.insert(args_.end() - 1, posix::StrDup(argument));
}
template <typename Str>
- void AddArguments(const ::std::vector<Str> &arguments) {
+ void AddArguments(const ::std::vector<Str>& arguments) {
for (typename ::std::vector<Str>::const_iterator i = arguments.begin();
i != arguments.end(); ++i) {
args_.insert(args_.end() - 1, posix::StrDup(i->c_str()));
}
}
- char *const *Argv() { return &args_[0]; }
+ char* const* Argv() { return &args_[0]; }
private:
- std::vector<char *> args_;
+ std::vector<char*> args_;
};
// A struct that encompasses the arguments to the child process of a
// threadsafe-style death test process.
struct ExecDeathTestArgs {
- char *const *argv; // Command-line arguments for the child's call to exec
+ char* const* argv; // Command-line arguments for the child's call to exec
int close_fd; // File descriptor to close; the read end of a pipe
};
-#if GTEST_OS_MAC
-inline char **GetEnviron() {
- // When Google Test is built as a framework on MacOS X, the environ variable
- // is unavailable. Apple's documentation (man environ) recommends using
- // _NSGetEnviron() instead.
- return *_NSGetEnviron();
-}
-#else
-// Some POSIX platforms expect you to declare environ. extern "C" makes
-// it reside in the global namespace.
-extern "C" char **environ;
-inline char **GetEnviron() { return environ; }
-#endif // GTEST_OS_MAC
-
-#if !GTEST_OS_QNX
+#if GTEST_OS_QNX
+extern "C" char** environ;
+#else // GTEST_OS_QNX
// The main function for a threadsafe-style death test child process.
// This function is called in a clone()-ed process and thus must avoid
// any potentially unsafe operations like malloc or libc functions.
-static int ExecDeathTestChildMain(void *child_arg) {
- ExecDeathTestArgs *const args = static_cast<ExecDeathTestArgs *>(child_arg);
+static int ExecDeathTestChildMain(void* child_arg) {
+ ExecDeathTestArgs* const args = static_cast<ExecDeathTestArgs*>(child_arg);
GTEST_DEATH_TEST_CHECK_SYSCALL_(close(args->close_fd));
// We need to execute the test program in the same environment where
// it was originally invoked. Therefore we change to the original
// working directory first.
- const char *const original_dir =
+ const char* const original_dir =
UnitTest::GetInstance()->original_working_dir();
// We can safely call chdir() as it's a direct system call.
if (chdir(original_dir) != 0) {
@@ -1228,17 +1230,17 @@ static int ExecDeathTestChildMain(void *child_arg) {
return EXIT_FAILURE;
}
- // We can safely call execve() as it's a direct system call. We
+ // We can safely call execv() as it's almost a direct system call. We
// cannot use execvp() as it's a libc function and thus potentially
- // unsafe. Since execve() doesn't search the PATH, the user must
+ // unsafe. Since execv() doesn't search the PATH, the user must
// invoke the test program via a valid path that contains at least
// one path separator.
- execve(args->argv[0], args->argv, GetEnviron());
- DeathTestAbort(std::string("execve(") + args->argv[0] + ", ...) in " +
+ execv(args->argv[0], args->argv);
+ DeathTestAbort(std::string("execv(") + args->argv[0] + ", ...) in " +
original_dir + " failed: " + GetLastErrnoDescription());
return EXIT_FAILURE;
}
-#endif // !GTEST_OS_QNX
+#endif // GTEST_OS_QNX
#if GTEST_HAS_CLONE
// Two utility routines that together determine the direction the stack
@@ -1250,21 +1252,26 @@ static int ExecDeathTestChildMain(void *child_arg) {
// GTEST_NO_INLINE_ is required to prevent GCC 4.6 from inlining
// StackLowerThanAddress into StackGrowsDown, which then doesn't give
// correct answer.
-static void StackLowerThanAddress(const void *ptr,
- bool *result) GTEST_NO_INLINE_;
+static void StackLowerThanAddress(const void* ptr,
+ bool* result) GTEST_NO_INLINE_;
+// Make sure sanitizers do not tamper with the stack here.
+// Ideally, we want to use `__builtin_frame_address` instead of a local variable
+// address with sanitizer disabled, but it does not work when the
+// compiler optimizes the stack frame out, which happens on PowerPC targets.
// HWAddressSanitizer adds a random tag to the MSB of the local variable
// address, making the comparison result unpredictable.
+GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_
-static void StackLowerThanAddress(const void *ptr, bool *result) {
- int dummy;
- *result = (&dummy < ptr);
+static void StackLowerThanAddress(const void* ptr, bool* result) {
+ int dummy = 0;
+ *result = std::less<const void*>()(&dummy, ptr);
}
// Make sure AddressSanitizer does not tamper with the stack here.
GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_
static bool StackGrowsDown() {
- int dummy;
+ int dummy = 0;
bool result;
StackLowerThanAddress(&dummy, &result);
return result;
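The probe reduced to a standalone sketch, under the same assumptions (the noinline attribute stands in for GTEST_NO_INLINE_ and is GCC/Clang syntax):

#include <functional>

__attribute__((noinline)) static bool CalleeBelowCaller(
    const void* caller_local) {
  int dummy = 0;
  // std::less yields a total order even for unrelated pointers, where a raw
  // '<' comparison would be unspecified.
  return std::less<const void*>()(&dummy, caller_local);
}

static bool StackGrowsDownSketch() {
  int dummy = 0;
  return CalleeBelowCaller(&dummy);
}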
@@ -1278,8 +1285,8 @@ static bool StackGrowsDown() {
// fork supports only single-threaded environments, so this function uses
// spawn(2) there instead. The function dies with an error message if
// anything goes wrong.
-static pid_t ExecDeathTestSpawnChild(char *const *argv, int close_fd) {
- ExecDeathTestArgs args = { argv, close_fd };
+static pid_t ExecDeathTestSpawnChild(char* const* argv, int close_fd) {
+ ExecDeathTestArgs args = {argv, close_fd};
pid_t child_pid = -1;
#if GTEST_OS_QNX
@@ -1291,7 +1298,7 @@ static pid_t ExecDeathTestSpawnChild(char *const *argv, int close_fd) {
// We need to execute the test program in the same environment where
// it was originally invoked. Therefore we change to the original
// working directory first.
- const char *const original_dir =
+ const char* const original_dir =
UnitTest::GetInstance()->original_working_dir();
// We can safely call chdir() as it's a direct system call.
if (chdir(original_dir) != 0) {
@@ -1305,10 +1312,9 @@ static pid_t ExecDeathTestSpawnChild(char *const *argv, int close_fd) {
GTEST_DEATH_TEST_CHECK_SYSCALL_(fd_flags = fcntl(close_fd, F_GETFD));
GTEST_DEATH_TEST_CHECK_SYSCALL_(
fcntl(close_fd, F_SETFD, fd_flags | FD_CLOEXEC));
- struct inheritance inherit = { 0 };
+ struct inheritance inherit = {0};
// spawn is a system call.
- child_pid =
- spawn(args.argv[0], 0, nullptr, &inherit, args.argv, GetEnviron());
+ child_pid = spawn(args.argv[0], 0, nullptr, &inherit, args.argv, environ);
// Restores the current working directory.
GTEST_DEATH_TEST_CHECK_(fchdir(cwd_fd) != -1);
GTEST_DEATH_TEST_CHECK_SYSCALL_(close(cwd_fd));
@@ -1328,13 +1334,13 @@ static pid_t ExecDeathTestSpawnChild(char *const *argv, int close_fd) {
#endif // GTEST_OS_LINUX
#if GTEST_HAS_CLONE
- const bool use_fork = GTEST_FLAG(death_test_use_fork);
+ const bool use_fork = GTEST_FLAG_GET(death_test_use_fork);
if (!use_fork) {
static const bool stack_grows_down = StackGrowsDown();
const auto stack_size = static_cast<size_t>(getpagesize() * 2);
// MMAP_ANONYMOUS is not defined on Mac, so we use MAP_ANON instead.
- void *const stack = mmap(nullptr, stack_size, PROT_READ | PROT_WRITE,
+ void* const stack = mmap(nullptr, stack_size, PROT_READ | PROT_WRITE,
MAP_ANON | MAP_PRIVATE, -1, 0);
GTEST_DEATH_TEST_CHECK_(stack != MAP_FAILED);
@@ -1345,8 +1351,8 @@ static pid_t ExecDeathTestSpawnChild(char *const *argv, int close_fd) {
// than 64. We assume stack and stack_size already have alignment of
// kMaxStackAlignment.
const size_t kMaxStackAlignment = 64;
- void *const stack_top =
- static_cast<char *>(stack) +
+ void* const stack_top =
+ static_cast<char*>(stack) +
(stack_grows_down ? stack_size - kMaxStackAlignment : 0);
GTEST_DEATH_TEST_CHECK_(
static_cast<size_t>(stack_size) > kMaxStackAlignment &&
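This hunk mmaps a fresh two-page stack and hands its aligned top to clone(). The pattern reduced to a sketch (Linux-only, needs _GNU_SOURCE; error handling omitted; names are illustrative):

#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <sys/mman.h>
#include <unistd.h>

static int ChildMain(void*) { _exit(0); }

static pid_t SpawnOnOwnStack() {
  const size_t stack_size = static_cast<size_t>(getpagesize() * 2);
  void* stack = mmap(nullptr, stack_size, PROT_READ | PROT_WRITE,
                     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
  // On a downward-growing stack, clone() expects a pointer one past the end.
  void* stack_top = static_cast<char*>(stack) + stack_size;
  return clone(&ChildMain, stack_top, SIGCHLD, nullptr);
}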
@@ -1379,10 +1385,10 @@ static pid_t ExecDeathTestSpawnChild(char *const *argv, int close_fd) {
// and --gtest_internal_run_death_test flags to cause only the current
// death test to be re-run.
DeathTest::TestRole ExecDeathTest::AssumeRole() {
- const UnitTestImpl *const impl = GetUnitTestImpl();
- const InternalRunDeathTestFlag *const flag =
+ const UnitTestImpl* const impl = GetUnitTestImpl();
+ const InternalRunDeathTestFlag* const flag =
impl->internal_run_death_test_flag();
- const TestInfo *const info = impl->current_test_info();
+ const TestInfo* const info = impl->current_test_info();
const int death_test_index = info->result()->death_test_count();
if (flag != nullptr) {
@@ -1397,11 +1403,11 @@ DeathTest::TestRole ExecDeathTest::AssumeRole() {
GTEST_DEATH_TEST_CHECK_(fcntl(pipe_fd[1], F_SETFD, 0) != -1);
const std::string filter_flag = std::string("--") + GTEST_FLAG_PREFIX_ +
- kFilterFlag + "=" + info->test_suite_name() +
- "." + info->name();
+ "filter=" + info->test_suite_name() + "." +
+ info->name();
const std::string internal_flag = std::string("--") + GTEST_FLAG_PREFIX_ +
- kInternalRunDeathTestFlag + "=" + file_ +
- "|" + StreamableToString(line_) + "|" +
+ "internal_run_death_test=" + file_ + "|" +
+ StreamableToString(line_) + "|" +
StreamableToString(death_test_index) + "|" +
StreamableToString(pipe_fd[1]);
Arguments args;
@@ -1431,12 +1437,12 @@ DeathTest::TestRole ExecDeathTest::AssumeRole() {
// by the "test" argument to its address. If the test should be
// skipped, sets that pointer to NULL. Returns true, unless the
// flag is set to an invalid value.
-bool DefaultDeathTestFactory::Create(const char *statement,
- Matcher<const std::string &> matcher,
- const char *file, int line,
- DeathTest **test) {
- UnitTestImpl *const impl = GetUnitTestImpl();
- const InternalRunDeathTestFlag *const flag =
+bool DefaultDeathTestFactory::Create(const char* statement,
+ Matcher<const std::string&> matcher,
+ const char* file, int line,
+ DeathTest** test) {
+ UnitTestImpl* const impl = GetUnitTestImpl();
+ const InternalRunDeathTestFlag* const flag =
impl->internal_run_death_test_flag();
const int death_test_index =
impl->current_test_info()->increment_death_test_count();
@@ -1459,23 +1465,23 @@ bool DefaultDeathTestFactory::Create(const char *statement,
#if GTEST_OS_WINDOWS
- if (GTEST_FLAG(death_test_style) == "threadsafe" ||
- GTEST_FLAG(death_test_style) == "fast") {
+ if (GTEST_FLAG_GET(death_test_style) == "threadsafe" ||
+ GTEST_FLAG_GET(death_test_style) == "fast") {
*test = new WindowsDeathTest(statement, std::move(matcher), file, line);
}
#elif GTEST_OS_FUCHSIA
- if (GTEST_FLAG(death_test_style) == "threadsafe" ||
- GTEST_FLAG(death_test_style) == "fast") {
+ if (GTEST_FLAG_GET(death_test_style) == "threadsafe" ||
+ GTEST_FLAG_GET(death_test_style) == "fast") {
*test = new FuchsiaDeathTest(statement, std::move(matcher), file, line);
}
#else
- if (GTEST_FLAG(death_test_style) == "threadsafe") {
+ if (GTEST_FLAG_GET(death_test_style) == "threadsafe") {
*test = new ExecDeathTest(statement, std::move(matcher), file, line);
- } else if (GTEST_FLAG(death_test_style) == "fast") {
+ } else if (GTEST_FLAG_GET(death_test_style) == "fast") {
*test = new NoExecDeathTest(statement, std::move(matcher));
}
@@ -1483,7 +1489,7 @@ bool DefaultDeathTestFactory::Create(const char *statement,
else { // NOLINT - this is more readable than unbalanced brackets inside #if.
DeathTest::set_last_death_test_message("Unknown death test style \"" +
- GTEST_FLAG(death_test_style) +
+ GTEST_FLAG_GET(death_test_style) +
"\" encountered");
return false;
}
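
Only the accessor changes in this hunk; the dispatch itself is untouched. On Windows and Fuchsia both styles select the platform-specific death test, while the POSIX branch keeps the split between ExecDeathTest ("threadsafe") and NoExecDeathTest ("fast"). A one-line usage sketch with the setter macro this update standardizes on, assuming gtest's flag macros are in scope:

    // Selecting the exec-based death test runner from test code
    // (GTEST_FLAG_SET is the write-side counterpart used elsewhere
    // in this patch):
    GTEST_FLAG_SET(death_test_style, "threadsafe");
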
@@ -1557,15 +1563,15 @@ static int GetStatusFileDescriptor(unsigned int parent_process_id,
// Returns a newly created InternalRunDeathTestFlag object with fields
// initialized from the GTEST_FLAG(internal_run_death_test) flag if
// the flag is specified; otherwise returns NULL.
-InternalRunDeathTestFlag *ParseInternalRunDeathTestFlag() {
- if (GTEST_FLAG(internal_run_death_test) == "") return nullptr;
+InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag() {
+ if (GTEST_FLAG_GET(internal_run_death_test) == "") return nullptr;
// GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we
// can use it here.
int line = -1;
int index = -1;
::std::vector< ::std::string> fields;
- SplitString(GTEST_FLAG(internal_run_death_test).c_str(), '|', &fields);
+ SplitString(GTEST_FLAG_GET(internal_run_death_test), '|', &fields);
int write_fd = -1;
#if GTEST_OS_WINDOWS
@@ -1580,7 +1586,7 @@ InternalRunDeathTestFlag *ParseInternalRunDeathTestFlag() {
!ParseNaturalNumber(fields[4], &write_handle_as_size_t) ||
!ParseNaturalNumber(fields[5], &event_handle_as_size_t)) {
DeathTestAbort("Bad --gtest_internal_run_death_test flag: " +
- GTEST_FLAG(internal_run_death_test));
+ GTEST_FLAG_GET(internal_run_death_test));
}
write_fd = GetStatusFileDescriptor(parent_process_id, write_handle_as_size_t,
event_handle_as_size_t);
@@ -1590,7 +1596,7 @@ InternalRunDeathTestFlag *ParseInternalRunDeathTestFlag() {
if (fields.size() != 3 || !ParseNaturalNumber(fields[1], &line) ||
!ParseNaturalNumber(fields[2], &index)) {
DeathTestAbort("Bad --gtest_internal_run_death_test flag: " +
- GTEST_FLAG(internal_run_death_test));
+ GTEST_FLAG_GET(internal_run_death_test));
}
#else
@@ -1599,7 +1605,7 @@ InternalRunDeathTestFlag *ParseInternalRunDeathTestFlag() {
!ParseNaturalNumber(fields[2], &index) ||
!ParseNaturalNumber(fields[3], &write_fd)) {
DeathTestAbort("Bad --gtest_internal_run_death_test flag: " +
- GTEST_FLAG(internal_run_death_test));
+ GTEST_FLAG_GET(internal_run_death_test));
}
#endif // GTEST_OS_WINDOWS
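
Every flag read in this file now goes through GTEST_FLAG_GET, and SplitString takes the std::string flag value directly instead of a .c_str() copy. A simplified sketch of the distinction, assuming the upstream macro design (the real definitions live in gtest-port.h and can be rerouted to Abseil flags):

    // Old: the macro names the flag variable itself, so call sites
    // read and assign it directly.
    //   if (GTEST_FLAG(death_test_style) == "fast") { ... }
    //   GTEST_FLAG(death_test_style) = "threadsafe";
    //
    // New: reads and writes go through dedicated accessor macros,
    // which lets the backing storage change without touching callers.
    //   if (GTEST_FLAG_GET(death_test_style) == "fast") { ... }
    //   GTEST_FLAG_SET(death_test_style, "threadsafe");
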
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-filepath.cc b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-filepath.cc
index f9427e0f186..f6ee90cdb7c 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-filepath.cc
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-filepath.cc
@@ -30,8 +30,9 @@
#include "gtest/internal/gtest-filepath.h"
#include <stdlib.h>
-#include "gtest/internal/gtest-port.h"
+
#include "gtest/gtest-message.h"
+#include "gtest/internal/gtest-port.h"
#if GTEST_OS_WINDOWS_MOBILE
#include <windows.h>
@@ -40,6 +41,7 @@
#include <io.h>
#else
#include <limits.h>
+
#include <climits> // Some Linux distributions define PATH_MAX here.
#endif // GTEST_OS_WINDOWS_MOBILE
@@ -92,17 +94,18 @@ static bool IsPathSeparator(char c) {
// Returns the current working directory, or "" if unsuccessful.
FilePath FilePath::GetCurrentDir() {
-#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || \
- GTEST_OS_WINDOWS_RT || GTEST_OS_ESP8266 || GTEST_OS_ESP32
+#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || \
+ GTEST_OS_WINDOWS_RT || GTEST_OS_ESP8266 || GTEST_OS_ESP32 || \
+ GTEST_OS_XTENSA
// These platforms do not have a current directory, so we just return
// something reasonable.
return FilePath(kCurrentDirectoryString);
#elif GTEST_OS_WINDOWS
- char cwd[GTEST_PATH_MAX_ + 1] = { '\0' };
+ char cwd[GTEST_PATH_MAX_ + 1] = {'\0'};
return FilePath(_getcwd(cwd, sizeof(cwd)) == nullptr ? "" : cwd);
#else
- char cwd[GTEST_PATH_MAX_ + 1] = { '\0' };
- char *result = getcwd(cwd, sizeof(cwd));
+ char cwd[GTEST_PATH_MAX_ + 1] = {'\0'};
+ char* result = getcwd(cwd, sizeof(cwd));
#if GTEST_OS_NACL
// getcwd will likely fail in NaCl due to the sandbox, so return something
// reasonable. The user may have provided a shim implementation for getcwd,
@@ -117,7 +120,7 @@ FilePath FilePath::GetCurrentDir() {
// Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns
// FilePath("dir/file"). If a case-insensitive extension is not
// found, returns a copy of the original FilePath.
-FilePath FilePath::RemoveExtension(const char *extension) const {
+FilePath FilePath::RemoveExtension(const char* extension) const {
const std::string dot_extension = std::string(".") + extension;
if (String::EndsWithCaseInsensitive(pathname_, dot_extension)) {
return FilePath(
@@ -129,10 +132,10 @@ FilePath FilePath::RemoveExtension(const char *extension) const {
// Returns a pointer to the last occurrence of a valid path separator in
// the FilePath. On Windows, for example, both '/' and '\' are valid path
// separators. Returns NULL if no path separator was found.
-const char *FilePath::FindLastPathSeparator() const {
- const char *const last_sep = strrchr(c_str(), kPathSeparator);
+const char* FilePath::FindLastPathSeparator() const {
+ const char* const last_sep = strrchr(c_str(), kPathSeparator);
#if GTEST_HAS_ALT_PATH_SEP_
- const char *const last_alt_sep = strrchr(c_str(), kAlternatePathSeparator);
+ const char* const last_alt_sep = strrchr(c_str(), kAlternatePathSeparator);
// Comparing two pointers of which only one is NULL is undefined.
if (last_alt_sep != nullptr &&
(last_sep == nullptr || last_alt_sep > last_sep)) {
@@ -149,7 +152,7 @@ const char *FilePath::FindLastPathSeparator() const {
// returns an empty FilePath ("").
// On Windows platform, '\' is the path separator, otherwise it is '/'.
FilePath FilePath::RemoveDirectoryName() const {
- const char *const last_sep = FindLastPathSeparator();
+ const char* const last_sep = FindLastPathSeparator();
return last_sep ? FilePath(last_sep + 1) : *this;
}
@@ -160,7 +163,7 @@ FilePath FilePath::RemoveDirectoryName() const {
// not have a file, like "just/a/dir/", it returns the FilePath unmodified.
// On Windows platform, '\' is the path separator, otherwise it is '/'.
FilePath FilePath::RemoveFileName() const {
- const char *const last_sep = FindLastPathSeparator();
+ const char* const last_sep = FindLastPathSeparator();
std::string dir;
if (last_sep) {
dir = std::string(c_str(), static_cast<size_t>(last_sep + 1 - c_str()));
@@ -176,9 +179,9 @@ FilePath FilePath::RemoveFileName() const {
// extension = "xml", returns "dir/test.xml". If number is greater
// than zero (e.g., 12), returns "dir/test_12.xml".
// On Windows platform, uses \ as the separator rather than /.
-FilePath FilePath::MakeFileName(const FilePath &directory,
- const FilePath &base_name, int number,
- const char *extension) {
+FilePath FilePath::MakeFileName(const FilePath& directory,
+ const FilePath& base_name, int number,
+ const char* extension) {
std::string file;
if (number == 0) {
file = base_name.string() + "." + extension;
@@ -191,8 +194,8 @@ FilePath FilePath::MakeFileName(const FilePath &directory,
// Given directory = "dir", relative_path = "test.xml", returns "dir/test.xml".
// On Windows, uses \ as the separator rather than /.
-FilePath FilePath::ConcatPaths(const FilePath &directory,
- const FilePath &relative_path) {
+FilePath FilePath::ConcatPaths(const FilePath& directory,
+ const FilePath& relative_path) {
if (directory.IsEmpty()) return relative_path;
const FilePath dir(directory.RemoveTrailingPathSeparator());
return FilePath(dir.string() + kPathSeparator + relative_path.string());
@@ -207,7 +210,7 @@ bool FilePath::FileOrDirectoryExists() const {
delete[] unicode;
return attributes != kInvalidFileAttributes;
#else
- posix::StatStruct file_stat;
+ posix::StatStruct file_stat{};
return posix::Stat(pathname_.c_str(), &file_stat) == 0;
#endif // GTEST_OS_WINDOWS_MOBILE
}
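
The `{}` added to posix::StatStruct value-initializes the buffer before the stat call; if Stat() fails and never writes it, the struct no longer holds indeterminate bytes. A standalone equivalent of the pattern, assuming plain POSIX <sys/stat.h>:

    #include <sys/stat.h>

    // Zero-initialize the stat buffer, as the patched
    // FileOrDirectoryExists()/DirectoryExists() now do.
    bool PathExists(const char* path) {
      struct stat file_stat{};
      return stat(path, &file_stat) == 0;
    }
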
@@ -219,10 +222,10 @@ bool FilePath::DirectoryExists() const {
#if GTEST_OS_WINDOWS
// Don't strip off trailing separator if path is a root directory on
// Windows (like "C:\\").
- const FilePath &path(IsRootDirectory() ? *this
+ const FilePath& path(IsRootDirectory() ? *this
: RemoveTrailingPathSeparator());
#else
- const FilePath &path(*this);
+ const FilePath& path(*this);
#endif
#if GTEST_OS_WINDOWS_MOBILE
@@ -234,7 +237,7 @@ bool FilePath::DirectoryExists() const {
result = true;
}
#else
- posix::StatStruct file_stat;
+ posix::StatStruct file_stat{};
result =
posix::Stat(path.c_str(), &file_stat) == 0 && posix::IsDir(file_stat);
#endif // GTEST_OS_WINDOWS_MOBILE
@@ -254,7 +257,7 @@ bool FilePath::IsRootDirectory() const {
// Returns true if pathname describes an absolute path.
bool FilePath::IsAbsolutePath() const {
- const char *const name = pathname_.c_str();
+ const char* const name = pathname_.c_str();
#if GTEST_OS_WINDOWS
return pathname_.length() >= 3 &&
((name[0] >= 'a' && name[0] <= 'z') ||
@@ -273,9 +276,9 @@ bool FilePath::IsAbsolutePath() const {
// Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'.
// There could be a race condition if two or more processes are calling this
// function at the same time -- they could both pick the same filename.
-FilePath FilePath::GenerateUniqueFileName(const FilePath &directory,
- const FilePath &base_name,
- const char *extension) {
+FilePath FilePath::GenerateUniqueFileName(const FilePath& directory,
+ const FilePath& base_name,
+ const char* extension) {
FilePath full_pathname;
int number = 0;
do {
@@ -320,7 +323,7 @@ bool FilePath::CreateFolder() const {
delete[] unicode;
#elif GTEST_OS_WINDOWS
int result = _mkdir(pathname_.c_str());
-#elif GTEST_OS_ESP8266
+#elif GTEST_OS_ESP8266 || GTEST_OS_XTENSA
// do nothing
int result = 0;
#else
@@ -345,32 +348,19 @@ FilePath FilePath::RemoveTrailingPathSeparator() const {
// For example, "bar///foo" becomes "bar/foo". Does not eliminate other
// redundancies that might be in a pathname involving "." or "..".
void FilePath::Normalize() {
- if (pathname_.c_str() == nullptr) {
- pathname_ = "";
- return;
- }
- const char *src = pathname_.c_str();
- char *const dest = new char[pathname_.length() + 1];
- char *dest_ptr = dest;
- memset(dest_ptr, 0, pathname_.length() + 1);
-
- while (*src != '\0') {
- *dest_ptr = *src;
- if (!IsPathSeparator(*src)) {
- src++;
+ auto out = pathname_.begin();
+
+ for (const char character : pathname_) {
+ if (!IsPathSeparator(character)) {
+ *(out++) = character;
+ } else if (out == pathname_.begin() || *std::prev(out) != kPathSeparator) {
+ *(out++) = kPathSeparator;
} else {
-#if GTEST_HAS_ALT_PATH_SEP_
- if (*dest_ptr == kAlternatePathSeparator) {
- *dest_ptr = kPathSeparator;
- }
-#endif
- while (IsPathSeparator(*src)) src++;
+ continue;
}
- dest_ptr++;
}
- *dest_ptr = '\0';
- pathname_ = dest;
- delete[] dest;
+
+ pathname_.erase(out, pathname_.end());
}
} // namespace internal
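
The rewrite of FilePath::Normalize() is the one real algorithmic change in this file: the heap-allocated scratch buffer (and the dead pathname_.c_str() == nullptr check, which can never fire for a std::string) gives way to in-place compaction, with runs of separators collapsed to a single kPathSeparator and the leftover tail erased. Because each run is rewritten as kPathSeparator, the old GTEST_HAS_ALT_PATH_SEP_ special case comes for free: alternate separators are canonicalized as a side effect. A standalone sketch of the idiom, assuming '/' as the only separator:

    #include <iterator>
    #include <string>

    // Collapse runs of '/' to one '/', in place, mirroring the
    // copy-compaction loop in the patched FilePath::Normalize().
    void CollapseSlashes(std::string& path) {
      auto out = path.begin();
      for (const char c : path) {
        if (c != '/') {
          *out++ = c;                        // ordinary byte: keep
        } else if (out == path.begin() || *std::prev(out) != '/') {
          *out++ = '/';                      // first separator of a run
        }                                    // else: drop the duplicate
      }
      path.erase(out, path.end());           // discard the compacted tail
    }
    // CollapseSlashes turns "bar///foo" into "bar/foo", matching the
    // example in the comment above the original function.
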
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-internal-inl.h b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-internal-inl.h
index 16d8cde669d..0b9e929c689 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-internal-inl.h
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-internal-inl.h
@@ -31,8 +31,8 @@
// This file contains purely Google Test's internal implementation. Please
// DO NOT #INCLUDE IT IN A USER PROGRAM.
-#ifndef GTEST_SRC_GTEST_INTERNAL_INL_H_
-#define GTEST_SRC_GTEST_INTERNAL_INL_H_
+#ifndef GOOGLETEST_SRC_GTEST_INTERNAL_INL_H_
+#define GOOGLETEST_SRC_GTEST_INTERNAL_INL_H_
#ifndef _WIN32_WCE
#include <errno.h>
@@ -58,14 +58,12 @@
#include <windows.h> // NOLINT
#endif // GTEST_OS_WINDOWS
-#include "gtest/gtest.h"
#include "gtest/gtest-spi.h"
+#include "gtest/gtest.h"
GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
/* class A needs to have dll-interface to be used by clients of class B */)
-namespace testing {
-
// Declares the flags.
//
// We don't want the users to modify this flag in the code, but want
@@ -73,30 +71,13 @@ namespace testing {
// declare it here as opposed to in gtest.h.
GTEST_DECLARE_bool_(death_test_use_fork);
+namespace testing {
namespace internal {
// The value of GetTestTypeId() as seen from within the Google Test
// library. This is solely for testing GetTestTypeId().
GTEST_API_ extern const TypeId kTestTypeIdInGoogleTest;
-// Names of the flags (needed for parsing Google Test flags).
-const char kAlsoRunDisabledTestsFlag[] = "also_run_disabled_tests";
-const char kBreakOnFailureFlag[] = "break_on_failure";
-const char kCatchExceptionsFlag[] = "catch_exceptions";
-const char kColorFlag[] = "color";
-const char kFilterFlag[] = "filter";
-const char kListTestsFlag[] = "list_tests";
-const char kOutputFlag[] = "output";
-const char kPrintTimeFlag[] = "print_time";
-const char kPrintUTF8Flag[] = "print_utf8";
-const char kRandomSeedFlag[] = "random_seed";
-const char kRepeatFlag[] = "repeat";
-const char kShuffleFlag[] = "shuffle";
-const char kStackTraceDepthFlag[] = "stack_trace_depth";
-const char kStreamResultToFlag[] = "stream_result_to";
-const char kThrowOnFailureFlag[] = "throw_on_failure";
-const char kFlagfileFlag[] = "flagfile";
-
// A valid random seed must be in [1, kMaxRandomSeed].
const int kMaxRandomSeed = 99999;
@@ -123,8 +104,7 @@ GTEST_API_ std::string FormatEpochTimeInMillisAsIso8601(TimeInMillis ms);
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
-GTEST_API_ bool ParseInt32Flag(const char *str, const char *flag,
- int32_t *value);
+GTEST_API_ bool ParseFlag(const char* str, const char* flag, int32_t* value);
// Returns a random seed in range [1, kMaxRandomSeed] based on the
// given --gtest_random_seed flag value.
@@ -159,46 +139,54 @@ class GTestFlagSaver {
public:
// The c'tor.
GTestFlagSaver() {
- also_run_disabled_tests_ = GTEST_FLAG(also_run_disabled_tests);
- break_on_failure_ = GTEST_FLAG(break_on_failure);
- catch_exceptions_ = GTEST_FLAG(catch_exceptions);
- color_ = GTEST_FLAG(color);
- death_test_style_ = GTEST_FLAG(death_test_style);
- death_test_use_fork_ = GTEST_FLAG(death_test_use_fork);
- filter_ = GTEST_FLAG(filter);
- internal_run_death_test_ = GTEST_FLAG(internal_run_death_test);
- list_tests_ = GTEST_FLAG(list_tests);
- output_ = GTEST_FLAG(output);
- print_time_ = GTEST_FLAG(print_time);
- print_utf8_ = GTEST_FLAG(print_utf8);
- random_seed_ = GTEST_FLAG(random_seed);
- repeat_ = GTEST_FLAG(repeat);
- shuffle_ = GTEST_FLAG(shuffle);
- stack_trace_depth_ = GTEST_FLAG(stack_trace_depth);
- stream_result_to_ = GTEST_FLAG(stream_result_to);
- throw_on_failure_ = GTEST_FLAG(throw_on_failure);
+ also_run_disabled_tests_ = GTEST_FLAG_GET(also_run_disabled_tests);
+ break_on_failure_ = GTEST_FLAG_GET(break_on_failure);
+ catch_exceptions_ = GTEST_FLAG_GET(catch_exceptions);
+ color_ = GTEST_FLAG_GET(color);
+ death_test_style_ = GTEST_FLAG_GET(death_test_style);
+ death_test_use_fork_ = GTEST_FLAG_GET(death_test_use_fork);
+ fail_fast_ = GTEST_FLAG_GET(fail_fast);
+ filter_ = GTEST_FLAG_GET(filter);
+ internal_run_death_test_ = GTEST_FLAG_GET(internal_run_death_test);
+ list_tests_ = GTEST_FLAG_GET(list_tests);
+ output_ = GTEST_FLAG_GET(output);
+ brief_ = GTEST_FLAG_GET(brief);
+ print_time_ = GTEST_FLAG_GET(print_time);
+ print_utf8_ = GTEST_FLAG_GET(print_utf8);
+ random_seed_ = GTEST_FLAG_GET(random_seed);
+ repeat_ = GTEST_FLAG_GET(repeat);
+ recreate_environments_when_repeating_ =
+ GTEST_FLAG_GET(recreate_environments_when_repeating);
+ shuffle_ = GTEST_FLAG_GET(shuffle);
+ stack_trace_depth_ = GTEST_FLAG_GET(stack_trace_depth);
+ stream_result_to_ = GTEST_FLAG_GET(stream_result_to);
+ throw_on_failure_ = GTEST_FLAG_GET(throw_on_failure);
}
// The d'tor is not virtual. DO NOT INHERIT FROM THIS CLASS.
~GTestFlagSaver() {
- GTEST_FLAG(also_run_disabled_tests) = also_run_disabled_tests_;
- GTEST_FLAG(break_on_failure) = break_on_failure_;
- GTEST_FLAG(catch_exceptions) = catch_exceptions_;
- GTEST_FLAG(color) = color_;
- GTEST_FLAG(death_test_style) = death_test_style_;
- GTEST_FLAG(death_test_use_fork) = death_test_use_fork_;
- GTEST_FLAG(filter) = filter_;
- GTEST_FLAG(internal_run_death_test) = internal_run_death_test_;
- GTEST_FLAG(list_tests) = list_tests_;
- GTEST_FLAG(output) = output_;
- GTEST_FLAG(print_time) = print_time_;
- GTEST_FLAG(print_utf8) = print_utf8_;
- GTEST_FLAG(random_seed) = random_seed_;
- GTEST_FLAG(repeat) = repeat_;
- GTEST_FLAG(shuffle) = shuffle_;
- GTEST_FLAG(stack_trace_depth) = stack_trace_depth_;
- GTEST_FLAG(stream_result_to) = stream_result_to_;
- GTEST_FLAG(throw_on_failure) = throw_on_failure_;
+ GTEST_FLAG_SET(also_run_disabled_tests, also_run_disabled_tests_);
+ GTEST_FLAG_SET(break_on_failure, break_on_failure_);
+ GTEST_FLAG_SET(catch_exceptions, catch_exceptions_);
+ GTEST_FLAG_SET(color, color_);
+ GTEST_FLAG_SET(death_test_style, death_test_style_);
+ GTEST_FLAG_SET(death_test_use_fork, death_test_use_fork_);
+ GTEST_FLAG_SET(filter, filter_);
+ GTEST_FLAG_SET(fail_fast, fail_fast_);
+ GTEST_FLAG_SET(internal_run_death_test, internal_run_death_test_);
+ GTEST_FLAG_SET(list_tests, list_tests_);
+ GTEST_FLAG_SET(output, output_);
+ GTEST_FLAG_SET(brief, brief_);
+ GTEST_FLAG_SET(print_time, print_time_);
+ GTEST_FLAG_SET(print_utf8, print_utf8_);
+ GTEST_FLAG_SET(random_seed, random_seed_);
+ GTEST_FLAG_SET(repeat, repeat_);
+ GTEST_FLAG_SET(recreate_environments_when_repeating,
+ recreate_environments_when_repeating_);
+ GTEST_FLAG_SET(shuffle, shuffle_);
+ GTEST_FLAG_SET(stack_trace_depth, stack_trace_depth_);
+ GTEST_FLAG_SET(stream_result_to, stream_result_to_);
+ GTEST_FLAG_SET(throw_on_failure, throw_on_failure_);
}
private:
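
GTestFlagSaver keeps its shape: a RAII guard that snapshots every flag in the constructor and writes it back in the destructor, now via the accessor macros, and it grows fields for the three flags new to this googletest version (fail_fast, brief, recreate_environments_when_repeating). A minimal sketch of the pattern for one flag, assuming gtest's headers are in scope; "repeat" is just an example:

    #include <cstdint>

    // RAII save/restore of a single gtest flag; GTestFlagSaver does
    // this for the whole flag set.
    class SingleFlagSaver {
     public:
      SingleFlagSaver() : saved_(GTEST_FLAG_GET(repeat)) {}
      ~SingleFlagSaver() { GTEST_FLAG_SET(repeat, saved_); }

     private:
      int32_t saved_;
    };
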
@@ -209,14 +197,17 @@ class GTestFlagSaver {
std::string color_;
std::string death_test_style_;
bool death_test_use_fork_;
+ bool fail_fast_;
std::string filter_;
std::string internal_run_death_test_;
bool list_tests_;
std::string output_;
+ bool brief_;
bool print_time_;
bool print_utf8_;
int32_t random_seed_;
int32_t repeat_;
+ bool recreate_environments_when_repeating_;
bool shuffle_;
int32_t stack_trace_depth_;
std::string stream_result_to_;
@@ -244,7 +235,7 @@ GTEST_API_ std::string CodePointToUtf8(uint32_t code_point);
// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF16 encoding
// and contains invalid UTF-16 surrogate pairs, values in those pairs
// will be encoded as individual Unicode characters from Basic Normal Plane.
-GTEST_API_ std::string WideStringToUtf8(const wchar_t *str, int num_chars);
+GTEST_API_ std::string WideStringToUtf8(const wchar_t* str, int num_chars);
// Reads the GTEST_SHARD_STATUS_FILE environment variable, and creates the file
// if the variable is present. If a file already exists at this location, this
@@ -258,14 +249,14 @@ void WriteToShardStatusFileIfNeeded();
// an error and exits. If in_subprocess_for_death_test, sharding is
// disabled because it must only be applied to the original test
// process. Otherwise, we could filter out death tests we intended to execute.
-GTEST_API_ bool ShouldShard(const char *total_shards_str,
- const char *shard_index_str,
+GTEST_API_ bool ShouldShard(const char* total_shards_str,
+ const char* shard_index_str,
bool in_subprocess_for_death_test);
// Parses the environment variable var as a 32-bit integer. If it is unset,
// returns default_val. If it is not a 32-bit integer, prints an error and
// and aborts.
-GTEST_API_ int32_t Int32FromEnvOrDie(const char *env_var, int32_t default_val);
+GTEST_API_ int32_t Int32FromEnvOrDie(const char* env_var, int32_t default_val);
// Given the total number of shards, the shard index, and the test id,
// returns true if and only if the test should be run on this shard. The test id
@@ -279,11 +270,11 @@ GTEST_API_ bool ShouldRunTestOnShard(int total_shards, int shard_index,
// Returns the number of elements in the given container that satisfy
// the given predicate.
template <class Container, typename Predicate>
-inline int CountIf(const Container &c, Predicate predicate) {
+inline int CountIf(const Container& c, Predicate predicate) {
// Implemented as an explicit loop since std::count_if() in libCstd on
// Solaris has a non-standard signature.
int count = 0;
- for (typename Container::const_iterator it = c.begin(); it != c.end(); ++it) {
+ for (auto it = c.begin(); it != c.end(); ++it) {
if (predicate(*it)) ++count;
}
return count;
@@ -291,14 +282,14 @@ inline int CountIf(const Container &c, Predicate predicate) {
// Applies a function/functor to each element in the container.
template <class Container, typename Functor>
-void ForEach(const Container &c, Functor functor) {
+void ForEach(const Container& c, Functor functor) {
std::for_each(c.begin(), c.end(), functor);
}
// Returns the i-th element of the vector, or default_value if i is not
// in range [0, v.size()).
template <typename E>
-inline E GetElementOr(const std::vector<E> &v, int i, E default_value) {
+inline E GetElementOr(const std::vector<E>& v, int i, E default_value) {
return (i < 0 || i >= static_cast<int>(v.size())) ? default_value
: v[static_cast<size_t>(i)];
}
@@ -308,8 +299,8 @@ inline E GetElementOr(const std::vector<E> &v, int i, E default_value) {
// i.e. [begin, end) are shuffled, where 'end' == size() means to
// shuffle to the end of the vector.
template <typename E>
-void ShuffleRange(internal::Random *random, int begin, int end,
- std::vector<E> *v) {
+void ShuffleRange(internal::Random* random, int begin, int end,
+ std::vector<E>* v) {
const int size = static_cast<int>(v->size());
GTEST_CHECK_(0 <= begin && begin <= size)
<< "Invalid shuffle range start " << begin << ": must be in range [0, "
@@ -332,14 +323,14 @@ void ShuffleRange(internal::Random *random, int begin, int end,
// Performs an in-place shuffle of the vector's elements.
template <typename E>
-inline void Shuffle(internal::Random *random, std::vector<E> *v) {
+inline void Shuffle(internal::Random* random, std::vector<E>* v) {
ShuffleRange(random, 0, static_cast<int>(v->size()), v);
}
// A function for deleting an object. Handy for being used as a
// functor.
template <typename T>
-static void Delete(T *x) {
+static void Delete(T* x) {
delete x;
}
@@ -351,10 +342,10 @@ class TestPropertyKeyIs {
// Constructor.
//
// TestPropertyKeyIs has NO default constructor.
- explicit TestPropertyKeyIs(const std::string &key) : key_(key) {}
+ explicit TestPropertyKeyIs(const std::string& key) : key_(key) {}
// Returns true if and only if the test name of test property matches on key_.
- bool operator()(const TestProperty &test_property) const {
+ bool operator()(const TestProperty& test_property) const {
return test_property.key() == key_;
}
@@ -386,17 +377,10 @@ class GTEST_API_ UnitTestOptions {
// Functions for processing the gtest_filter flag.
- // Returns true if and only if the wildcard pattern matches the string.
- // The first ':' or '\0' character in pattern marks the end of it.
- //
- // This recursive algorithm isn't very efficient, but is clear and
- // works well enough for matching test names, which are short.
- static bool PatternMatchesString(const char *pattern, const char *str);
-
// Returns true if and only if the user-specified filter matches the test
// suite name and the test name.
- static bool FilterMatchesTest(const std::string &test_suite_name,
- const std::string &test_name);
+ static bool FilterMatchesTest(const std::string& test_suite_name,
+ const std::string& test_name);
#if GTEST_OS_WINDOWS
// Function for supporting the gtest_catch_exception flag.
@@ -409,7 +393,7 @@ class GTEST_API_ UnitTestOptions {
// Returns true if "name" matches the ':' separated list of glob-style
// filters in "filter".
- static bool MatchesFilter(const std::string &name, const char *filter);
+ static bool MatchesFilter(const std::string& name, const char* filter);
};
// Returns the current application's name, removing directory path if that
@@ -437,10 +421,12 @@ class OsStackTraceGetterInterface {
// This string is inserted in place of stack frames that are part of
// Google Test's implementation.
- static const char *const kElidedFramesMarker;
+ static const char* const kElidedFramesMarker;
private:
- GTEST_DISALLOW_COPY_AND_ASSIGN_(OsStackTraceGetterInterface);
+ OsStackTraceGetterInterface(const OsStackTraceGetterInterface&) = delete;
+ OsStackTraceGetterInterface& operator=(const OsStackTraceGetterInterface&) =
+ delete;
};
// A working implementation of the OsStackTraceGetterInterface interface.
@@ -459,15 +445,16 @@ class OsStackTraceGetter : public OsStackTraceGetterInterface {
// We do this because the address of the frame immediately below
// the user code changes between the call to UponLeavingGTest()
// and any calls to the stack trace code from within the user code.
- void *caller_frame_ = nullptr;
+ void* caller_frame_ = nullptr;
#endif // GTEST_HAS_ABSL
- GTEST_DISALLOW_COPY_AND_ASSIGN_(OsStackTraceGetter);
+ OsStackTraceGetter(const OsStackTraceGetter&) = delete;
+ OsStackTraceGetter& operator=(const OsStackTraceGetter&) = delete;
};
// Information about a Google Test trace point.
struct TraceInfo {
- const char *file;
+ const char* file;
int line;
std::string message;
};
@@ -477,15 +464,18 @@ struct TraceInfo {
class DefaultGlobalTestPartResultReporter
: public TestPartResultReporterInterface {
public:
- explicit DefaultGlobalTestPartResultReporter(UnitTestImpl *unit_test);
+ explicit DefaultGlobalTestPartResultReporter(UnitTestImpl* unit_test);
// Implements the TestPartResultReporterInterface. Reports the test part
// result in the current test.
- void ReportTestPartResult(const TestPartResult &result) override;
+ void ReportTestPartResult(const TestPartResult& result) override;
private:
- UnitTestImpl *const unit_test_;
+ UnitTestImpl* const unit_test_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultGlobalTestPartResultReporter);
+ DefaultGlobalTestPartResultReporter(
+ const DefaultGlobalTestPartResultReporter&) = delete;
+ DefaultGlobalTestPartResultReporter& operator=(
+ const DefaultGlobalTestPartResultReporter&) = delete;
};
// This is the default per thread test part result reporter used in
@@ -493,15 +483,18 @@ class DefaultGlobalTestPartResultReporter
class DefaultPerThreadTestPartResultReporter
: public TestPartResultReporterInterface {
public:
- explicit DefaultPerThreadTestPartResultReporter(UnitTestImpl *unit_test);
+ explicit DefaultPerThreadTestPartResultReporter(UnitTestImpl* unit_test);
// Implements the TestPartResultReporterInterface. The implementation just
// delegates to the current global test part result reporter of *unit_test_.
- void ReportTestPartResult(const TestPartResult &result) override;
+ void ReportTestPartResult(const TestPartResult& result) override;
private:
- UnitTestImpl *const unit_test_;
+ UnitTestImpl* const unit_test_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultPerThreadTestPartResultReporter);
+ DefaultPerThreadTestPartResultReporter(
+ const DefaultPerThreadTestPartResultReporter&) = delete;
+ DefaultPerThreadTestPartResultReporter& operator=(
+ const DefaultPerThreadTestPartResultReporter&) = delete;
};
// The private implementation of the UnitTest class. We don't protect
@@ -510,7 +503,7 @@ class DefaultPerThreadTestPartResultReporter
// proper locking.
class GTEST_API_ UnitTestImpl {
public:
- explicit UnitTestImpl(UnitTest *parent);
+ explicit UnitTestImpl(UnitTest* parent);
virtual ~UnitTestImpl();
// There are two different ways to register your own TestPartResultReporter.
@@ -521,18 +514,18 @@ class GTEST_API_ UnitTestImpl {
// test part result for the currently running test.
// Returns the global test part result reporter.
- TestPartResultReporterInterface *GetGlobalTestPartResultReporter();
+ TestPartResultReporterInterface* GetGlobalTestPartResultReporter();
// Sets the global test part result reporter.
void SetGlobalTestPartResultReporter(
- TestPartResultReporterInterface *reporter);
+ TestPartResultReporterInterface* reporter);
// Returns the test part result reporter for the current thread.
- TestPartResultReporterInterface *GetTestPartResultReporterForCurrentThread();
+ TestPartResultReporterInterface* GetTestPartResultReporterForCurrentThread();
// Sets the test part result reporter for the current thread.
void SetTestPartResultReporterForCurrentThread(
- TestPartResultReporterInterface *reporter);
+ TestPartResultReporterInterface* reporter);
// Gets the number of successful test suites.
int successful_test_suite_count() const;
@@ -590,44 +583,44 @@ class GTEST_API_ UnitTestImpl {
// Gets the i-th test suite among all the test suites. i can range from 0 to
// total_test_suite_count() - 1. If i is not in that range, returns NULL.
- const TestSuite *GetTestSuite(int i) const {
+ const TestSuite* GetTestSuite(int i) const {
const int index = GetElementOr(test_suite_indices_, i, -1);
return index < 0 ? nullptr : test_suites_[static_cast<size_t>(i)];
}
// Legacy API is deprecated but still available
#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
- const TestCase *GetTestCase(int i) const { return GetTestSuite(i); }
+ const TestCase* GetTestCase(int i) const { return GetTestSuite(i); }
#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
// Gets the i-th test suite among all the test suites. i can range from 0 to
// total_test_suite_count() - 1. If i is not in that range, returns NULL.
- TestSuite *GetMutableSuiteCase(int i) {
+ TestSuite* GetMutableSuiteCase(int i) {
const int index = GetElementOr(test_suite_indices_, i, -1);
return index < 0 ? nullptr : test_suites_[static_cast<size_t>(index)];
}
// Provides access to the event listener list.
- TestEventListeners *listeners() { return &listeners_; }
+ TestEventListeners* listeners() { return &listeners_; }
// Returns the TestResult for the test that's currently running, or
// the TestResult for the ad hoc test if no test is running.
- TestResult *current_test_result();
+ TestResult* current_test_result();
// Returns the TestResult for the ad hoc test.
- const TestResult *ad_hoc_test_result() const { return &ad_hoc_test_result_; }
+ const TestResult* ad_hoc_test_result() const { return &ad_hoc_test_result_; }
// Sets the OS stack trace getter.
//
// Does nothing if the input and the current OS stack trace getter
// are the same; otherwise, deletes the old getter and makes the
// input the current getter.
- void set_os_stack_trace_getter(OsStackTraceGetterInterface *getter);
+ void set_os_stack_trace_getter(OsStackTraceGetterInterface* getter);
// Returns the current OS stack trace getter if it is not NULL;
// otherwise, creates an OsStackTraceGetter, makes it the current
// getter, and returns it.
- OsStackTraceGetterInterface *os_stack_trace_getter();
+ OsStackTraceGetterInterface* os_stack_trace_getter();
// Returns the current OS stack trace as an std::string.
//
@@ -639,7 +632,8 @@ class GTEST_API_ UnitTestImpl {
// For example, if Foo() calls Bar(), which in turn calls
// CurrentOsStackTraceExceptTop(1), Foo() will be included in the
// trace but Bar() and CurrentOsStackTraceExceptTop() won't.
- std::string CurrentOsStackTraceExceptTop(int skip_count) GTEST_NO_INLINE_;
+ std::string CurrentOsStackTraceExceptTop(int skip_count)
+ GTEST_NO_INLINE_ GTEST_NO_TAIL_CALL_;
// Finds and returns a TestSuite with the given name. If one doesn't
// exist, creates one and returns it.
@@ -647,17 +641,17 @@ class GTEST_API_ UnitTestImpl {
// Arguments:
//
// test_suite_name: name of the test suite
- // type_param: the name of the test's type parameter, or NULL if
- // this is not a typed or a type-parameterized test.
- // set_up_tc: pointer to the function that sets up the test suite
- // tear_down_tc: pointer to the function that tears down the test suite
- TestSuite *GetTestSuite(const char *test_suite_name, const char *type_param,
+ // type_param: the name of the test's type parameter, or NULL if
+ // this is not a typed or a type-parameterized test.
+ // set_up_tc: pointer to the function that sets up the test suite
+ // tear_down_tc: pointer to the function that tears down the test suite
+ TestSuite* GetTestSuite(const char* test_suite_name, const char* type_param,
internal::SetUpTestSuiteFunc set_up_tc,
internal::TearDownTestSuiteFunc tear_down_tc);
// Legacy API is deprecated but still available
#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
- TestCase *GetTestCase(const char *test_case_name, const char *type_param,
+ TestCase* GetTestCase(const char* test_case_name, const char* type_param,
internal::SetUpTestSuiteFunc set_up_tc,
internal::TearDownTestSuiteFunc tear_down_tc) {
return GetTestSuite(test_case_name, type_param, set_up_tc, tear_down_tc);
@@ -673,7 +667,8 @@ class GTEST_API_ UnitTestImpl {
// test_info: the TestInfo object
void AddTestInfo(internal::SetUpTestSuiteFunc set_up_tc,
internal::TearDownTestSuiteFunc tear_down_tc,
- TestInfo *test_info) {
+ TestInfo* test_info) {
+#if GTEST_HAS_DEATH_TEST
// In order to support thread-safe death tests, we need to
// remember the original working directory when the test program
// was first invoked. We cannot do this in RUN_ALL_TESTS(), as
@@ -686,6 +681,7 @@ class GTEST_API_ UnitTestImpl {
GTEST_CHECK_(!original_working_dir_.IsEmpty())
<< "Failed to get the current working directory.";
}
+#endif // GTEST_HAS_DEATH_TEST
GetTestSuite(test_info->test_suite_name(), test_info->type_param(),
set_up_tc, tear_down_tc)
@@ -694,30 +690,30 @@ class GTEST_API_ UnitTestImpl {
// Returns ParameterizedTestSuiteRegistry object used to keep track of
// value-parameterized tests and instantiate and register them.
- internal::ParameterizedTestSuiteRegistry &parameterized_test_registry() {
+ internal::ParameterizedTestSuiteRegistry& parameterized_test_registry() {
return parameterized_test_registry_;
}
- std::set<std::string> *ignored_parameterized_test_suites() {
+ std::set<std::string>* ignored_parameterized_test_suites() {
return &ignored_parameterized_test_suites_;
}
// Returns TypeParameterizedTestSuiteRegistry object used to keep track of
// type-parameterized tests and instantiations of them.
- internal::TypeParameterizedTestSuiteRegistry &
+ internal::TypeParameterizedTestSuiteRegistry&
type_parameterized_test_registry() {
return type_parameterized_test_registry_;
}
// Sets the TestSuite object for the test that's currently running.
- void set_current_test_suite(TestSuite *a_current_test_suite) {
+ void set_current_test_suite(TestSuite* a_current_test_suite) {
current_test_suite_ = a_current_test_suite;
}
// Sets the TestInfo object for the test that's currently running. If
// current_test_info is NULL, the assertion results will be stored in
// ad_hoc_test_result_.
- void set_current_test_info(TestInfo *a_current_test_info) {
+ void set_current_test_info(TestInfo* a_current_test_info) {
current_test_info_ = a_current_test_info;
}
@@ -747,7 +743,7 @@ class GTEST_API_ UnitTestImpl {
// context of a test or a test suite, or to the global property set. If the
// result already contains a property with the same key, the value will be
// updated.
- void RecordProperty(const TestProperty &test_property);
+ void RecordProperty(const TestProperty& test_property);
enum ReactionToSharding { HONOR_SHARDING_PROTOCOL, IGNORE_SHARDING_PROTOCOL };
@@ -762,19 +758,19 @@ class GTEST_API_ UnitTestImpl {
// Prints the names of the tests matching the user-specified filter flag.
void ListTestsMatchingFilter();
- const TestSuite *current_test_suite() const { return current_test_suite_; }
- TestInfo *current_test_info() { return current_test_info_; }
- const TestInfo *current_test_info() const { return current_test_info_; }
+ const TestSuite* current_test_suite() const { return current_test_suite_; }
+ TestInfo* current_test_info() { return current_test_info_; }
+ const TestInfo* current_test_info() const { return current_test_info_; }
// Returns the vector of environments that need to be set-up/torn-down
// before/after the tests are run.
- std::vector<Environment *> &environments() { return environments_; }
+ std::vector<Environment*>& environments() { return environments_; }
// Getters for the per-thread Google Test trace stack.
- std::vector<TraceInfo> &gtest_trace_stack() {
+ std::vector<TraceInfo>& gtest_trace_stack() {
return *(gtest_trace_stack_.pointer());
}
- const std::vector<TraceInfo> &gtest_trace_stack() const {
+ const std::vector<TraceInfo>& gtest_trace_stack() const {
return gtest_trace_stack_.get();
}
@@ -786,12 +782,12 @@ class GTEST_API_ UnitTestImpl {
// flag, or NULL if that flag was not specified.
// This information is useful only in a death test child process.
// Must not be called before a call to InitGoogleTest.
- const InternalRunDeathTestFlag *internal_run_death_test_flag() const {
+ const InternalRunDeathTestFlag* internal_run_death_test_flag() const {
return internal_run_death_test_flag_.get();
}
// Returns a pointer to the current death test factory.
- internal::DeathTestFactory *death_test_factory() {
+ internal::DeathTestFactory* death_test_factory() {
return death_test_factory_.get();
}
@@ -821,7 +817,7 @@ class GTEST_API_ UnitTestImpl {
int random_seed() const { return random_seed_; }
// Gets the random number generator.
- internal::Random *random() { return &random_; }
+ internal::Random* random() { return &random_; }
// Shuffles all test suites, and the tests within each test suite,
// making sure that death tests are still run first.
@@ -842,7 +838,7 @@ class GTEST_API_ UnitTestImpl {
void set_catch_exceptions(bool value) { catch_exceptions_ = value; }
// The UnitTest object that owns this implementation object.
- UnitTest *const parent_;
+ UnitTest* const parent_;
// The working directory when the first TEST() or TEST_F() was
// executed.
@@ -854,22 +850,22 @@ class GTEST_API_ UnitTestImpl {
default_per_thread_test_part_result_reporter_;
// Points to (but doesn't own) the global test part result reporter.
- TestPartResultReporterInterface *global_test_part_result_repoter_;
+ TestPartResultReporterInterface* global_test_part_result_repoter_;
// Protects read and write access to global_test_part_result_reporter_.
internal::Mutex global_test_part_result_reporter_mutex_;
// Points to (but doesn't own) the per-thread test part result reporter.
- internal::ThreadLocal<TestPartResultReporterInterface *>
+ internal::ThreadLocal<TestPartResultReporterInterface*>
per_thread_test_part_result_reporter_;
// The vector of environments that need to be set-up/torn-down
// before/after the tests are run.
- std::vector<Environment *> environments_;
+ std::vector<Environment*> environments_;
// The vector of TestSuites in their original order. It owns the
// elements in the vector.
- std::vector<TestSuite *> test_suites_;
+ std::vector<TestSuite*> test_suites_;
// Provides a level of indirection for the test suite list to allow
// easy shuffling and restoring the test suite order. The i-th
@@ -897,13 +893,13 @@ class GTEST_API_ UnitTestImpl {
// changes as Google Test goes through one test suite after another.
// When no test is running, this is set to NULL and Google Test
// stores assertion results in ad_hoc_test_result_. Initially NULL.
- TestSuite *current_test_suite_;
+ TestSuite* current_test_suite_;
// This points to the TestInfo for the currently running test. It
// changes as Google Test goes through one test after another. When
// no test is running, this is set to NULL and Google Test stores
// assertion results in ad_hoc_test_result_. Initially NULL.
- TestInfo *current_test_info_;
+ TestInfo* current_test_info_;
// Normally, a user only writes assertions inside a TEST or TEST_F,
// or inside a function called by a TEST or TEST_F. Since Google
@@ -923,7 +919,7 @@ class GTEST_API_ UnitTestImpl {
// object is destructed. By default, an OsStackTraceGetter is used,
// but the user can set this field to use a custom getter if that is
// desired.
- OsStackTraceGetterInterface *os_stack_trace_getter_;
+ OsStackTraceGetterInterface* os_stack_trace_getter_;
// True if and only if PostFlagParsingInit() has been called.
bool post_flag_parse_init_performed_;
@@ -955,12 +951,13 @@ class GTEST_API_ UnitTestImpl {
// starts.
bool catch_exceptions_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(UnitTestImpl);
+ UnitTestImpl(const UnitTestImpl&) = delete;
+ UnitTestImpl& operator=(const UnitTestImpl&) = delete;
}; // class UnitTestImpl
// Convenience function for accessing the global UnitTest
// implementation object.
-inline UnitTestImpl *GetUnitTestImpl() {
+inline UnitTestImpl* GetUnitTestImpl() {
return UnitTest::GetInstance()->impl();
}
@@ -968,7 +965,7 @@ inline UnitTestImpl *GetUnitTestImpl() {
// Internal helper functions for implementing the simple regular
// expression matcher.
-GTEST_API_ bool IsInSet(char ch, const char *str);
+GTEST_API_ bool IsInSet(char ch, const char* str);
GTEST_API_ bool IsAsciiDigit(char ch);
GTEST_API_ bool IsAsciiPunct(char ch);
GTEST_API_ bool IsRepeat(char ch);
@@ -976,19 +973,19 @@ GTEST_API_ bool IsAsciiWhiteSpace(char ch);
GTEST_API_ bool IsAsciiWordChar(char ch);
GTEST_API_ bool IsValidEscape(char ch);
GTEST_API_ bool AtomMatchesChar(bool escaped, char pattern, char ch);
-GTEST_API_ bool ValidateRegex(const char *regex);
-GTEST_API_ bool MatchRegexAtHead(const char *regex, const char *str);
+GTEST_API_ bool ValidateRegex(const char* regex);
+GTEST_API_ bool MatchRegexAtHead(const char* regex, const char* str);
GTEST_API_ bool MatchRepetitionAndRegexAtHead(bool escaped, char ch,
- char repeat, const char *regex,
- const char *str);
-GTEST_API_ bool MatchRegexAnywhere(const char *regex, const char *str);
+ char repeat, const char* regex,
+ const char* str);
+GTEST_API_ bool MatchRegexAnywhere(const char* regex, const char* str);
#endif // GTEST_USES_SIMPLE_RE
// Parses the command line for Google Test flags, without initializing
// other parts of Google Test.
-GTEST_API_ void ParseGoogleTestFlagsOnly(int *argc, char **argv);
-GTEST_API_ void ParseGoogleTestFlagsOnly(int *argc, wchar_t **argv);
+GTEST_API_ void ParseGoogleTestFlagsOnly(int* argc, char** argv);
+GTEST_API_ void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv);
#if GTEST_HAS_DEATH_TEST
@@ -1001,7 +998,7 @@ GTEST_API_ std::string GetLastErrnoDescription();
// GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we can use
// it here.
template <typename Integer>
-bool ParseNaturalNumber(const ::std::string &str, Integer *number) {
+bool ParseNaturalNumber(const ::std::string& str, Integer* number) {
// Fail fast if the given string does not begin with a digit;
// this bypasses strtoXXX's "optional leading whitespace and plus
// or minus sign" semantics, which are undesirable here.
@@ -1010,7 +1007,7 @@ bool ParseNaturalNumber(const ::std::string &str, Integer *number) {
}
errno = 0;
- char *end;
+ char* end;
// BiggestConvertible is the largest integer type that system-provided
// string-to-number conversion routines can return.
using BiggestConvertible = unsigned long long; // NOLINT
@@ -1037,18 +1034,18 @@ bool ParseNaturalNumber(const ::std::string &str, Integer *number) {
// constructs. Do not use it in user tests, either directly or indirectly.
class TestResultAccessor {
public:
- static void RecordProperty(TestResult *test_result,
- const std::string &xml_element,
- const TestProperty &property) {
+ static void RecordProperty(TestResult* test_result,
+ const std::string& xml_element,
+ const TestProperty& property) {
test_result->RecordProperty(xml_element, property);
}
- static void ClearTestPartResults(TestResult *test_result) {
+ static void ClearTestPartResults(TestResult* test_result) {
test_result->ClearTestPartResults();
}
- static const std::vector<testing::TestPartResult> &test_part_results(
- const TestResult &test_result) {
+ static const std::vector<testing::TestPartResult>& test_part_results(
+ const TestResult& test_result) {
return test_result.test_part_results();
}
};
@@ -1064,19 +1061,19 @@ class StreamingListener : public EmptyTestEventListener {
virtual ~AbstractSocketWriter() {}
// Sends a string to the socket.
- virtual void Send(const std::string &message) = 0;
+ virtual void Send(const std::string& message) = 0;
// Closes the socket.
virtual void CloseConnection() {}
// Sends a string and a newline to the socket.
- void SendLn(const std::string &message) { Send(message + "\n"); }
+ void SendLn(const std::string& message) { Send(message + "\n"); }
};
// Concrete class for actually writing strings to a socket.
class SocketWriter : public AbstractSocketWriter {
public:
- SocketWriter(const std::string &host, const std::string &port)
+ SocketWriter(const std::string& host, const std::string& port)
: sockfd_(-1), host_name_(host), port_num_(port) {
MakeConnection();
}
@@ -1086,7 +1083,7 @@ class StreamingListener : public EmptyTestEventListener {
}
// Sends a string to the socket.
- void Send(const std::string &message) override {
+ void Send(const std::string& message) override {
GTEST_CHECK_(sockfd_ != -1)
<< "Send() can be called only when there is a connection.";
@@ -1114,27 +1111,28 @@ class StreamingListener : public EmptyTestEventListener {
const std::string host_name_;
const std::string port_num_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(SocketWriter);
+ SocketWriter(const SocketWriter&) = delete;
+ SocketWriter& operator=(const SocketWriter&) = delete;
}; // class SocketWriter
// Escapes '=', '&', '%', and '\n' characters in str as "%xx".
- static std::string UrlEncode(const char *str);
+ static std::string UrlEncode(const char* str);
- StreamingListener(const std::string &host, const std::string &port)
+ StreamingListener(const std::string& host, const std::string& port)
: socket_writer_(new SocketWriter(host, port)) {
Start();
}
- explicit StreamingListener(AbstractSocketWriter *socket_writer)
+ explicit StreamingListener(AbstractSocketWriter* socket_writer)
: socket_writer_(socket_writer) {
Start();
}
- void OnTestProgramStart(const UnitTest & /* unit_test */) override {
+ void OnTestProgramStart(const UnitTest& /* unit_test */) override {
SendLn("event=TestProgramStart");
}
- void OnTestProgramEnd(const UnitTest &unit_test) override {
+ void OnTestProgramEnd(const UnitTest& unit_test) override {
// Note that Google Test current only report elapsed time for each
// test iteration, not for the entire test program.
SendLn("event=TestProgramEnd&passed=" + FormatBool(unit_test.Passed()));
@@ -1143,13 +1141,13 @@ class StreamingListener : public EmptyTestEventListener {
socket_writer_->CloseConnection();
}
- void OnTestIterationStart(const UnitTest & /* unit_test */,
+ void OnTestIterationStart(const UnitTest& /* unit_test */,
int iteration) override {
SendLn("event=TestIterationStart&iteration=" +
StreamableToString(iteration));
}
- void OnTestIterationEnd(const UnitTest &unit_test,
+ void OnTestIterationEnd(const UnitTest& unit_test,
int /* iteration */) override {
SendLn("event=TestIterationEnd&passed=" + FormatBool(unit_test.Passed()) +
"&elapsed_time=" + StreamableToString(unit_test.elapsed_time()) +
@@ -1157,31 +1155,31 @@ class StreamingListener : public EmptyTestEventListener {
}
// Note that "event=TestCaseStart" is a wire format and has to remain
- // "case" for compatibilty
- void OnTestCaseStart(const TestCase &test_case) override {
- SendLn(std::string("event=TestCaseStart&name=") + test_case.name());
+ // "case" for compatibility
+ void OnTestSuiteStart(const TestSuite& test_suite) override {
+ SendLn(std::string("event=TestCaseStart&name=") + test_suite.name());
}
// Note that "event=TestCaseEnd" is a wire format and has to remain
- // "case" for compatibilty
- void OnTestCaseEnd(const TestCase &test_case) override {
- SendLn("event=TestCaseEnd&passed=" + FormatBool(test_case.Passed()) +
- "&elapsed_time=" + StreamableToString(test_case.elapsed_time()) +
+ // "case" for compatibility
+ void OnTestSuiteEnd(const TestSuite& test_suite) override {
+ SendLn("event=TestCaseEnd&passed=" + FormatBool(test_suite.Passed()) +
+ "&elapsed_time=" + StreamableToString(test_suite.elapsed_time()) +
"ms");
}
- void OnTestStart(const TestInfo &test_info) override {
+ void OnTestStart(const TestInfo& test_info) override {
SendLn(std::string("event=TestStart&name=") + test_info.name());
}
- void OnTestEnd(const TestInfo &test_info) override {
+ void OnTestEnd(const TestInfo& test_info) override {
SendLn("event=TestEnd&passed=" +
FormatBool((test_info.result())->Passed()) + "&elapsed_time=" +
StreamableToString((test_info.result())->elapsed_time()) + "ms");
}
- void OnTestPartResult(const TestPartResult &test_part_result) override {
- const char *file_name = test_part_result.file_name();
+ void OnTestPartResult(const TestPartResult& test_part_result) override {
+ const char* file_name = test_part_result.file_name();
if (file_name == nullptr) file_name = "";
SendLn("event=TestPartResult&file=" + UrlEncode(file_name) +
"&line=" + StreamableToString(test_part_result.line_number()) +
@@ -1190,7 +1188,7 @@ class StreamingListener : public EmptyTestEventListener {
private:
// Sends the given message and a newline to the socket.
- void SendLn(const std::string &message) { socket_writer_->SendLn(message); }
+ void SendLn(const std::string& message) { socket_writer_->SendLn(message); }
// Called at the start of streaming to notify the receiver what
// protocol we are using.
@@ -1200,7 +1198,8 @@ class StreamingListener : public EmptyTestEventListener {
const std::unique_ptr<AbstractSocketWriter> socket_writer_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(StreamingListener);
+ StreamingListener(const StreamingListener&) = delete;
+ StreamingListener& operator=(const StreamingListener&) = delete;
}; // class StreamingListener
#endif // GTEST_CAN_STREAM_RESULTS_
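
The listener moves from the deprecated OnTestCaseStart/OnTestCaseEnd hooks to the TestSuite equivalents, but the strings on the wire keep "TestCase", as the (now correctly spelled) comments insist, so existing receivers keep parsing. A hypothetical transcript for one passing suite, composed from the SendLn calls above (names and timings illustrative):

    event=TestCaseStart&name=MySuite
    event=TestStart&name=DoesThing
    event=TestEnd&passed=1&elapsed_time=3ms
    event=TestCaseEnd&passed=1&elapsed_time=3ms
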
@@ -1210,4 +1209,4 @@ class StreamingListener : public EmptyTestEventListener {
GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251
-#endif // GTEST_SRC_GTEST_INTERNAL_INL_H_
+#endif // GOOGLETEST_SRC_GTEST_INTERNAL_INL_H_
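
Beyond pointer style, the header-wide pattern is that GTEST_DISALLOW_COPY_AND_ASSIGN_(Type) gets expanded at each use site into explicitly deleted members, one more internal macro this update phases out. What the macro hid is ordinary C++11:

    // Non-copyable type, spelled out the way this patch does it.
    class NonCopyable {
     public:
      NonCopyable() = default;
      NonCopyable(const NonCopyable&) = delete;             // no copying
      NonCopyable& operator=(const NonCopyable&) = delete;  // no copy-assign
    };
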
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-matchers.cc b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-matchers.cc
index 27aaa2b7c52..7e3bcc0cff3 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-matchers.cc
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-matchers.cc
@@ -32,58 +32,59 @@
// This file implements just enough of the matcher interface to allow
// EXPECT_DEATH and friends to accept a matcher argument.
-#include "gtest/internal/gtest-internal.h"
-#include "gtest/internal/gtest-port.h"
#include "gtest/gtest-matchers.h"
#include <string>
+#include "gtest/internal/gtest-internal.h"
+#include "gtest/internal/gtest-port.h"
+
namespace testing {
// Constructs a matcher that matches a const std::string& whose value is
// equal to s.
-Matcher<const std::string &>::Matcher(const std::string &s) { *this = Eq(s); }
+Matcher<const std::string&>::Matcher(const std::string& s) { *this = Eq(s); }
// Constructs a matcher that matches a const std::string& whose value is
// equal to s.
-Matcher<const std::string &>::Matcher(const char *s) {
+Matcher<const std::string&>::Matcher(const char* s) {
*this = Eq(std::string(s));
}
// Constructs a matcher that matches a std::string whose value is equal to
// s.
-Matcher<std::string>::Matcher(const std::string &s) { *this = Eq(s); }
+Matcher<std::string>::Matcher(const std::string& s) { *this = Eq(s); }
// Constructs a matcher that matches a std::string whose value is equal to
// s.
-Matcher<std::string>::Matcher(const char *s) { *this = Eq(std::string(s)); }
+Matcher<std::string>::Matcher(const char* s) { *this = Eq(std::string(s)); }
#if GTEST_INTERNAL_HAS_STRING_VIEW
// Constructs a matcher that matches a const StringView& whose value is
// equal to s.
-Matcher<const internal::StringView &>::Matcher(const std::string &s) {
+Matcher<const internal::StringView&>::Matcher(const std::string& s) {
*this = Eq(s);
}
// Constructs a matcher that matches a const StringView& whose value is
// equal to s.
-Matcher<const internal::StringView &>::Matcher(const char *s) {
+Matcher<const internal::StringView&>::Matcher(const char* s) {
*this = Eq(std::string(s));
}
// Constructs a matcher that matches a const StringView& whose value is
// equal to s.
-Matcher<const internal::StringView &>::Matcher(internal::StringView s) {
+Matcher<const internal::StringView&>::Matcher(internal::StringView s) {
*this = Eq(std::string(s));
}
// Constructs a matcher that matches a StringView whose value is equal to
// s.
-Matcher<internal::StringView>::Matcher(const std::string &s) { *this = Eq(s); }
+Matcher<internal::StringView>::Matcher(const std::string& s) { *this = Eq(s); }
// Constructs a matcher that matches a StringView whose value is equal to
// s.
-Matcher<internal::StringView>::Matcher(const char *s) {
+Matcher<internal::StringView>::Matcher(const char* s) {
*this = Eq(std::string(s));
}
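
These constructors are only reformatted; each still delegates to Eq(), which is the point of this file: it is what lets EXPECT_DEATH and friends accept a bare string where a Matcher<const std::string&> is expected. A usage sketch against the public gtest-matchers API:

    // Equivalent matchers: the const char* constructor shown above
    // forwards to Eq(std::string(...)).
    testing::Matcher<const std::string&> m1 = "exact message";
    testing::Matcher<const std::string&> m2 =
        testing::Eq(std::string("exact message"));
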
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-port.cc b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-port.cc
index adfdbef9c67..d797fe4d586 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-port.cc
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-port.cc
@@ -33,14 +33,16 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+
#include <cstdint>
#include <fstream>
#include <memory>
#if GTEST_OS_WINDOWS
-#include <windows.h>
#include <io.h>
#include <sys/stat.h>
+#include <windows.h>
+
#include <map> // Used in ThreadLocal.
#ifdef _MSC_VER
#include <crtdbg.h>
@@ -79,8 +81,8 @@
#include <zircon/syscalls.h>
#endif // GTEST_OS_FUCHSIA
-#include "gtest/gtest-spi.h"
#include "gtest/gtest-message.h"
+#include "gtest/gtest-spi.h"
#include "gtest/internal/gtest-internal.h"
#include "gtest/internal/gtest-string.h"
#include "src/gtest-internal-inl.h"
@@ -88,20 +90,11 @@
namespace testing {
namespace internal {
-#if defined(_MSC_VER) || defined(__BORLANDC__)
-// MSVC and C++Builder do not provide a definition of STDERR_FILENO.
-const int kStdOutFileno = 1;
-const int kStdErrFileno = 2;
-#else
-const int kStdOutFileno = STDOUT_FILENO;
-const int kStdErrFileno = STDERR_FILENO;
-#endif // _MSC_VER
-
-#if GTEST_OS_LINUX
+#if GTEST_OS_LINUX || GTEST_OS_GNU_HURD
namespace {
template <typename T>
-T ReadProcFileField(const std::string &filename, int field) {
+T ReadProcFileField(const std::string& filename, int field) {
std::string dummy;
std::ifstream file(filename.c_str());
while (field-- > 0) {
@@ -182,12 +175,12 @@ size_t GetThreadCount() {
// we cannot detect it.
size_t GetThreadCount() {
int mib[] = {
- CTL_KERN,
- KERN_PROC,
- KERN_PROC_PID | KERN_PROC_SHOW_THREADS,
- getpid(),
- sizeof(struct kinfo_proc),
- 0,
+ CTL_KERN,
+ KERN_PROC,
+ KERN_PROC_PID | KERN_PROC_SHOW_THREADS,
+ getpid(),
+ sizeof(struct kinfo_proc),
+ 0,
};
u_int miblen = sizeof(mib) / sizeof(mib[0]);
@@ -196,7 +189,8 @@ size_t GetThreadCount() {
if (sysctl(mib, miblen, NULL, &size, NULL, 0)) {
return 0;
}
- mib[5] = size / mib[4];
+
+ mib[5] = static_cast<int>(size / static_cast<size_t>(mib[4]));
// populate array of structs
struct kinfo_proc info[mib[5]];
@@ -205,8 +199,8 @@ size_t GetThreadCount() {
}
// exclude empty members
- int nthreads = 0;
- for (int i = 0; i < size / mib[4]; i++) {
+ size_t nthreads = 0;
+ for (size_t i = 0; i < size / static_cast<size_t>(mib[4]); i++) {
if (info[i].p_tid != -1) nthreads++;
}
return nthreads;
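
The OpenBSD GetThreadCount() changes are type hygiene: the element count implied by the sysctl result is computed with explicit static_casts and the counter becomes size_t, removing signed/unsigned mismatches without changing the count. Reduced to the cast in question (names hypothetical):

    #include <cstddef>

    // size: byte count reported by sysctl; stride: sizeof(struct
    // kinfo_proc), carried in mib[4] as an int in the original code.
    int ElementCount(std::size_t size, int stride) {
      return static_cast<int>(size / static_cast<std::size_t>(stride));
    }
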
@@ -272,8 +266,6 @@ size_t GetThreadCount() {
#if GTEST_IS_THREADSAFE && GTEST_OS_WINDOWS
-void SleepMilliseconds(int n) { ::Sleep(static_cast<DWORD>(n)); }
-
AutoHandle::AutoHandle() : handle_(INVALID_HANDLE_VALUE) {}
AutoHandle::AutoHandle(Handle handle) : handle_(handle) {}
@@ -304,22 +296,10 @@ bool AutoHandle::IsCloseable() const {
return handle_ != nullptr && handle_ != INVALID_HANDLE_VALUE;
}
-Notification::Notification()
- : event_(::CreateEvent(nullptr, // Default security attributes.
- TRUE, // Do not reset automatically.
- FALSE, // Initially unset.
- nullptr)) { // Anonymous event.
- GTEST_CHECK_(event_.Get() != nullptr);
-}
-
-void Notification::Notify() { GTEST_CHECK_(::SetEvent(event_.Get()) != FALSE); }
-
-void Notification::WaitForNotification() {
- GTEST_CHECK_(::WaitForSingleObject(event_.Get(), INFINITE) == WAIT_OBJECT_0);
-}
-
Mutex::Mutex()
- : owner_thread_id_(0), type_(kDynamic), critical_section_init_phase_(0),
+ : owner_thread_id_(0),
+ type_(kDynamic),
+ critical_section_init_phase_(0),
critical_section_(new CRITICAL_SECTION) {
::InitializeCriticalSection(critical_section_);
}
@@ -374,18 +354,19 @@ class MemoryIsNotDeallocated {
old_crtdbg_flag_ = _CrtSetDbgFlag(_CRTDBG_REPORT_FLAG);
// Set heap allocation block type to _IGNORE_BLOCK so that MS debug CRT
// doesn't report mem leak if there's no matching deallocation.
- _CrtSetDbgFlag(old_crtdbg_flag_ & ~_CRTDBG_ALLOC_MEM_DF);
+ (void)_CrtSetDbgFlag(old_crtdbg_flag_ & ~_CRTDBG_ALLOC_MEM_DF);
}
~MemoryIsNotDeallocated() {
// Restore the original _CRTDBG_ALLOC_MEM_DF flag
- _CrtSetDbgFlag(old_crtdbg_flag_);
+ (void)_CrtSetDbgFlag(old_crtdbg_flag_);
}
private:
int old_crtdbg_flag_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(MemoryIsNotDeallocated);
+ MemoryIsNotDeallocated(const MemoryIsNotDeallocated&) = delete;
+ MemoryIsNotDeallocated& operator=(const MemoryIsNotDeallocated&) = delete;
};
#endif // _MSC_VER
@@ -425,7 +406,8 @@ void Mutex::ThreadSafeLazyInit() {
}
break;
- case 2: break; // The mutex is already initialized and ready for use.
+ case 2:
+ break; // The mutex is already initialized and ready for use.
default:
GTEST_CHECK_(false)
@@ -439,9 +421,9 @@ namespace {
class ThreadWithParamSupport : public ThreadWithParamBase {
public:
- static HANDLE CreateThread(Runnable *runnable,
- Notification *thread_can_start) {
- ThreadMainParam *param = new ThreadMainParam(runnable, thread_can_start);
+ static HANDLE CreateThread(Runnable* runnable,
+ Notification* thread_can_start) {
+ ThreadMainParam* param = new ThreadMainParam(runnable, thread_can_start);
DWORD thread_id;
HANDLE thread_handle = ::CreateThread(
nullptr, // Default security.
@@ -460,16 +442,16 @@ class ThreadWithParamSupport : public ThreadWithParamBase {
private:
struct ThreadMainParam {
- ThreadMainParam(Runnable *runnable, Notification *thread_can_start)
+ ThreadMainParam(Runnable* runnable, Notification* thread_can_start)
: runnable_(runnable), thread_can_start_(thread_can_start) {}
std::unique_ptr<Runnable> runnable_;
// Does not own.
- Notification *thread_can_start_;
+ Notification* thread_can_start_;
};
- static DWORD WINAPI ThreadMain(void *ptr) {
+ static DWORD WINAPI ThreadMain(void* ptr) {
// Transfers ownership.
- std::unique_ptr<ThreadMainParam> param(static_cast<ThreadMainParam *>(ptr));
+ std::unique_ptr<ThreadMainParam> param(static_cast<ThreadMainParam*>(ptr));
if (param->thread_can_start_ != nullptr)
param->thread_can_start_->WaitForNotification();
param->runnable_->Run();
@@ -479,13 +461,14 @@ class ThreadWithParamSupport : public ThreadWithParamBase {
// Prohibit instantiation.
ThreadWithParamSupport();
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParamSupport);
+ ThreadWithParamSupport(const ThreadWithParamSupport&) = delete;
+ ThreadWithParamSupport& operator=(const ThreadWithParamSupport&) = delete;
};
} // namespace
-ThreadWithParamBase::ThreadWithParamBase(Runnable *runnable,
- Notification *thread_can_start)
+ThreadWithParamBase::ThreadWithParamBase(Runnable* runnable,
+ Notification* thread_can_start)
: thread_(
ThreadWithParamSupport::CreateThread(runnable, thread_can_start)) {}
@@ -504,14 +487,14 @@ class ThreadLocalRegistryImpl {
public:
// Registers thread_local_instance as having value on the current thread.
// Returns a value that can be used to identify the thread from other threads.
- static ThreadLocalValueHolderBase *GetValueOnCurrentThread(
- const ThreadLocalBase *thread_local_instance) {
+ static ThreadLocalValueHolderBase* GetValueOnCurrentThread(
+ const ThreadLocalBase* thread_local_instance) {
#ifdef _MSC_VER
MemoryIsNotDeallocated memory_is_not_deallocated;
#endif // _MSC_VER
DWORD current_thread = ::GetCurrentThreadId();
MutexLock lock(&mutex_);
- ThreadIdToThreadLocals *const thread_to_thread_locals =
+ ThreadIdToThreadLocals* const thread_to_thread_locals =
GetThreadLocalsMapLocked();
ThreadIdToThreadLocals::iterator thread_local_pos =
thread_to_thread_locals->find(current_thread);
@@ -522,7 +505,7 @@ class ThreadLocalRegistryImpl {
.first;
StartWatcherThreadFor(current_thread);
}
- ThreadLocalValues &thread_local_values = thread_local_pos->second;
+ ThreadLocalValues& thread_local_values = thread_local_pos->second;
ThreadLocalValues::iterator value_pos =
thread_local_values.find(thread_local_instance);
if (value_pos == thread_local_values.end()) {
@@ -538,18 +521,18 @@ class ThreadLocalRegistryImpl {
}
static void OnThreadLocalDestroyed(
- const ThreadLocalBase *thread_local_instance) {
+ const ThreadLocalBase* thread_local_instance) {
std::vector<std::shared_ptr<ThreadLocalValueHolderBase> > value_holders;
// Clean up the ThreadLocalValues data structure while holding the lock, but
// defer the destruction of the ThreadLocalValueHolderBases.
{
MutexLock lock(&mutex_);
- ThreadIdToThreadLocals *const thread_to_thread_locals =
+ ThreadIdToThreadLocals* const thread_to_thread_locals =
GetThreadLocalsMapLocked();
for (ThreadIdToThreadLocals::iterator it =
thread_to_thread_locals->begin();
it != thread_to_thread_locals->end(); ++it) {
- ThreadLocalValues &thread_local_values = it->second;
+ ThreadLocalValues& thread_local_values = it->second;
ThreadLocalValues::iterator value_pos =
thread_local_values.find(thread_local_instance);
if (value_pos != thread_local_values.end()) {
@@ -571,12 +554,12 @@ class ThreadLocalRegistryImpl {
// lock, but defer the destruction of the ThreadLocalValueHolderBases.
{
MutexLock lock(&mutex_);
- ThreadIdToThreadLocals *const thread_to_thread_locals =
+ ThreadIdToThreadLocals* const thread_to_thread_locals =
GetThreadLocalsMapLocked();
ThreadIdToThreadLocals::iterator thread_local_pos =
thread_to_thread_locals->find(thread_id);
if (thread_local_pos != thread_to_thread_locals->end()) {
- ThreadLocalValues &thread_local_values = thread_local_pos->second;
+ ThreadLocalValues& thread_local_values = thread_local_pos->second;
for (ThreadLocalValues::iterator value_pos =
thread_local_values.begin();
value_pos != thread_local_values.end(); ++value_pos) {
@@ -591,7 +574,7 @@ class ThreadLocalRegistryImpl {
private:
// In a particular thread, maps a ThreadLocal object to its value.
- typedef std::map<const ThreadLocalBase *,
+ typedef std::map<const ThreadLocalBase*,
std::shared_ptr<ThreadLocalValueHolderBase> >
ThreadLocalValues;
// Stores all ThreadIdToThreadLocals having values in a thread, indexed by
@@ -617,7 +600,8 @@ class ThreadLocalRegistryImpl {
&ThreadLocalRegistryImpl::WatcherThreadFunc,
reinterpret_cast<LPVOID>(new ThreadIdAndHandle(thread_id, thread)),
CREATE_SUSPENDED, &watcher_thread_id);
- GTEST_CHECK_(watcher_thread != nullptr);
+ GTEST_CHECK_(watcher_thread != nullptr)
+ << "CreateThread failed with error " << ::GetLastError() << ".";
// Give the watcher thread the same priority as ours to avoid being
// blocked by it.
::SetThreadPriority(watcher_thread,
@@ -629,8 +613,8 @@ class ThreadLocalRegistryImpl {
// Monitors exit from a given thread and notifies those
// ThreadIdToThreadLocals about thread termination.
static DWORD WINAPI WatcherThreadFunc(LPVOID param) {
- const ThreadIdAndHandle *tah =
- reinterpret_cast<const ThreadIdAndHandle *>(param);
+ const ThreadIdAndHandle* tah =
+ reinterpret_cast<const ThreadIdAndHandle*>(param);
GTEST_CHECK_(::WaitForSingleObject(tah->second, INFINITE) == WAIT_OBJECT_0);
OnThreadExit(tah->first);
::CloseHandle(tah->second);
@@ -639,12 +623,12 @@ class ThreadLocalRegistryImpl {
}
// Returns map of thread local instances.
- static ThreadIdToThreadLocals *GetThreadLocalsMapLocked() {
+ static ThreadIdToThreadLocals* GetThreadLocalsMapLocked() {
mutex_.AssertHeld();
#ifdef _MSC_VER
MemoryIsNotDeallocated memory_is_not_deallocated;
#endif // _MSC_VER
- static ThreadIdToThreadLocals *map = new ThreadIdToThreadLocals();
+ static ThreadIdToThreadLocals* map = new ThreadIdToThreadLocals();
return map;
}
@@ -654,17 +638,18 @@ class ThreadLocalRegistryImpl {
static Mutex thread_map_mutex_;
};
-Mutex ThreadLocalRegistryImpl::mutex_(Mutex::kStaticMutex);
-Mutex ThreadLocalRegistryImpl::thread_map_mutex_(Mutex::kStaticMutex);
+Mutex ThreadLocalRegistryImpl::mutex_(Mutex::kStaticMutex); // NOLINT
+Mutex ThreadLocalRegistryImpl::thread_map_mutex_(
+ Mutex::kStaticMutex); // NOLINT
-ThreadLocalValueHolderBase *ThreadLocalRegistry::GetValueOnCurrentThread(
- const ThreadLocalBase *thread_local_instance) {
+ThreadLocalValueHolderBase* ThreadLocalRegistry::GetValueOnCurrentThread(
+ const ThreadLocalBase* thread_local_instance) {
return ThreadLocalRegistryImpl::GetValueOnCurrentThread(
thread_local_instance);
}
void ThreadLocalRegistry::OnThreadLocalDestroyed(
- const ThreadLocalBase *thread_local_instance) {
+ const ThreadLocalBase* thread_local_instance) {
ThreadLocalRegistryImpl::OnThreadLocalDestroyed(thread_local_instance);
}
@@ -683,11 +668,11 @@ RE::~RE() {
regfree(&partial_regex_);
regfree(&full_regex_);
}
- free(const_cast<char *>(pattern_));
+ free(const_cast<char*>(pattern_));
}
// Returns true if and only if regular expression re matches the entire str.
-bool RE::FullMatch(const char *str, const RE &re) {
+bool RE::FullMatch(const char* str, const RE& re) {
if (!re.is_valid_) return false;
regmatch_t match;
@@ -696,7 +681,7 @@ bool RE::FullMatch(const char *str, const RE &re) {
// Returns true if and only if regular expression re matches a substring of
// str (including str itself).
-bool RE::PartialMatch(const char *str, const RE &re) {
+bool RE::PartialMatch(const char* str, const RE& re) {
if (!re.is_valid_) return false;
regmatch_t match;
@@ -704,13 +689,13 @@ bool RE::PartialMatch(const char *str, const RE &re) {
}
// Initializes an RE from its string representation.
-void RE::Init(const char *regex) {
+void RE::Init(const char* regex) {
pattern_ = posix::StrDup(regex);
// Reserves enough bytes to hold the regular expression used for a
// full match.
const size_t full_regex_len = strlen(regex) + 10;
- char *const full_pattern = new char[full_regex_len];
+ char* const full_pattern = new char[full_regex_len];
snprintf(full_pattern, full_regex_len, "^(%s)$", regex);
is_valid_ = regcomp(&full_regex_, full_pattern, REG_EXTENDED) == 0;
@@ -723,7 +708,7 @@ void RE::Init(const char *regex) {
// versions of Cygwin) doesn't accept the empty string as a valid
// regex. We change it to an equivalent form "()" to be safe.
if (is_valid_) {
- const char *const partial_regex = (*regex == '\0') ? "()" : regex;
+ const char* const partial_regex = (*regex == '\0') ? "()" : regex;
is_valid_ = regcomp(&partial_regex_, partial_regex, REG_EXTENDED) == 0;
}
EXPECT_TRUE(is_valid_)
@@ -737,7 +722,7 @@ void RE::Init(const char *regex) {
// Returns true if and only if ch appears anywhere in str (excluding the
// terminating '\0' character).
-bool IsInSet(char ch, const char *str) {
+bool IsInSet(char ch, const char* str) {
return ch != '\0' && strchr(str, ch) != nullptr;
}
@@ -765,17 +750,28 @@ bool IsValidEscape(char c) {
bool AtomMatchesChar(bool escaped, char pattern_char, char ch) {
if (escaped) { // "\\p" where p is pattern_char.
switch (pattern_char) {
- case 'd': return IsAsciiDigit(ch);
- case 'D': return !IsAsciiDigit(ch);
- case 'f': return ch == '\f';
- case 'n': return ch == '\n';
- case 'r': return ch == '\r';
- case 's': return IsAsciiWhiteSpace(ch);
- case 'S': return !IsAsciiWhiteSpace(ch);
- case 't': return ch == '\t';
- case 'v': return ch == '\v';
- case 'w': return IsAsciiWordChar(ch);
- case 'W': return !IsAsciiWordChar(ch);
+ case 'd':
+ return IsAsciiDigit(ch);
+ case 'D':
+ return !IsAsciiDigit(ch);
+ case 'f':
+ return ch == '\f';
+ case 'n':
+ return ch == '\n';
+ case 'r':
+ return ch == '\r';
+ case 's':
+ return IsAsciiWhiteSpace(ch);
+ case 'S':
+ return !IsAsciiWhiteSpace(ch);
+ case 't':
+ return ch == '\t';
+ case 'v':
+ return ch == '\v';
+ case 'w':
+ return IsAsciiWordChar(ch);
+ case 'W':
+ return !IsAsciiWordChar(ch);
}
return IsAsciiPunct(pattern_char) && pattern_char == ch;
}
@@ -784,7 +780,7 @@ bool AtomMatchesChar(bool escaped, char pattern_char, char ch) {
}
// Helper function used by ValidateRegex() to format error messages.
-static std::string FormatRegexSyntaxError(const char *regex, int index) {
+static std::string FormatRegexSyntaxError(const char* regex, int index) {
return (Message() << "Syntax error at index " << index
<< " in simple regular expression \"" << regex << "\": ")
.GetString();
@@ -792,7 +788,7 @@ static std::string FormatRegexSyntaxError(const char *regex, int index) {
// Generates non-fatal failures and returns false if regex is invalid;
// otherwise returns true.
-bool ValidateRegex(const char *regex) {
+bool ValidateRegex(const char* regex) {
if (regex == nullptr) {
ADD_FAILURE() << "NULL is not a valid simple regular expression.";
return false;
@@ -853,7 +849,7 @@ bool ValidateRegex(const char *regex) {
// probably time out anyway. We are fine with this limitation as
// std::string has it too.
bool MatchRepetitionAndRegexAtHead(bool escaped, char c, char repeat,
- const char *regex, const char *str) {
+ const char* regex, const char* str) {
const size_t min_count = (repeat == '+') ? 1 : 0;
const size_t max_count = (repeat == '?') ? 1 : static_cast<size_t>(-1) - 1;
// We cannot call numeric_limits::max() as it conflicts with the
@@ -876,7 +872,7 @@ bool MatchRepetitionAndRegexAtHead(bool escaped, char c, char repeat,
// Returns true if and only if regex matches a prefix of str. regex must
// be a valid simple regular expression and not start with "^", or the
// result is undefined.
-bool MatchRegexAtHead(const char *regex, const char *str) {
+bool MatchRegexAtHead(const char* regex, const char* str) {
if (*regex == '\0') // An empty regex matches a prefix of anything.
return true;
@@ -910,7 +906,7 @@ bool MatchRegexAtHead(const char *regex, const char *str) {
// stack space normally. In rare cases the time complexity can be
// exponential with respect to the regex length + the string length,
// but usually it's much faster (often close to linear).
-bool MatchRegexAnywhere(const char *regex, const char *str) {
+bool MatchRegexAnywhere(const char* regex, const char* str) {
if (regex == nullptr || str == nullptr) return false;
if (*regex == '^') return MatchRegexAtHead(regex + 1, str);
@@ -925,23 +921,23 @@ bool MatchRegexAnywhere(const char *regex, const char *str) {
// Implements the RE class.
RE::~RE() {
- free(const_cast<char *>(pattern_));
- free(const_cast<char *>(full_pattern_));
+ free(const_cast<char*>(pattern_));
+ free(const_cast<char*>(full_pattern_));
}
// Returns true if and only if regular expression re matches the entire str.
-bool RE::FullMatch(const char *str, const RE &re) {
+bool RE::FullMatch(const char* str, const RE& re) {
return re.is_valid_ && MatchRegexAnywhere(re.full_pattern_, str);
}
// Returns true if and only if regular expression re matches a substring of
// str (including str itself).
-bool RE::PartialMatch(const char *str, const RE &re) {
+bool RE::PartialMatch(const char* str, const RE& re) {
return re.is_valid_ && MatchRegexAnywhere(re.pattern_, str);
}
// Initializes an RE from its string representation.
-void RE::Init(const char *regex) {
+void RE::Init(const char* regex) {
pattern_ = full_pattern_ = nullptr;
if (regex != nullptr) {
pattern_ = posix::StrDup(regex);
@@ -957,7 +953,7 @@ void RE::Init(const char *regex) {
// Reserves enough bytes to hold the regular expression used for a
// full match: we need space to prepend a '^', append a '$', and
// terminate the string with '\0'.
- char *buffer = static_cast<char *>(malloc(len + 3));
+ char* buffer = static_cast<char*>(malloc(len + 3));
full_pattern_ = buffer;
if (*regex != '^')
@@ -980,7 +976,7 @@ const char kUnknownFile[] = "unknown file";
// Formats a source file path and a line number as they would appear
// in an error message from the compiler used to compile this code.
-GTEST_API_ ::std::string FormatFileLocation(const char *file, int line) {
+GTEST_API_ ::std::string FormatFileLocation(const char* file, int line) {
const std::string file_name(file == nullptr ? kUnknownFile : file);
if (line < 0) {
@@ -998,7 +994,7 @@ GTEST_API_ ::std::string FormatFileLocation(const char *file, int line) {
// FormatFileLocation in order to contrast the two functions.
// Note that FormatCompilerIndependentFileLocation() does NOT append colon
// to the file location it produces, unlike FormatFileLocation().
-GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(const char *file,
+GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(const char* file,
int line) {
const std::string file_name(file == nullptr ? kUnknownFile : file);
@@ -1008,14 +1004,12 @@ GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(const char *file,
return file_name + ":" + StreamableToString(line);
}
-GTestLog::GTestLog(GTestLogSeverity severity, const char *file, int line)
+GTestLog::GTestLog(GTestLogSeverity severity, const char* file, int line)
: severity_(severity) {
- const char *const marker =
- severity == GTEST_INFO
- ? "[ INFO ]"
- : severity == GTEST_WARNING
- ? "[WARNING]"
- : severity == GTEST_ERROR ? "[ ERROR ]" : "[ FATAL ]";
+ const char* const marker = severity == GTEST_INFO ? "[ INFO ]"
+ : severity == GTEST_WARNING ? "[WARNING]"
+ : severity == GTEST_ERROR ? "[ ERROR ]"
+ : "[ FATAL ]";
GetStream() << ::std::endl
<< marker << " " << FormatFileLocation(file, line).c_str()
<< ": ";
@@ -1042,8 +1036,8 @@ class CapturedStream {
// The ctor redirects the stream to a temporary file.
explicit CapturedStream(int fd) : fd_(fd), uncaptured_fd_(dup(fd)) {
#if GTEST_OS_WINDOWS
- char temp_dir_path[MAX_PATH + 1] = { '\0' }; // NOLINT
- char temp_file_path[MAX_PATH + 1] = { '\0' }; // NOLINT
+ char temp_dir_path[MAX_PATH + 1] = {'\0'}; // NOLINT
+ char temp_file_path[MAX_PATH + 1] = {'\0'}; // NOLINT
::GetTempPathA(sizeof(temp_dir_path), temp_dir_path);
const UINT success = ::GetTempFileNameA(temp_dir_path, "gtest_redir",
@@ -1057,9 +1051,9 @@ class CapturedStream {
filename_ = temp_file_path;
#else
// There's no guarantee that a test has write access to the current
- // directory, so we create the temporary file in the /tmp directory
- // instead. We use /tmp on most systems, and /sdcard on Android.
- // That's because Android doesn't have /tmp.
+ // directory, so we create the temporary file in a temporary directory.
+ std::string name_template;
+
#if GTEST_OS_LINUX_ANDROID
// Note: Android applications are expected to call the framework's
// Context.getExternalStorageDirectory() method through JNI to get
@@ -1072,17 +1066,46 @@ class CapturedStream {
// The location /data/local/tmp is directly accessible from native code.
// '/sdcard' and other variants cannot be relied on, as they are not
// guaranteed to be mounted, or may have a delay in mounting.
- char name_template[] = "/data/local/tmp/gtest_captured_stream.XXXXXX";
+ name_template = "/data/local/tmp/";
+#elif GTEST_OS_IOS
+ char user_temp_dir[PATH_MAX + 1];
+
+ // Documented alternative to NSTemporaryDirectory() (for creating
+ // a temporary directory) at
+ // https://developer.apple.com/library/archive/documentation/Security/Conceptual/SecureCodingGuide/Articles/RaceConditions.html#//apple_ref/doc/uid/TP40002585-SW10
+ //
+ // _CS_DARWIN_USER_TEMP_DIR (as well as _CS_DARWIN_USER_CACHE_DIR) is not
+ // documented in the confstr() man page at
+ // https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man3/confstr.3.html#//apple_ref/doc/man/3/confstr
+ // but are still available, according to the WebKit patches at
+ // https://trac.webkit.org/changeset/262004/webkit
+ // https://trac.webkit.org/changeset/263705/webkit
+ //
+ // The confstr() implementation falls back to getenv("TMPDIR"). See
+ // https://opensource.apple.com/source/Libc/Libc-1439.100.3/gen/confstr.c.auto.html
+ ::confstr(_CS_DARWIN_USER_TEMP_DIR, user_temp_dir, sizeof(user_temp_dir));
+
+ name_template = user_temp_dir;
+ if (name_template.back() != GTEST_PATH_SEP_[0])
+ name_template.push_back(GTEST_PATH_SEP_[0]);
#else
- char name_template[] = "/tmp/captured_stream.XXXXXX";
-#endif // GTEST_OS_LINUX_ANDROID
- const int captured_fd = mkstemp(name_template);
+ name_template = "/tmp/";
+#endif
+ name_template.append("gtest_captured_stream.XXXXXX");
+
+ // mkstemp() modifies the string bytes in place, and does not go beyond the
+ // string's length. This results in well-defined behavior in C++17.
+ //
+ // The const_cast is needed below C++17. The constraints on std::string
+ // implementations in C++11 and above make the assumption behind the const_cast
+ // fairly safe.
+ const int captured_fd = ::mkstemp(const_cast<char*>(name_template.data()));
if (captured_fd == -1) {
GTEST_LOG_(WARNING)
<< "Failed to create tmp file " << name_template
<< " for test; does the test have access to the /tmp directory?";
}
- filename_ = name_template;
+ filename_ = std::move(name_template);
#endif // GTEST_OS_WINDOWS
fflush(nullptr);
dup2(captured_fd, fd_);
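The mkstemp() call above works because a std::string's buffer is contiguous
and writable through a non-const element pointer. The same pattern in
isolation, as a hypothetical standalone sketch (names invented, headers and
error handling elided, not part of the patch):

    std::string tmpl = "/tmp/demo.XXXXXX";  // the trailing X's are required
    const int fd = ::mkstemp(&tmpl[0]);     // rewrites the X's in place
    if (fd != -1) {
      // tmpl now holds the generated name; the file itself already exists.
      ::close(fd);
    }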
@@ -1100,7 +1123,7 @@ class CapturedStream {
uncaptured_fd_ = -1;
}
- FILE *const file = posix::FOpen(filename_.c_str(), "r");
+ FILE* const file = posix::FOpen(filename_.c_str(), "r");
if (file == nullptr) {
GTEST_LOG_(FATAL) << "Failed to open tmp file " << filename_
<< " for capturing stream.";
@@ -1116,17 +1139,18 @@ class CapturedStream {
// Name of the temporary file holding the stderr output.
::std::string filename_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(CapturedStream);
+ CapturedStream(const CapturedStream&) = delete;
+ CapturedStream& operator=(const CapturedStream&) = delete;
};
GTEST_DISABLE_MSC_DEPRECATED_POP_()
-static CapturedStream *g_captured_stderr = nullptr;
-static CapturedStream *g_captured_stdout = nullptr;
+static CapturedStream* g_captured_stderr = nullptr;
+static CapturedStream* g_captured_stdout = nullptr;
// Starts capturing an output stream (stdout/stderr).
-static void CaptureStream(int fd, const char *stream_name,
- CapturedStream **stream) {
+static void CaptureStream(int fd, const char* stream_name,
+ CapturedStream** stream) {
if (*stream != nullptr) {
GTEST_LOG_(FATAL) << "Only one " << stream_name
<< " capturer can exist at a time.";
@@ -1135,7 +1159,7 @@ static void CaptureStream(int fd, const char *stream_name,
}
// Stops capturing the output stream and returns the captured string.
-static std::string GetCapturedStream(CapturedStream **captured_stream) {
+static std::string GetCapturedStream(CapturedStream** captured_stream) {
const std::string content = (*captured_stream)->GetCapturedString();
delete *captured_stream;
@@ -1144,6 +1168,15 @@ static std::string GetCapturedStream(CapturedStream **captured_stream) {
return content;
}
+#if defined(_MSC_VER) || defined(__BORLANDC__)
+// MSVC and C++Builder do not provide a definition of STDERR_FILENO.
+const int kStdOutFileno = 1;
+const int kStdErrFileno = 2;
+#else
+const int kStdOutFileno = STDOUT_FILENO;
+const int kStdErrFileno = STDERR_FILENO;
+#endif // defined(_MSC_VER) || defined(__BORLANDC__)
+
// Starts capturing stdout.
void CaptureStdout() {
CaptureStream(kStdOutFileno, "stdout", &g_captured_stdout);
@@ -1166,14 +1199,14 @@ std::string GetCapturedStderr() {
#endif // GTEST_HAS_STREAM_REDIRECTION
-size_t GetFileSize(FILE *file) {
+size_t GetFileSize(FILE* file) {
fseek(file, 0, SEEK_END);
return static_cast<size_t>(ftell(file));
}
-std::string ReadEntireFile(FILE *file) {
+std::string ReadEntireFile(FILE* file) {
const size_t file_size = GetFileSize(file);
- char *const buffer = new char[file_size];
+ char* const buffer = new char[file_size];
size_t bytes_last_read = 0; // # of bytes read in the last fread()
size_t bytes_read = 0; // # of bytes read so far
@@ -1195,7 +1228,7 @@ std::string ReadEntireFile(FILE *file) {
}
#if GTEST_HAS_DEATH_TEST
-static const std::vector<std::string> *g_injected_test_argvs =
+static const std::vector<std::string>* g_injected_test_argvs =
nullptr; // Owned.
std::vector<std::string> GetInjectableArgvs() {
@@ -1205,12 +1238,12 @@ std::vector<std::string> GetInjectableArgvs() {
return GetArgvs();
}
-void SetInjectableArgvs(const std::vector<std::string> *new_argvs) {
+void SetInjectableArgvs(const std::vector<std::string>* new_argvs) {
if (g_injected_test_argvs != new_argvs) delete g_injected_test_argvs;
g_injected_test_argvs = new_argvs;
}
-void SetInjectableArgvs(const std::vector<std::string> &new_argvs) {
+void SetInjectableArgvs(const std::vector<std::string>& new_argvs) {
SetInjectableArgvs(
new std::vector<std::string>(new_argvs.begin(), new_argvs.end()));
}
@@ -1233,7 +1266,7 @@ void Abort() {
// Returns the name of the environment variable corresponding to the
// given flag. For example, FlagToEnvVar("foo") will return
// "GTEST_FOO" in the open-source version.
-static std::string FlagToEnvVar(const char *flag) {
+static std::string FlagToEnvVar(const char* flag) {
const std::string full_flag =
(Message() << GTEST_FLAG_PREFIX_ << flag).GetString();
@@ -1248,9 +1281,9 @@ static std::string FlagToEnvVar(const char *flag) {
// Parses 'str' for a 32-bit signed integer. If successful, writes
// the result to *value and returns true; otherwise leaves *value
// unchanged and returns false.
-bool ParseInt32(const Message &src_text, const char *str, int32_t *value) {
+bool ParseInt32(const Message& src_text, const char* str, int32_t* value) {
// Parses the environment variable as a decimal integer.
- char *end = nullptr;
+ char* end = nullptr;
const long long_value = strtol(str, &end, 10); // NOLINT
// Has strtol() consumed all characters in the string?
@@ -1290,12 +1323,12 @@ bool ParseInt32(const Message &src_text, const char *str, int32_t *value) {
// the given flag; if it's not set, returns default_value.
//
// The value is considered true if and only if it's not "0".
-bool BoolFromGTestEnv(const char *flag, bool default_value) {
+bool BoolFromGTestEnv(const char* flag, bool default_value) {
#if defined(GTEST_GET_BOOL_FROM_ENV_)
return GTEST_GET_BOOL_FROM_ENV_(flag, default_value);
#else
const std::string env_var = FlagToEnvVar(flag);
- const char *const string_value = posix::GetEnv(env_var.c_str());
+ const char* const string_value = posix::GetEnv(env_var.c_str());
return string_value == nullptr ? default_value
: strcmp(string_value, "0") != 0;
#endif // defined(GTEST_GET_BOOL_FROM_ENV_)
@@ -1304,12 +1337,12 @@ bool BoolFromGTestEnv(const char *flag, bool default_value) {
// Reads and returns a 32-bit integer stored in the environment
// variable corresponding to the given flag; if it isn't set or
// doesn't represent a valid 32-bit integer, returns default_value.
-int32_t Int32FromGTestEnv(const char *flag, int32_t default_value) {
+int32_t Int32FromGTestEnv(const char* flag, int32_t default_value) {
#if defined(GTEST_GET_INT32_FROM_ENV_)
return GTEST_GET_INT32_FROM_ENV_(flag, default_value);
#else
const std::string env_var = FlagToEnvVar(flag);
- const char *const string_value = posix::GetEnv(env_var.c_str());
+ const char* const string_value = posix::GetEnv(env_var.c_str());
if (string_value == nullptr) {
// The environment variable is not set.
return default_value;
@@ -1338,7 +1371,7 @@ int32_t Int32FromGTestEnv(const char *flag, int32_t default_value) {
// and if it is set we prepend "xml:" to its value, if it not set we return ""
std::string OutputFlagAlsoCheckEnvVar() {
std::string default_value_for_output_flag = "";
- const char *xml_output_file_env = posix::GetEnv("XML_OUTPUT_FILE");
+ const char* xml_output_file_env = posix::GetEnv("XML_OUTPUT_FILE");
if (nullptr != xml_output_file_env) {
default_value_for_output_flag = std::string("xml:") + xml_output_file_env;
}
@@ -1347,12 +1380,12 @@ std::string OutputFlagAlsoCheckEnvVar() {
// Reads and returns the string environment variable corresponding to
// the given flag; if it's not set, returns default_value.
-const char *StringFromGTestEnv(const char *flag, const char *default_value) {
+const char* StringFromGTestEnv(const char* flag, const char* default_value) {
#if defined(GTEST_GET_STRING_FROM_ENV_)
return GTEST_GET_STRING_FROM_ENV_(flag, default_value);
#else
const std::string env_var = FlagToEnvVar(flag);
- const char *const value = posix::GetEnv(env_var.c_str());
+ const char* const value = posix::GetEnv(env_var.c_str());
return value == nullptr ? default_value : value;
#endif // defined(GTEST_GET_STRING_FROM_ENV_)
}
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-printers.cc b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-printers.cc
index 8399386a990..f3976d230da 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-printers.cc
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-printers.cc
@@ -41,11 +41,16 @@
// defines Foo.
#include "gtest/gtest-printers.h"
+
#include <stdio.h>
+
#include <cctype>
+#include <cstdint>
#include <cwchar>
#include <ostream> // NOLINT
#include <string>
+#include <type_traits>
+
#include "gtest/internal/gtest-port.h"
#include "src/gtest-internal-inl.h"
@@ -60,8 +65,8 @@ GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_
GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_
GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_
-void PrintByteSegmentInObjectTo(const unsigned char *obj_bytes, size_t start,
- size_t count, ostream *os) {
+void PrintByteSegmentInObjectTo(const unsigned char* obj_bytes, size_t start,
+ size_t count, ostream* os) {
char text[5] = "";
for (size_t i = 0; i != count; i++) {
const size_t j = start + i;
@@ -79,8 +84,8 @@ void PrintByteSegmentInObjectTo(const unsigned char *obj_bytes, size_t start,
}
// Prints the bytes in the given value to the given ostream.
-void PrintBytesInObjectToImpl(const unsigned char *obj_bytes, size_t count,
- ostream *os) {
+void PrintBytesInObjectToImpl(const unsigned char* obj_bytes, size_t count,
+ ostream* os) {
// Tells the user how big the object is.
*os << count << "-byte object <";
@@ -101,24 +106,30 @@ void PrintBytesInObjectToImpl(const unsigned char *obj_bytes, size_t count,
*os << ">";
}
+// Helpers for widening a character to char32_t. Since the standard does not
+// specify if char / wchar_t is signed or unsigned, it is important to first
+// convert it to the unsigned type of the same width before widening it to
+// char32_t.
+template <typename CharType>
+char32_t ToChar32(CharType in) {
+ return static_cast<char32_t>(
+ static_cast<typename std::make_unsigned<CharType>::type>(in));
+}
+
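// Illustration (not part of the patch): the unsigned detour matters where
// plain char is signed. With char c = '\x80':
//   static_cast<char32_t>(c)  sign-extends to 0xFFFFFF80, whereas
//   ToChar32(c)               first narrows to unsigned char and yields 0x80.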
} // namespace
-namespace internal2 {
+namespace internal {
// Delegates to PrintBytesInObjectToImpl() to print the bytes in the
// given object. The delegation simplifies the implementation, which
// uses the << operator and thus is easier done outside of the
// ::testing::internal namespace, which contains a << operator that
// sometimes conflicts with the one in STL.
-void PrintBytesInObjectTo(const unsigned char *obj_bytes, size_t count,
- ostream *os) {
+void PrintBytesInObjectTo(const unsigned char* obj_bytes, size_t count,
+ ostream* os) {
PrintBytesInObjectToImpl(obj_bytes, count, os);
}
-} // namespace internal2
-
-namespace internal {
-
// Depending on the value of a char (or wchar_t), we print it in one
// of three formats:
// - as is if it's a printable ASCII (e.g. 'a', '2', ' '),
@@ -129,34 +140,52 @@ enum CharFormat { kAsIs, kHexEscape, kSpecialEscape };
// Returns true if c is a printable ASCII character. We test the
// value of c directly instead of calling isprint(), which is buggy on
// Windows Mobile.
-inline bool IsPrintableAscii(wchar_t c) { return 0x20 <= c && c <= 0x7E; }
-
-// Prints a wide or narrow char c as a character literal without the
-// quotes, escaping it when necessary; returns how c was formatted.
-// The template argument UnsignedChar is the unsigned version of Char,
-// which is the type of c.
-template <typename UnsignedChar, typename Char>
-static CharFormat PrintAsCharLiteralTo(Char c, ostream *os) {
- wchar_t w_c = static_cast<wchar_t>(c);
- switch (w_c) {
- case L'\0': *os << "\\0"; break;
- case L'\'': *os << "\\'"; break;
- case L'\\': *os << "\\\\"; break;
- case L'\a': *os << "\\a"; break;
- case L'\b': *os << "\\b"; break;
- case L'\f': *os << "\\f"; break;
- case L'\n': *os << "\\n"; break;
- case L'\r': *os << "\\r"; break;
- case L'\t': *os << "\\t"; break;
- case L'\v': *os << "\\v"; break;
+inline bool IsPrintableAscii(char32_t c) { return 0x20 <= c && c <= 0x7E; }
+
+// Prints c (of type char, char8_t, char16_t, char32_t, or wchar_t) as a
+// character literal without the quotes, escaping it when necessary; returns how
+// c was formatted.
+template <typename Char>
+static CharFormat PrintAsCharLiteralTo(Char c, ostream* os) {
+ const char32_t u_c = ToChar32(c);
+ switch (u_c) {
+ case L'\0':
+ *os << "\\0";
+ break;
+ case L'\'':
+ *os << "\\'";
+ break;
+ case L'\\':
+ *os << "\\\\";
+ break;
+ case L'\a':
+ *os << "\\a";
+ break;
+ case L'\b':
+ *os << "\\b";
+ break;
+ case L'\f':
+ *os << "\\f";
+ break;
+ case L'\n':
+ *os << "\\n";
+ break;
+ case L'\r':
+ *os << "\\r";
+ break;
+ case L'\t':
+ *os << "\\t";
+ break;
+ case L'\v':
+ *os << "\\v";
+ break;
default:
- if (IsPrintableAscii(w_c)) {
+ if (IsPrintableAscii(u_c)) {
*os << static_cast<char>(c);
return kAsIs;
} else {
ostream::fmtflags flags = os->flags();
- *os << "\\x" << std::hex << std::uppercase
- << static_cast<int>(static_cast<UnsignedChar>(c));
+ *os << "\\x" << std::hex << std::uppercase << static_cast<int>(u_c);
os->flags(flags);
return kHexEscape;
}
@@ -164,32 +193,65 @@ static CharFormat PrintAsCharLiteralTo(Char c, ostream *os) {
return kSpecialEscape;
}
-// Prints a wchar_t c as if it's part of a string literal, escaping it when
+// Prints a char32_t c as if it's part of a string literal, escaping it when
// necessary; returns how c was formatted.
-static CharFormat PrintAsStringLiteralTo(wchar_t c, ostream *os) {
+static CharFormat PrintAsStringLiteralTo(char32_t c, ostream* os) {
switch (c) {
- case L'\'': *os << "'"; return kAsIs;
- case L'"': *os << "\\\""; return kSpecialEscape;
- default: return PrintAsCharLiteralTo<wchar_t>(c, os);
+ case L'\'':
+ *os << "'";
+ return kAsIs;
+ case L'"':
+ *os << "\\\"";
+ return kSpecialEscape;
+ default:
+ return PrintAsCharLiteralTo(c, os);
}
}
+static const char* GetCharWidthPrefix(char) { return ""; }
+
+static const char* GetCharWidthPrefix(signed char) { return ""; }
+
+static const char* GetCharWidthPrefix(unsigned char) { return ""; }
+
+#ifdef __cpp_char8_t
+static const char* GetCharWidthPrefix(char8_t) { return "u8"; }
+#endif
+
+static const char* GetCharWidthPrefix(char16_t) { return "u"; }
+
+static const char* GetCharWidthPrefix(char32_t) { return "U"; }
+
+static const char* GetCharWidthPrefix(wchar_t) { return "L"; }
+
// Prints a char c as if it's part of a string literal, escaping it when
// necessary; returns how c was formatted.
-static CharFormat PrintAsStringLiteralTo(char c, ostream *os) {
- return PrintAsStringLiteralTo(
- static_cast<wchar_t>(static_cast<unsigned char>(c)), os);
+static CharFormat PrintAsStringLiteralTo(char c, ostream* os) {
+ return PrintAsStringLiteralTo(ToChar32(c), os);
}
-// Prints a wide or narrow character c and its code. '\0' is printed
-// as "'\\0'", other unprintable characters are also properly escaped
-// using the standard C++ escape sequence. The template argument
-// UnsignedChar is the unsigned version of Char, which is the type of c.
-template <typename UnsignedChar, typename Char>
-void PrintCharAndCodeTo(Char c, ostream *os) {
+#ifdef __cpp_char8_t
+static CharFormat PrintAsStringLiteralTo(char8_t c, ostream* os) {
+ return PrintAsStringLiteralTo(ToChar32(c), os);
+}
+#endif
+
+static CharFormat PrintAsStringLiteralTo(char16_t c, ostream* os) {
+ return PrintAsStringLiteralTo(ToChar32(c), os);
+}
+
+static CharFormat PrintAsStringLiteralTo(wchar_t c, ostream* os) {
+ return PrintAsStringLiteralTo(ToChar32(c), os);
+}
+
+// Prints a character c (of type char, char8_t, char16_t, char32_t, or wchar_t)
+// and its code. '\0' is printed as "'\\0'", other unprintable characters are
+// also properly escaped using the standard C++ escape sequence.
+template <typename Char>
+void PrintCharAndCodeTo(Char c, ostream* os) {
// First, print c as a literal in the most readable form we can find.
- *os << ((sizeof(c) > 1) ? "L'" : "'");
- const CharFormat format = PrintAsCharLiteralTo<UnsignedChar>(c, os);
+ *os << GetCharWidthPrefix(c) << "'";
+ const CharFormat format = PrintAsCharLiteralTo(c, os);
*os << "'";
// To aid user debugging, we also print c's code in decimal, unless
@@ -209,28 +271,75 @@ void PrintCharAndCodeTo(Char c, ostream *os) {
*os << ")";
}
-void PrintTo(unsigned char c, ::std::ostream *os) {
- PrintCharAndCodeTo<unsigned char>(c, os);
-}
-void PrintTo(signed char c, ::std::ostream *os) {
- PrintCharAndCodeTo<unsigned char>(c, os);
-}
+void PrintTo(unsigned char c, ::std::ostream* os) { PrintCharAndCodeTo(c, os); }
+void PrintTo(signed char c, ::std::ostream* os) { PrintCharAndCodeTo(c, os); }
// Prints a wchar_t as a symbol if it is printable or as its internal
// code otherwise and also as its code. L'\0' is printed as "L'\\0'".
-void PrintTo(wchar_t wc, ostream *os) { PrintCharAndCodeTo<wchar_t>(wc, os); }
+void PrintTo(wchar_t wc, ostream* os) { PrintCharAndCodeTo(wc, os); }
+
+// TODO(dcheng): Consider making this delegate to PrintCharAndCodeTo() as well.
+void PrintTo(char32_t c, ::std::ostream* os) {
+ *os << std::hex << "U+" << std::uppercase << std::setfill('0') << std::setw(4)
+ << static_cast<uint32_t>(c);
+}
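// Illustration (not part of the patch): with an std::ostringstream os,
//   PrintTo(U'\x20AC', &os);  // appends "U+20AC" (the euro sign)
//   PrintTo(U'\x7F', &os);    // setw(4) zero-pads short codes: "U+007F"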
+
+// gcc/clang __{u,}int128_t
+#if defined(__SIZEOF_INT128__)
+void PrintTo(__uint128_t v, ::std::ostream* os) {
+ if (v == 0) {
+ *os << "0";
+ return;
+ }
+
+ // Buffer large enough for ceil(log10(2^128))==39 and the null terminator
+ char buf[40];
+ char* p = buf + sizeof(buf);
+
+ // Some configurations have a __uint128_t, but no support for built-in
+ // division. Do manual long division instead.
+
+ uint64_t high = static_cast<uint64_t>(v >> 64);
+ uint64_t low = static_cast<uint64_t>(v);
+
+ *--p = 0;
+ while (high != 0 || low != 0) {
+ uint64_t high_mod = high % 10;
+ high = high / 10;
+ // This is the long division algorithm specialized for a divisor of 10 and
+ // only two elements.
+ // Notable values:
+ // 2^64 / 10 == 1844674407370955161
+ // 2^64 % 10 == 6
+ const uint64_t carry = 6 * high_mod + low % 10;
+ low = low / 10 + high_mod * 1844674407370955161 + carry / 10;
+
+ char digit = static_cast<char>(carry % 10);
+ *--p = '0' + digit;
+ }
+ *os << p;
+}
+void PrintTo(__int128_t v, ::std::ostream* os) {
+ __uint128_t uv = static_cast<__uint128_t>(v);
+ if (v < 0) {
+ *os << "-";
+ uv = -uv;
+ }
+ PrintTo(uv, os);
+}
+#endif // __SIZEOF_INT128__
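// Why the step above is correct (derivation, not part of the patch): write
// v = high*2^64 + low, h = high % 10, l = low % 10, q = 1844674407370955161,
// so that 2^64 == 10*q + 6. Then
//   v = 10*((high/10)*2^64 + h*q + low/10) + (6*h + l)
// so the emitted digit is v % 10 == carry % 10 with carry = 6*h + l, and the
// next low word is exactly low/10 + h*q + carry/10, as the loop computes.
// The sum stays below 2^64, so the uint64_t arithmetic cannot wrap.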
// Prints the given array of characters to the ostream. CharType must be either
-// char or wchar_t.
+// char, char8_t, char16_t, char32_t, or wchar_t.
// The array starts at begin, the length is len, it may include '\0' characters
// and may not be NUL-terminated.
template <typename CharType>
GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_
GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ static CharFormat
- PrintCharsAsStringTo(const CharType *begin, size_t len, ostream *os) {
- const char *const kQuoteBegin = sizeof(CharType) == 1 ? "\"" : "L\"";
- *os << kQuoteBegin;
+ PrintCharsAsStringTo(const CharType* begin, size_t len, ostream* os) {
+ const char* const quote_prefix = GetCharWidthPrefix(*begin);
+ *os << quote_prefix << "\"";
bool is_previous_hex = false;
CharFormat print_format = kAsIs;
for (size_t index = 0; index < len; ++index) {
@@ -239,7 +348,7 @@ GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
// Previous character is of '\x..' form and this character can be
// interpreted as another hexadecimal digit in its number. Break string to
// disambiguate.
- *os << "\" " << kQuoteBegin;
+ *os << "\" " << quote_prefix << "\"";
}
is_previous_hex = PrintAsStringLiteralTo(cur, os) == kHexEscape;
// Remember if any characters required hex escaping.
@@ -257,8 +366,8 @@ template <typename CharType>
GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_
GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ static void
- UniversalPrintCharArray(const CharType *begin, size_t len,
- ostream *os) {
+ UniversalPrintCharArray(const CharType* begin, size_t len,
+ ostream* os) {
// The code
// const char kFoo[] = "foo";
// generates an array of 4, not 3, elements, with the last one being '\0'.
@@ -280,26 +389,61 @@ GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
}
// Prints a (const) char array of 'len' elements, starting at address 'begin'.
-void UniversalPrintArray(const char *begin, size_t len, ostream *os) {
+void UniversalPrintArray(const char* begin, size_t len, ostream* os) {
+ UniversalPrintCharArray(begin, len, os);
+}
+
+#ifdef __cpp_char8_t
+// Prints a (const) char8_t array of 'len' elements, starting at address
+// 'begin'.
+void UniversalPrintArray(const char8_t* begin, size_t len, ostream* os) {
+ UniversalPrintCharArray(begin, len, os);
+}
+#endif
+
+// Prints a (const) char16_t array of 'len' elements, starting at address
+// 'begin'.
+void UniversalPrintArray(const char16_t* begin, size_t len, ostream* os) {
+ UniversalPrintCharArray(begin, len, os);
+}
+
+// Prints a (const) char32_t array of 'len' elements, starting at address
+// 'begin'.
+void UniversalPrintArray(const char32_t* begin, size_t len, ostream* os) {
UniversalPrintCharArray(begin, len, os);
}
// Prints a (const) wchar_t array of 'len' elements, starting at address
// 'begin'.
-void UniversalPrintArray(const wchar_t *begin, size_t len, ostream *os) {
+void UniversalPrintArray(const wchar_t* begin, size_t len, ostream* os) {
UniversalPrintCharArray(begin, len, os);
}
-// Prints the given C string to the ostream.
-void PrintTo(const char *s, ostream *os) {
+namespace {
+
+// Prints a null-terminated C-style string to the ostream.
+template <typename Char>
+void PrintCStringTo(const Char* s, ostream* os) {
if (s == nullptr) {
*os << "NULL";
} else {
- *os << ImplicitCast_<const void *>(s) << " pointing to ";
- PrintCharsAsStringTo(s, strlen(s), os);
+ *os << ImplicitCast_<const void*>(s) << " pointing to ";
+ PrintCharsAsStringTo(s, std::char_traits<Char>::length(s), os);
}
}
+} // anonymous namespace
+
+void PrintTo(const char* s, ostream* os) { PrintCStringTo(s, os); }
+
+#ifdef __cpp_char8_t
+void PrintTo(const char8_t* s, ostream* os) { PrintCStringTo(s, os); }
+#endif
+
+void PrintTo(const char16_t* s, ostream* os) { PrintCStringTo(s, os); }
+
+void PrintTo(const char32_t* s, ostream* os) { PrintCStringTo(s, os); }
+
// MSVC compiler can be configured to define wchar_t as a typedef
// of unsigned short. Defining an overload for const wchar_t* in that case
// would cause pointers to unsigned shorts to be printed as wide strings,
@@ -308,20 +452,13 @@ void PrintTo(const char *s, ostream *os) {
// wchar_t is implemented as a native type.
#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
// Prints the given wide C string to the ostream.
-void PrintTo(const wchar_t *s, ostream *os) {
- if (s == nullptr) {
- *os << "NULL";
- } else {
- *os << ImplicitCast_<const void *>(s) << " pointing to ";
- PrintCharsAsStringTo(s, wcslen(s), os);
- }
-}
+void PrintTo(const wchar_t* s, ostream* os) { PrintCStringTo(s, os); }
#endif // wchar_t is native
namespace {
-bool ContainsUnprintableControlCodes(const char *str, size_t length) {
- const unsigned char *s = reinterpret_cast<const unsigned char *>(str);
+bool ContainsUnprintableControlCodes(const char* str, size_t length) {
+ const unsigned char* s = reinterpret_cast<const unsigned char*>(str);
for (size_t i = 0; i < length; i++) {
unsigned char ch = *s++;
@@ -329,8 +466,10 @@ bool ContainsUnprintableControlCodes(const char *str, size_t length) {
switch (ch) {
case '\t':
case '\n':
- case '\r': break;
- default: return true;
+ case '\r':
+ break;
+ default:
+ return true;
}
}
}
@@ -339,8 +478,8 @@ bool ContainsUnprintableControlCodes(const char *str, size_t length) {
bool IsUTF8TrailByte(unsigned char t) { return 0x80 <= t && t <= 0xbf; }
-bool IsValidUTF8(const char *str, size_t length) {
- const unsigned char *s = reinterpret_cast<const unsigned char *>(str);
+bool IsValidUTF8(const char* str, size_t length) {
+ const unsigned char* s = reinterpret_cast<const unsigned char*>(str);
for (size_t i = 0; i < length;) {
unsigned char lead = s[i++];
@@ -372,7 +511,7 @@ bool IsValidUTF8(const char *str, size_t length) {
return true;
}
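// Illustration (standard UTF-8, not specific to this patch): U+20AC, the
// euro sign, encodes as E2 82 AC; E2 opens a three-byte sequence and 82, AC
// both satisfy IsUTF8TrailByte(), so IsValidUTF8("\xE2\x82\xAC", 3) is true,
// while IsValidUTF8("\x82", 1) is false: a bare trail byte cannot lead.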
-void ConditionalPrintAsText(const char *str, size_t length, ostream *os) {
+void ConditionalPrintAsText(const char* str, size_t length, ostream* os) {
if (!ContainsUnprintableControlCodes(str, length) &&
IsValidUTF8(str, length)) {
*os << "\n As Text: \"" << str << "\"";
@@ -381,16 +520,30 @@ void ConditionalPrintAsText(const char *str, size_t length, ostream *os) {
} // anonymous namespace
-void PrintStringTo(const ::std::string &s, ostream *os) {
+void PrintStringTo(const ::std::string& s, ostream* os) {
if (PrintCharsAsStringTo(s.data(), s.size(), os) == kHexEscape) {
- if (GTEST_FLAG(print_utf8)) {
+ if (GTEST_FLAG_GET(print_utf8)) {
ConditionalPrintAsText(s.data(), s.size(), os);
}
}
}
+#ifdef __cpp_char8_t
+void PrintU8StringTo(const ::std::u8string& s, ostream* os) {
+ PrintCharsAsStringTo(s.data(), s.size(), os);
+}
+#endif
+
+void PrintU16StringTo(const ::std::u16string& s, ostream* os) {
+ PrintCharsAsStringTo(s.data(), s.size(), os);
+}
+
+void PrintU32StringTo(const ::std::u32string& s, ostream* os) {
+ PrintCharsAsStringTo(s.data(), s.size(), os);
+}
+
#if GTEST_HAS_STD_WSTRING
-void PrintWideStringTo(const ::std::wstring &s, ostream *os) {
+void PrintWideStringTo(const ::std::wstring& s, ostream* os) {
PrintCharsAsStringTo(s.data(), s.size(), os);
}
#endif // GTEST_HAS_STD_WSTRING
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-test-part.cc b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-test-part.cc
index 44b0e2b3f0c..eb7c8d1cf92 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-test-part.cc
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-test-part.cc
@@ -41,34 +41,32 @@ using internal::GetUnitTestImpl;
// Gets the summary of the failure message by omitting the stack trace
// in it.
-std::string TestPartResult::ExtractSummary(const char *message) {
- const char *const stack_trace = strstr(message, internal::kStackTraceMarker);
+std::string TestPartResult::ExtractSummary(const char* message) {
+ const char* const stack_trace = strstr(message, internal::kStackTraceMarker);
return stack_trace == nullptr ? message : std::string(message, stack_trace);
}
// Prints a TestPartResult object.
-std::ostream &operator<<(std::ostream &os, const TestPartResult &result) {
+std::ostream& operator<<(std::ostream& os, const TestPartResult& result) {
return os << internal::FormatFileLocation(result.file_name(),
result.line_number())
<< " "
- << (result.type() == TestPartResult::kSuccess
- ? "Success"
- : result.type() == TestPartResult::kSkip
- ? "Skipped"
- : result.type() == TestPartResult::kFatalFailure
- ? "Fatal failure"
- : "Non-fatal failure")
+ << (result.type() == TestPartResult::kSuccess ? "Success"
+ : result.type() == TestPartResult::kSkip ? "Skipped"
+ : result.type() == TestPartResult::kFatalFailure
+ ? "Fatal failure"
+ : "Non-fatal failure")
<< ":\n"
<< result.message() << std::endl;
}
// Appends a TestPartResult to the array.
-void TestPartResultArray::Append(const TestPartResult &result) {
+void TestPartResultArray::Append(const TestPartResult& result) {
array_.push_back(result);
}
// Returns the TestPartResult at the given index (0-based).
-const TestPartResult &TestPartResultArray::GetTestPartResult(int index) const {
+const TestPartResult& TestPartResultArray::GetTestPartResult(int index) const {
if (index < 0 || index >= size()) {
printf("\nInvalid index (%d) into TestPartResultArray.\n", index);
internal::posix::Abort();
@@ -97,7 +95,7 @@ HasNewFatalFailureHelper::~HasNewFatalFailureHelper() {
}
void HasNewFatalFailureHelper::ReportTestPartResult(
- const TestPartResult &result) {
+ const TestPartResult& result) {
if (result.fatally_failed()) has_new_fatal_failure_ = true;
original_reporter_->ReportTestPartResult(result);
}
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-typed-test.cc b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-typed-test.cc
index 04effad17ac..a2828b83c66 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-typed-test.cc
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest-typed-test.cc
@@ -34,16 +34,14 @@
namespace testing {
namespace internal {
-#if GTEST_HAS_TYPED_TEST_P
-
// Skips to the first non-space char in str. Returns an empty string if str
// contains only whitespace characters.
-static const char *SkipSpaces(const char *str) {
+static const char* SkipSpaces(const char* str) {
while (IsSpace(*str)) str++;
return str;
}
-static std::vector<std::string> SplitIntoTestNames(const char *src) {
+static std::vector<std::string> SplitIntoTestNames(const char* src) {
std::vector<std::string> name_vec;
src = SkipSpaces(src);
for (; src != nullptr; src = SkipComma(src)) {
@@ -55,9 +53,9 @@ static std::vector<std::string> SplitIntoTestNames(const char *src) {
// Verifies that registered_tests match the test names in
// registered_tests_; returns registered_tests if successful, or
// aborts the program otherwise.
-const char *TypedTestSuitePState::VerifyRegisteredTestNames(
- const char *test_suite_name, const char *file, int line,
- const char *registered_tests) {
+const char* TypedTestSuitePState::VerifyRegisteredTestNames(
+ const char* test_suite_name, const char* file, int line,
+ const char* registered_tests) {
RegisterTypeParameterizedTestSuite(test_suite_name, CodeLocation(file, line));
typedef RegisteredTestsMap::const_iterator RegisteredTestIter;
@@ -70,22 +68,13 @@ const char *TypedTestSuitePState::VerifyRegisteredTestNames(
std::set<std::string> tests;
for (std::vector<std::string>::const_iterator name_it = name_vec.begin();
name_it != name_vec.end(); ++name_it) {
- const std::string &name = *name_it;
+ const std::string& name = *name_it;
if (tests.count(name) != 0) {
errors << "Test " << name << " is listed more than once.\n";
continue;
}
- bool found = false;
- for (RegisteredTestIter it = registered_tests_.begin();
- it != registered_tests_.end(); ++it) {
- if (name == it->first) {
- found = true;
- break;
- }
- }
-
- if (found) {
+ if (registered_tests_.count(name) != 0) {
tests.insert(name);
} else {
errors << "No test named " << name
@@ -100,7 +89,7 @@ const char *TypedTestSuitePState::VerifyRegisteredTestNames(
}
}
- const std::string &errors_str = errors.GetString();
+ const std::string& errors_str = errors.GetString();
if (errors_str != "") {
fprintf(stderr, "%s %s", FormatFileLocation(file, line).c_str(),
errors_str.c_str());
@@ -111,7 +100,5 @@ const char *TypedTestSuitePState::VerifyRegisteredTestNames(
return registered_tests;
}
-#endif // GTEST_HAS_TYPED_TEST_P
-
} // namespace internal
} // namespace testing
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest.cc b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest.cc
index 021c82e0b57..6f31dd22603 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest.cc
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest.cc
@@ -31,11 +31,8 @@
// The Google C++ Testing and Mocking Framework (Google Test)
#include "gtest/gtest.h"
-#include "gtest/internal/custom/gtest.h"
-#include "gtest/gtest-spi.h"
#include <ctype.h>
-#include <math.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
@@ -44,18 +41,25 @@
#include <wctype.h>
#include <algorithm>
+#include <chrono> // NOLINT
+#include <cmath>
#include <cstdint>
+#include <initializer_list>
#include <iomanip>
+#include <iterator>
#include <limits>
#include <list>
#include <map>
#include <ostream> // NOLINT
#include <sstream>
+#include <unordered_set>
#include <vector>
-#if GTEST_OS_LINUX
+#include "gtest/gtest-assertion-result.h"
+#include "gtest/gtest-spi.h"
+#include "gtest/internal/custom/gtest.h"
-#define GTEST_HAS_GETTIMEOFDAY_ 1
+#if GTEST_OS_LINUX
#include <fcntl.h> // NOLINT
#include <limits.h> // NOLINT
@@ -65,10 +69,10 @@
#include <sys/mman.h> // NOLINT
#include <sys/time.h> // NOLINT
#include <unistd.h> // NOLINT
+
#include <string>
#elif GTEST_OS_ZOS
-#define GTEST_HAS_GETTIMEOFDAY_ 1
#include <sys/time.h> // NOLINT
// On z/OS we additionally need strings.h for strcasecmp.
@@ -85,26 +89,20 @@
#undef min
#ifdef _MSC_VER
-#include <crtdbg.h> // NOLINT
-#include <debugapi.h> // NOLINT
+#include <crtdbg.h> // NOLINT
#endif
#include <io.h> // NOLINT
+#include <sys/stat.h> // NOLINT
#include <sys/timeb.h> // NOLINT
#include <sys/types.h> // NOLINT
-#include <sys/stat.h> // NOLINT
#if GTEST_OS_WINDOWS_MINGW
-// MinGW has gettimeofday() but not _ftime64().
-#define GTEST_HAS_GETTIMEOFDAY_ 1
#include <sys/time.h> // NOLINT
#endif // GTEST_OS_WINDOWS_MINGW
#else
-// Assume other platforms have gettimeofday().
-#define GTEST_HAS_GETTIMEOFDAY_ 1
-
// cpplint thinks that the header is already included, so we want to
// silence it.
#include <sys/time.h> // NOLINT
@@ -139,7 +137,10 @@
#include "absl/debugging/failure_signal_handler.h"
#include "absl/debugging/stacktrace.h"
#include "absl/debugging/symbolize.h"
+#include "absl/flags/parse.h"
+#include "absl/flags/usage.h"
#include "absl/strings/str_cat.h"
+#include "absl/strings/str_replace.h"
#endif // GTEST_HAS_ABSL
namespace testing {
@@ -185,9 +186,9 @@ const char kStackTraceMarker[] = "\nStack trace:\n";
// is specified on the command line.
bool g_help_flag = false;
-// Utilty function to Open File for Writing
-static FILE *OpenFileForWriting(const std::string &output_file) {
- FILE *fileout = nullptr;
+// Utility function to Open File for Writing
+static FILE* OpenFileForWriting(const std::string& output_file) {
+ FILE* fileout = nullptr;
FilePath output_file_path(output_file);
FilePath output_dir(output_file_path.RemoveFileName());
@@ -204,8 +205,8 @@ static FILE *OpenFileForWriting(const std::string &output_file) {
// Bazel passes in the argument to '--test_filter' via the TESTBRIDGE_TEST_ONLY
// environment variable.
-static const char *GetDefaultFilter() {
- const char *const testbridge_test_only =
+static const char* GetDefaultFilter() {
+ const char* const testbridge_test_only =
internal::posix::GetEnv("TESTBRIDGE_TEST_ONLY");
if (testbridge_test_only != nullptr) {
return testbridge_test_only;
@@ -213,30 +214,53 @@ static const char *GetDefaultFilter() {
return kUniversalFilter;
}
+// Bazel passes in the argument to '--test_runner_fail_fast' via the
+// TESTBRIDGE_TEST_RUNNER_FAIL_FAST environment variable.
+static bool GetDefaultFailFast() {
+ const char* const testbridge_test_runner_fail_fast =
+ internal::posix::GetEnv("TESTBRIDGE_TEST_RUNNER_FAIL_FAST");
+ if (testbridge_test_runner_fail_fast != nullptr) {
+ return strcmp(testbridge_test_runner_fail_fast, "1") == 0;
+ }
+ return false;
+}
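// Illustration (not part of the patch): the helper above returns true if and
// only if the bridge variable holds the literal string "1", so
//   TESTBRIDGE_TEST_RUNNER_FAIL_FAST=1   -> fail_fast defaults to true
//   unset, "0", "true", ...              -> fail_fast defaults to false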
+
+} // namespace testing
+
+GTEST_DEFINE_bool_(
+ fail_fast,
+ testing::internal::BoolFromGTestEnv("fail_fast",
+ testing::GetDefaultFailFast()),
+ "True if and only if a test failure should stop further test execution.");
+
GTEST_DEFINE_bool_(
also_run_disabled_tests,
- internal::BoolFromGTestEnv("also_run_disabled_tests", false),
+ testing::internal::BoolFromGTestEnv("also_run_disabled_tests", false),
"Run disabled tests too, in addition to the tests normally being run.");
GTEST_DEFINE_bool_(
- break_on_failure, internal::BoolFromGTestEnv("break_on_failure", false),
+ break_on_failure,
+ testing::internal::BoolFromGTestEnv("break_on_failure", false),
"True if and only if a failed assertion should be a debugger "
"break-point.");
GTEST_DEFINE_bool_(catch_exceptions,
- internal::BoolFromGTestEnv("catch_exceptions", true),
+ testing::internal::BoolFromGTestEnv("catch_exceptions",
+ true),
"True if and only if " GTEST_NAME_
" should catch exceptions and treat them as test failures.");
GTEST_DEFINE_string_(
- color, internal::StringFromGTestEnv("color", "auto"),
+ color, testing::internal::StringFromGTestEnv("color", "auto"),
"Whether to use colors in the output. Valid values: yes, no, "
"and auto. 'auto' means to use colors if the output is "
"being sent to a terminal and the TERM environment variable "
"is set to a terminal type that supports colors.");
GTEST_DEFINE_string_(
- filter, internal::StringFromGTestEnv("filter", GetDefaultFilter()),
+ filter,
+ testing::internal::StringFromGTestEnv("filter",
+ testing::GetDefaultFilter()),
"A colon-separated list of glob (not regex) patterns "
"for filtering the tests to run, optionally followed by a "
"'-' and a : separated list of negative patterns (tests to "
@@ -245,7 +269,8 @@ GTEST_DEFINE_string_(
GTEST_DEFINE_bool_(
install_failure_signal_handler,
- internal::BoolFromGTestEnv("install_failure_signal_handler", false),
+ testing::internal::BoolFromGTestEnv("install_failure_signal_handler",
+ false),
"If true and supported on the current platform, " GTEST_NAME_
" should "
"install a signal handler that dumps debugging information when fatal "
@@ -260,8 +285,8 @@ GTEST_DEFINE_bool_(list_tests, false, "List all tests without running them.");
// ''
GTEST_DEFINE_string_(
output,
- internal::StringFromGTestEnv("output",
- internal::OutputFlagAlsoCheckEnvVar().c_str()),
+ testing::internal::StringFromGTestEnv(
+ "output", testing::internal::OutputFlagAlsoCheckEnvVar().c_str()),
"A format (defaults to \"xml\" but can be specified to be \"json\"), "
"optionally followed by a colon and an output file name or directory. "
"A directory is indicated by a trailing pathname separator. "
@@ -271,57 +296,80 @@ GTEST_DEFINE_string_(
"executable's name and, if necessary, made unique by adding "
"digits.");
-GTEST_DEFINE_bool_(print_time, internal::BoolFromGTestEnv("print_time", true),
+GTEST_DEFINE_bool_(
+ brief, testing::internal::BoolFromGTestEnv("brief", false),
+ "True if only test failures should be displayed in text output.");
+
+GTEST_DEFINE_bool_(print_time,
+ testing::internal::BoolFromGTestEnv("print_time", true),
"True if and only if " GTEST_NAME_
" should display elapsed time in text output.");
-GTEST_DEFINE_bool_(print_utf8, internal::BoolFromGTestEnv("print_utf8", true),
+GTEST_DEFINE_bool_(print_utf8,
+ testing::internal::BoolFromGTestEnv("print_utf8", true),
"True if and only if " GTEST_NAME_
" prints UTF8 characters as text.");
GTEST_DEFINE_int32_(
- random_seed, internal::Int32FromGTestEnv("random_seed", 0),
+ random_seed, testing::internal::Int32FromGTestEnv("random_seed", 0),
"Random number seed to use when shuffling test orders. Must be in range "
"[1, 99999], or 0 to use a seed based on the current time.");
GTEST_DEFINE_int32_(
- repeat, internal::Int32FromGTestEnv("repeat", 1),
+ repeat, testing::internal::Int32FromGTestEnv("repeat", 1),
"How many times to repeat each test. Specify a negative number "
"for repeating forever. Useful for shaking out flaky tests.");
+GTEST_DEFINE_bool_(
+ recreate_environments_when_repeating,
+ testing::internal::BoolFromGTestEnv("recreate_environments_when_repeating",
+ false),
+ "Controls whether global test environments are recreated for each repeat "
+ "of the tests. If set to false the global test environments are only set "
+ "up once, for the first iteration, and only torn down once, for the last. "
+ "Useful for shaking out flaky tests with stable, expensive test "
+ "environments. If --gtest_repeat is set to a negative number, meaning "
+ "there is no last run, the environments will always be recreated to avoid "
+ "leaks.");
+
GTEST_DEFINE_bool_(show_internal_stack_frames, false,
"True if and only if " GTEST_NAME_
" should include internal stack frames when "
"printing test failure stack traces.");
-GTEST_DEFINE_bool_(shuffle, internal::BoolFromGTestEnv("shuffle", false),
+GTEST_DEFINE_bool_(shuffle,
+ testing::internal::BoolFromGTestEnv("shuffle", false),
"True if and only if " GTEST_NAME_
" should randomize tests' order on every run.");
GTEST_DEFINE_int32_(
stack_trace_depth,
- internal::Int32FromGTestEnv("stack_trace_depth", kMaxStackTraceDepth),
+ testing::internal::Int32FromGTestEnv("stack_trace_depth",
+ testing::kMaxStackTraceDepth),
"The maximum number of stack frames to print when an "
"assertion fails. The valid range is 0 through 100, inclusive.");
GTEST_DEFINE_string_(
- stream_result_to, internal::StringFromGTestEnv("stream_result_to", ""),
+ stream_result_to,
+ testing::internal::StringFromGTestEnv("stream_result_to", ""),
"This flag specifies the host name and the port number on which to stream "
"test results. Example: \"localhost:555\". The flag is effective only on "
"Linux.");
GTEST_DEFINE_bool_(
- throw_on_failure, internal::BoolFromGTestEnv("throw_on_failure", false),
+ throw_on_failure,
+ testing::internal::BoolFromGTestEnv("throw_on_failure", false),
"When this flag is specified, a failed assertion will throw an exception "
"if exceptions are enabled or exit the program with a non-zero code "
"otherwise. For use with an external test framework.");
#if GTEST_USE_OWN_FLAGFILE_FLAG_
GTEST_DEFINE_string_(
- flagfile, internal::StringFromGTestEnv("flagfile", ""),
+ flagfile, testing::internal::StringFromGTestEnv("flagfile", ""),
"This flag specifies the flagfile to read command-line flags from.");
#endif // GTEST_USE_OWN_FLAGFILE_FLAG_
+namespace testing {
namespace internal {
// Generates a random number from [0, range), using a Linear
@@ -351,7 +399,7 @@ static bool GTestIsInitialized() { return GetArgvs().size() > 0; }
// Iterates over a vector of TestSuites, keeping a running sum of the
// results of calling a given int-returning method on each.
// Returns the sum.
-static int SumOverTestSuiteList(const std::vector<TestSuite *> &case_list,
+static int SumOverTestSuiteList(const std::vector<TestSuite*>& case_list,
int (TestSuite::*method)() const) {
int sum = 0;
for (size_t i = 0; i < case_list.size(); i++) {
@@ -361,30 +409,30 @@ static int SumOverTestSuiteList(const std::vector<TestSuite *> &case_list,
}
// Returns true if and only if the test suite passed.
-static bool TestSuitePassed(const TestSuite *test_suite) {
+static bool TestSuitePassed(const TestSuite* test_suite) {
return test_suite->should_run() && test_suite->Passed();
}
// Returns true if and only if the test suite failed.
-static bool TestSuiteFailed(const TestSuite *test_suite) {
+static bool TestSuiteFailed(const TestSuite* test_suite) {
return test_suite->should_run() && test_suite->Failed();
}
// Returns true if and only if test_suite contains at least one test that
// should run.
-static bool ShouldRunTestSuite(const TestSuite *test_suite) {
+static bool ShouldRunTestSuite(const TestSuite* test_suite) {
return test_suite->should_run();
}
// AssertHelper constructor.
-AssertHelper::AssertHelper(TestPartResult::Type type, const char *file,
- int line, const char *message)
+AssertHelper::AssertHelper(TestPartResult::Type type, const char* file,
+ int line, const char* message)
: data_(new AssertHelperData(type, file, line, message)) {}
AssertHelper::~AssertHelper() { delete data_; }
// Message assignment, for assertion streaming support.
-void AssertHelper::operator=(const Message &message) const {
+void AssertHelper::operator=(const Message& message) const {
UnitTest::GetInstance()->AddTestPartResult(
data_->type, data_->file, data_->line,
AppendUserMessage(data_->message, message),
@@ -396,7 +444,7 @@ void AssertHelper::operator=(const Message &message) const {
namespace {
// When TEST_P is found without a matching INSTANTIATE_TEST_SUITE_P
-// to creates test cases for it, a syntetic test case is
+// to create test cases for it, a synthetic test case is
// inserted to report either an error or a log message.
//
// This configuration bit will likely be removed at some point.
@@ -406,9 +454,10 @@ constexpr bool kErrorOnUninstantiatedTypeParameterizedTest = true;
// A test that fails at a given file/line location with a given message.
class FailureTest : public Test {
public:
- explicit FailureTest(const CodeLocation &loc, std::string error_message,
+ explicit FailureTest(const CodeLocation& loc, std::string error_message,
bool as_error)
- : loc_(loc), error_message_(std::move(error_message)),
+ : loc_(loc),
+ error_message_(std::move(error_message)),
as_error_(as_error) {}
void TestBody() override {
@@ -428,20 +477,20 @@ class FailureTest : public Test {
} // namespace
-std::set<std::string> *GetIgnoredParameterizedTestSuites() {
+std::set<std::string>* GetIgnoredParameterizedTestSuites() {
return UnitTest::GetInstance()->impl()->ignored_parameterized_test_suites();
}
// Adds the given test_suite to the list of suites allowed to go uninstantiated.
-MarkAsIgnored::MarkAsIgnored(const char *test_suite) {
+MarkAsIgnored::MarkAsIgnored(const char* test_suite) {
GetIgnoredParameterizedTestSuites()->insert(test_suite);
}
// If this parameterized test suite has no instantiations (and that
// has not been marked as okay), emit a test case reporting that.
-void InsertSyntheticTestCase(const std::string &name, CodeLocation location,
+void InsertSyntheticTestCase(const std::string& name, CodeLocation location,
bool has_test_p) {
- const auto &ignored = *GetIgnoredParameterizedTestSuites();
+ const auto& ignored = *GetIgnoredParameterizedTestSuites();
if (ignored.find(name) != ignored.end()) return;
const char kMissingInstantiation[] = //
@@ -463,7 +512,7 @@ void InsertSyntheticTestCase(const std::string &name, CodeLocation location,
"removed but the rest got left behind.";
std::string message =
- "Paramaterized test suite " + name +
+ "Parameterized test suite " + name +
(has_test_p ? kMissingInstantiation : kMissingTestCase) +
"\n\n"
"To suppress this error for this test suite, insert the following line "
@@ -472,7 +521,7 @@ void InsertSyntheticTestCase(const std::string &name, CodeLocation location,
"GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(" +
name + ");";
- std::string full_name = "UninstantiatedParamaterizedTestSuite<" + name + ">";
+ std::string full_name = "UninstantiatedParameterizedTestSuite<" + name + ">";
RegisterTest( //
"GoogleTestVerification", full_name.c_str(),
nullptr, // No type parameter.
@@ -483,25 +532,25 @@ void InsertSyntheticTestCase(const std::string &name, CodeLocation location,
});
}
-void RegisterTypeParameterizedTestSuite(const char *test_suite_name,
+void RegisterTypeParameterizedTestSuite(const char* test_suite_name,
CodeLocation code_location) {
GetUnitTestImpl()->type_parameterized_test_registry().RegisterTestSuite(
test_suite_name, code_location);
}
-void RegisterTypeParameterizedTestSuiteInstantiation(const char *case_name) {
+void RegisterTypeParameterizedTestSuiteInstantiation(const char* case_name) {
GetUnitTestImpl()->type_parameterized_test_registry().RegisterInstantiation(
case_name);
}
void TypeParameterizedTestSuiteRegistry::RegisterTestSuite(
- const char *test_suite_name, CodeLocation code_location) {
+ const char* test_suite_name, CodeLocation code_location) {
suites_.emplace(std::string(test_suite_name),
TypeParameterizedTestSuiteInfo(code_location));
}
void TypeParameterizedTestSuiteRegistry::RegisterInstantiation(
- const char *test_suite_name) {
+ const char* test_suite_name) {
auto it = suites_.find(std::string(test_suite_name));
if (it != suites_.end()) {
it->second.instantiated = true;
@@ -512,13 +561,13 @@ void TypeParameterizedTestSuiteRegistry::RegisterInstantiation(
}
void TypeParameterizedTestSuiteRegistry::CheckForInstantiations() {
- const auto &ignored = *GetIgnoredParameterizedTestSuites();
- for (const auto &testcase : suites_) {
+ const auto& ignored = *GetIgnoredParameterizedTestSuites();
+ for (const auto& testcase : suites_) {
if (testcase.second.instantiated) continue;
if (ignored.find(testcase.first) != ignored.end()) continue;
std::string message =
- "Type paramaterized test suite " + testcase.first +
+ "Type parameterized test suite " + testcase.first +
" is defined via REGISTER_TYPED_TEST_SUITE_P, but never instantiated "
"via INSTANTIATE_TYPED_TEST_SUITE_P. None of the test cases will run."
"\n\n"
@@ -528,13 +577,13 @@ void TypeParameterizedTestSuiteRegistry::CheckForInstantiations() {
"utilities.)"
"\n\n"
"To suppress this error for this test suite, insert the following line "
- "(in a non-header) in the namespace it is definedin in:"
+ "(in a non-header) in the namespace it is defined in:"
"\n\n"
"GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(" +
testcase.first + ");";
std::string full_name =
- "UninstantiatedTypeParamaterizedTestSuite<" + testcase.first + ">";
+ "UninstantiatedTypeParameterizedTestSuite<" + testcase.first + ">";
RegisterTest( //
"GoogleTestVerification", full_name.c_str(),
nullptr, // No type parameter.
@@ -554,7 +603,7 @@ static ::std::vector<std::string> g_argvs;
#if defined(GTEST_CUSTOM_GET_ARGVS_)
// GTEST_CUSTOM_GET_ARGVS_() may return a container of std::string or
// ::string. This code converts it to the appropriate type.
- const auto &custom = GTEST_CUSTOM_GET_ARGVS_();
+ const auto& custom = GTEST_CUSTOM_GET_ARGVS_();
return ::std::vector<std::string>(custom.begin(), custom.end());
#else // defined(GTEST_CUSTOM_GET_ARGVS_)
return g_argvs;
@@ -579,8 +628,9 @@ FilePath GetCurrentExecutableName() {
// Returns the output format, or "" for normal printed output.
std::string UnitTestOptions::GetOutputFormat() {
- const char *const gtest_output_flag = GTEST_FLAG(output).c_str();
- const char *const colon = strchr(gtest_output_flag, ':');
+ std::string s = GTEST_FLAG_GET(output);
+ const char* const gtest_output_flag = s.c_str();
+ const char* const colon = strchr(gtest_output_flag, ':');
return (colon == nullptr)
? std::string(gtest_output_flag)
: std::string(gtest_output_flag,
@@ -590,12 +640,13 @@ std::string UnitTestOptions::GetOutputFormat() {
// Returns the name of the requested output file, or the default if none
// was explicitly specified.
std::string UnitTestOptions::GetAbsolutePathToOutputFile() {
- const char *const gtest_output_flag = GTEST_FLAG(output).c_str();
+ std::string s = GTEST_FLAG_GET(output);
+ const char* const gtest_output_flag = s.c_str();
std::string format = GetOutputFormat();
if (format.empty()) format = std::string(kDefaultOutputFormat);
- const char *const colon = strchr(gtest_output_flag, ':');
+ const char* const colon = strchr(gtest_output_flag, ':');
if (colon == nullptr)
return internal::FilePath::MakeFileName(
internal::FilePath(
@@ -617,76 +668,172 @@ std::string UnitTestOptions::GetAbsolutePathToOutputFile() {
return result.string();
}
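
Worked example of the parsing above (flag values hypothetical): with
--gtest_output=json:reports/, GetOutputFormat() returns the text before the
first ':' ("json"), and the trailing pathname separator makes
GetAbsolutePathToOutputFile() treat "reports/" as a directory, deriving the
file name from the executable's name; with --gtest_output=xml (no colon), the
whole value is the format and the default output file name is used.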
-// Returns true if and only if the wildcard pattern matches the string.
-// The first ':' or '\0' character in pattern marks the end of it.
+// Returns true if and only if the wildcard pattern matches the string. Each
+// pattern consists of regular characters, single-character wildcards (?), and
+// multi-character wildcards (*).
//
-// This recursive algorithm isn't very efficient, but is clear and
-// works well enough for matching test names, which are short.
-bool UnitTestOptions::PatternMatchesString(const char *pattern,
- const char *str) {
- switch (*pattern) {
- case '\0':
- case ':': // Either ':' or '\0' marks the end of the pattern.
- return *str == '\0';
- case '?': // Matches any single character.
- return *str != '\0' && PatternMatchesString(pattern + 1, str + 1);
- case '*': // Matches any string (possibly empty) of characters.
- return (*str != '\0' && PatternMatchesString(pattern, str + 1)) ||
- PatternMatchesString(pattern + 1, str);
- default: // Non-special character. Matches itself.
- return *pattern == *str && PatternMatchesString(pattern + 1, str + 1);
- }
-}
-
-bool UnitTestOptions::MatchesFilter(const std::string &name,
- const char *filter) {
- const char *cur_pattern = filter;
- for (;;) {
- if (PatternMatchesString(cur_pattern, name.c_str())) {
- return true;
+// This function implements a linear-time string globbing algorithm based on
+// https://research.swtch.com/glob.
+static bool PatternMatchesString(const std::string& name_str,
+ const char* pattern, const char* pattern_end) {
+ const char* name = name_str.c_str();
+ const char* const name_begin = name;
+ const char* const name_end = name + name_str.size();
+
+ const char* pattern_next = pattern;
+ const char* name_next = name;
+
+ while (pattern < pattern_end || name < name_end) {
+ if (pattern < pattern_end) {
+ switch (*pattern) {
+ default: // Match an ordinary character.
+ if (name < name_end && *name == *pattern) {
+ ++pattern;
+ ++name;
+ continue;
+ }
+ break;
+ case '?': // Match any single character.
+ if (name < name_end) {
+ ++pattern;
+ ++name;
+ continue;
+ }
+ break;
+ case '*':
+ // Match zero or more characters. Start by skipping over the wildcard
+ // and matching zero characters from name. If that fails, restart and
+ // match one more character than the last attempt.
+ pattern_next = pattern;
+ name_next = name + 1;
+ ++pattern;
+ continue;
+ }
+ }
+ // Failed to match a character. Restart if possible.
+ if (name_begin < name_next && name_next <= name_end) {
+ pattern = pattern_next;
+ name = name_next;
+ continue;
}
+ return false;
+ }
+ return true;
+}
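
For illustration, the same two-pointer backtracking idea can be exercised in a
standalone sketch (GlobMatch is a hypothetical helper over whole std::strings,
without the pattern_end bookkeeping the gtest version needs):

    #include <iostream>
    #include <string>

    // '?' matches any single character; '*' matches any run (possibly empty).
    // On a mismatch after a '*', retry from the '*' consuming one more char.
    static bool GlobMatch(const std::string& pattern, const std::string& name) {
      size_t p = 0, n = 0;
      size_t star_p = std::string::npos;  // position just past the last '*'
      size_t star_n = 0;                  // next name position to retry from
      while (p < pattern.size() || n < name.size()) {
        if (p < pattern.size()) {
          const char c = pattern[p];
          if (c == '*') {                 // try matching zero characters first
            star_p = p + 1;
            star_n = n + 1;
            ++p;
            continue;
          }
          if (n < name.size() && (c == '?' || c == name[n])) {
            ++p;
            ++n;
            continue;
          }
        }
        if (star_p != std::string::npos && star_n <= name.size()) {
          p = star_p;                     // restart after the '*' ...
          n = star_n++;                   // ... one character further along
          continue;
        }
        return false;
      }
      return true;
    }

    int main() {
      std::cout << GlobMatch("Foo*.Bar?", "FooTest.Bar1") << "\n";  // 1
      std::cout << GlobMatch("Foo*.Bar?", "FooTest.Bar") << "\n";   // 0
    }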
- // Finds the next pattern in the filter.
- cur_pattern = strchr(cur_pattern, ':');
+namespace {
+
+bool IsGlobPattern(const std::string& pattern) {
+ return std::any_of(pattern.begin(), pattern.end(),
+ [](const char c) { return c == '?' || c == '*'; });
+}
- // Returns if no more pattern can be found.
- if (cur_pattern == nullptr) {
- return false;
+class UnitTestFilter {
+ public:
+ UnitTestFilter() = default;
+
+ // Constructs a filter from a string of patterns separated by `:`.
+ explicit UnitTestFilter(const std::string& filter) {
+ // By design, the "" filter matches the "" string.
+ std::vector<std::string> all_patterns;
+ SplitString(filter, ':', &all_patterns);
+ const auto exact_match_patterns_begin = std::partition(
+ all_patterns.begin(), all_patterns.end(), &IsGlobPattern);
+
+ glob_patterns_.reserve(static_cast<size_t>(
+ std::distance(all_patterns.begin(), exact_match_patterns_begin)));
+ std::move(all_patterns.begin(), exact_match_patterns_begin,
+ std::inserter(glob_patterns_, glob_patterns_.begin()));
+ std::move(
+ exact_match_patterns_begin, all_patterns.end(),
+ std::inserter(exact_match_patterns_, exact_match_patterns_.begin()));
+ }
+
+ // Returns true if and only if name matches at least one of the patterns in
+ // the filter.
+ bool MatchesName(const std::string& name) const {
+ return exact_match_patterns_.count(name) > 0 ||
+ std::any_of(glob_patterns_.begin(), glob_patterns_.end(),
+ [&name](const std::string& pattern) {
+ return PatternMatchesString(
+ name, pattern.c_str(),
+ pattern.c_str() + pattern.size());
+ });
+ }
+
+ private:
+ std::vector<std::string> glob_patterns_;
+ std::unordered_set<std::string> exact_match_patterns_;
+};
+
+class PositiveAndNegativeUnitTestFilter {
+ public:
+ // Constructs a positive and a negative filter from a string. The string
+ // contains a positive filter optionally followed by a '-' character and a
+ // negative filter. If only a negative filter is provided, the positive
+ // filter is assumed to be "*".
+ // A filter is a list of patterns separated by ':'.
+ explicit PositiveAndNegativeUnitTestFilter(const std::string& filter) {
+ std::vector<std::string> positive_and_negative_filters;
+
+ // NOTE: `SplitString` always returns a non-empty container.
+ SplitString(filter, '-', &positive_and_negative_filters);
+ const auto& positive_filter = positive_and_negative_filters.front();
+
+ if (positive_and_negative_filters.size() > 1) {
+ positive_filter_ = UnitTestFilter(
+ positive_filter.empty() ? kUniversalFilter : positive_filter);
+
+ // TODO(b/214626361): Fail on multiple '-' characters
+ // For the moment to preserve old behavior we concatenate the rest of the
+ // string parts with `-` as separator to generate the negative filter.
+ auto negative_filter_string = positive_and_negative_filters[1];
+ for (std::size_t i = 2; i < positive_and_negative_filters.size(); i++)
+ negative_filter_string =
+ negative_filter_string + '-' + positive_and_negative_filters[i];
+ negative_filter_ = UnitTestFilter(negative_filter_string);
+ } else {
+ // In case we don't have a negative filter and positive filter is ""
+ // we do not use kUniversalFilter by design as opposed to when we have a
+ // negative filter.
+ positive_filter_ = UnitTestFilter(positive_filter);
}
+ }
- // Skips the pattern separater (the ':' character).
- cur_pattern++;
+ // Returns true if and only if the full test name (formed by joining the
+ // test suite name and the test name with a '.' character) matches the
+ // positive filter and does not match the negative filter.
+ bool MatchesTest(const std::string& test_suite_name,
+ const std::string& test_name) const {
+ return MatchesName(test_suite_name + "." + test_name);
}
+
+ // Returns true if and only if name matches the positive filter and does not
+ // match the negative filter.
+ bool MatchesName(const std::string& name) const {
+ return positive_filter_.MatchesName(name) &&
+ !negative_filter_.MatchesName(name);
+ }
+
+ private:
+ UnitTestFilter positive_filter_;
+ UnitTestFilter negative_filter_;
+};
+} // namespace
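
Worked example of the combined semantics (test names hypothetical): with
--gtest_filter=FooTest.*:BarTest.*-FooTest.Flaky, the positive filter is
"FooTest.*:BarTest.*" and the negative filter is "FooTest.Flaky", so:

    FooTest.Fast    matches the positive filter only         -> selected
    FooTest.Flaky   matches positive and negative filters    -> rejected
    BazTest.Any     matches neither filter                   -> rejected

With --gtest_filter=-FooTest.Flaky the positive part is empty, so it is taken
to be "*" and every test except FooTest.Flaky is selected.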
+
+bool UnitTestOptions::MatchesFilter(const std::string& name_str,
+ const char* filter) {
+ return UnitTestFilter(filter).MatchesName(name_str);
}
// Returns true if and only if the user-specified filter matches the test
// suite name and the test name.
-bool UnitTestOptions::FilterMatchesTest(const std::string &test_suite_name,
- const std::string &test_name) {
- const std::string &full_name = test_suite_name + "." + test_name.c_str();
-
+bool UnitTestOptions::FilterMatchesTest(const std::string& test_suite_name,
+ const std::string& test_name) {
// Split --gtest_filter at '-', if there is one, to separate into
// positive filter and negative filter portions
- const char *const p = GTEST_FLAG(filter).c_str();
- const char *const dash = strchr(p, '-');
- std::string positive;
- std::string negative;
- if (dash == nullptr) {
- positive = GTEST_FLAG(filter).c_str(); // Whole string is a positive filter
- negative = "";
- } else {
- positive = std::string(p, dash); // Everything up to the dash
- negative = std::string(dash + 1); // Everything after the dash
- if (positive.empty()) {
- // Treat '-test1' as the same as '*-test1'
- positive = kUniversalFilter;
- }
- }
-
- // A filter is a colon-separated list of patterns. It matches a
- // test if any pattern in it matches the test.
- return (MatchesFilter(full_name, positive.c_str()) &&
- !MatchesFilter(full_name, negative.c_str()));
+ return PositiveAndNegativeUnitTestFilter(GTEST_FLAG_GET(filter))
+ .MatchesTest(test_suite_name, test_name);
}
#if GTEST_HAS_SEH
@@ -706,7 +853,7 @@ int UnitTestOptions::GTestShouldProcessSEH(DWORD exception_code) {
bool should_handle = true;
- if (!GTEST_FLAG(catch_exceptions))
+ if (!GTEST_FLAG_GET(catch_exceptions))
should_handle = false;
else if (exception_code == EXCEPTION_BREAKPOINT)
should_handle = false;
@@ -723,7 +870,7 @@ int UnitTestOptions::GTestShouldProcessSEH(DWORD exception_code) {
// Google Test. The 'result' parameter specifies where to report the
// results. Intercepts only failures from the current thread.
ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter(
- TestPartResultArray *result)
+ TestPartResultArray* result)
: intercept_mode_(INTERCEPT_ONLY_CURRENT_THREAD), result_(result) {
Init();
}
@@ -732,13 +879,13 @@ ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter(
// Google Test. The 'result' parameter specifies where to report the
// results.
ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter(
- InterceptMode intercept_mode, TestPartResultArray *result)
+ InterceptMode intercept_mode, TestPartResultArray* result)
: intercept_mode_(intercept_mode), result_(result) {
Init();
}
void ScopedFakeTestPartResultReporter::Init() {
- internal::UnitTestImpl *const impl = internal::GetUnitTestImpl();
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
if (intercept_mode_ == INTERCEPT_ALL_THREADS) {
old_reporter_ = impl->GetGlobalTestPartResultReporter();
impl->SetGlobalTestPartResultReporter(this);
@@ -751,7 +898,7 @@ void ScopedFakeTestPartResultReporter::Init() {
// The d'tor restores the test part result reporter used by Google Test
// before.
ScopedFakeTestPartResultReporter::~ScopedFakeTestPartResultReporter() {
- internal::UnitTestImpl *const impl = internal::GetUnitTestImpl();
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
if (intercept_mode_ == INTERCEPT_ALL_THREADS) {
impl->SetGlobalTestPartResultReporter(old_reporter_);
} else {
@@ -762,7 +909,7 @@ ScopedFakeTestPartResultReporter::~ScopedFakeTestPartResultReporter() {
// Increments the test part result count and remembers the result.
// This method is from the TestPartResultReporterInterface interface.
void ScopedFakeTestPartResultReporter::ReportTestPartResult(
- const TestPartResult &result) {
+ const TestPartResult& result) {
result_->Append(result);
}
@@ -786,12 +933,12 @@ extern const TypeId kTestTypeIdInGoogleTest = GetTestTypeId();
// This predicate-formatter checks that 'results' contains a test part
// failure of the given type and that the failure message contains the
// given substring.
-static AssertionResult HasOneFailure(const char * /* results_expr */,
- const char * /* type_expr */,
- const char * /* substr_expr */,
- const TestPartResultArray &results,
+static AssertionResult HasOneFailure(const char* /* results_expr */,
+ const char* /* type_expr */,
+ const char* /* substr_expr */,
+ const TestPartResultArray& results,
TestPartResult::Type type,
- const std::string &substr) {
+ const std::string& substr) {
const std::string expected(type == TestPartResult::kFatalFailure
? "1 fatal failure"
: "1 non-fatal failure");
@@ -805,7 +952,7 @@ static AssertionResult HasOneFailure(const char * /* results_expr */,
return AssertionFailure() << msg;
}
- const TestPartResult &r = results.GetTestPartResult(0);
+ const TestPartResult& r = results.GetTestPartResult(0);
if (r.type() != type) {
return AssertionFailure() << "Expected: " << expected << "\n"
<< " Actual:\n"
@@ -825,9 +972,9 @@ static AssertionResult HasOneFailure(const char * /* results_expr */,
// The constructor of SingleFailureChecker remembers where to look up
// test part results, what type of failure we expect, and what
// substring the failure message should contain.
-SingleFailureChecker::SingleFailureChecker(const TestPartResultArray *results,
+SingleFailureChecker::SingleFailureChecker(const TestPartResultArray* results,
TestPartResult::Type type,
- const std::string &substr)
+ const std::string& substr)
: results_(results), type_(type), substr_(substr) {}
// The destructor of SingleFailureChecker verifies that the given
@@ -839,26 +986,26 @@ SingleFailureChecker::~SingleFailureChecker() {
}
DefaultGlobalTestPartResultReporter::DefaultGlobalTestPartResultReporter(
- UnitTestImpl *unit_test)
+ UnitTestImpl* unit_test)
: unit_test_(unit_test) {}
void DefaultGlobalTestPartResultReporter::ReportTestPartResult(
- const TestPartResult &result) {
+ const TestPartResult& result) {
unit_test_->current_test_result()->AddTestPartResult(result);
unit_test_->listeners()->repeater()->OnTestPartResult(result);
}
DefaultPerThreadTestPartResultReporter::DefaultPerThreadTestPartResultReporter(
- UnitTestImpl *unit_test)
+ UnitTestImpl* unit_test)
: unit_test_(unit_test) {}
void DefaultPerThreadTestPartResultReporter::ReportTestPartResult(
- const TestPartResult &result) {
+ const TestPartResult& result) {
unit_test_->GetGlobalTestPartResultReporter()->ReportTestPartResult(result);
}
// Returns the global test part result reporter.
-TestPartResultReporterInterface *
+TestPartResultReporterInterface*
UnitTestImpl::GetGlobalTestPartResultReporter() {
internal::MutexLock lock(&global_test_part_result_reporter_mutex_);
return global_test_part_result_repoter_;
@@ -866,20 +1013,20 @@ UnitTestImpl::GetGlobalTestPartResultReporter() {
// Sets the global test part result reporter.
void UnitTestImpl::SetGlobalTestPartResultReporter(
- TestPartResultReporterInterface *reporter) {
+ TestPartResultReporterInterface* reporter) {
internal::MutexLock lock(&global_test_part_result_reporter_mutex_);
global_test_part_result_repoter_ = reporter;
}
// Returns the test part result reporter for the current thread.
-TestPartResultReporterInterface *
+TestPartResultReporterInterface*
UnitTestImpl::GetTestPartResultReporterForCurrentThread() {
return per_thread_test_part_result_reporter_.get();
}
// Sets the test part result reporter for the current thread.
void UnitTestImpl::SetTestPartResultReporterForCurrentThread(
- TestPartResultReporterInterface *reporter) {
+ TestPartResultReporterInterface* reporter) {
per_thread_test_part_result_reporter_.set(reporter);
}
@@ -957,50 +1104,36 @@ int UnitTestImpl::test_to_run_count() const {
// trace but Bar() and CurrentOsStackTraceExceptTop() won't.
std::string UnitTestImpl::CurrentOsStackTraceExceptTop(int skip_count) {
return os_stack_trace_getter()->CurrentStackTrace(
- static_cast<int>(GTEST_FLAG(stack_trace_depth)), skip_count + 1
+ static_cast<int>(GTEST_FLAG_GET(stack_trace_depth)), skip_count + 1
// Skips the user-specified number of frames plus this function
// itself.
); // NOLINT
}
-// Returns the current time in milliseconds.
+// A helper class for measuring elapsed times.
+class Timer {
+ public:
+ Timer() : start_(std::chrono::steady_clock::now()) {}
+
+ // Returns the time elapsed in milliseconds since the timer was created.
+ TimeInMillis Elapsed() {
+ return std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::steady_clock::now() - start_)
+ .count();
+ }
+
+ private:
+ std::chrono::steady_clock::time_point start_;
+};
+
+// Returns a timestamp as milliseconds since the epoch. Note that this time
+// may jump around, subject to adjustments by the system; to measure elapsed
+// time, use Timer instead.
TimeInMillis GetTimeInMillis() {
-#if GTEST_OS_WINDOWS_MOBILE || defined(__BORLANDC__)
- // Difference between 1970-01-01 and 1601-01-01 in milliseconds.
- // http://analogous.blogspot.com/2005/04/epoch.html
- const TimeInMillis kJavaEpochToWinFileTimeDelta =
- static_cast<TimeInMillis>(116444736UL) * 100000UL;
- const DWORD kTenthMicrosInMilliSecond = 10000;
-
- SYSTEMTIME now_systime;
- FILETIME now_filetime;
- ULARGE_INTEGER now_int64;
- GetSystemTime(&now_systime);
- if (SystemTimeToFileTime(&now_systime, &now_filetime)) {
- now_int64.LowPart = now_filetime.dwLowDateTime;
- now_int64.HighPart = now_filetime.dwHighDateTime;
- now_int64.QuadPart = (now_int64.QuadPart / kTenthMicrosInMilliSecond) -
- kJavaEpochToWinFileTimeDelta;
- return now_int64.QuadPart;
- }
- return 0;
-#elif GTEST_OS_WINDOWS && !GTEST_HAS_GETTIMEOFDAY_
- __timeb64 now;
-
- // MSVC 8 deprecates _ftime64(), so we want to suppress warning 4996
- // (deprecated function) there.
- GTEST_DISABLE_MSC_DEPRECATED_PUSH_()
- _ftime64(&now);
- GTEST_DISABLE_MSC_DEPRECATED_POP_()
-
- return static_cast<TimeInMillis>(now.time) * 1000 + now.millitm;
-#elif GTEST_HAS_GETTIMEOFDAY_
- struct timeval now;
- gettimeofday(&now, nullptr);
- return static_cast<TimeInMillis>(now.tv_sec) * 1000 + now.tv_usec / 1000;
-#else
-#error "Don't know how to get the current time on your system."
-#endif
+ return std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::system_clock::now() -
+ std::chrono::system_clock::from_time_t(0))
+ .count();
}
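
A minimal standalone sketch of the distinction (assuming nothing beyond
<chrono>, <iostream> and <thread>): steady_clock is monotonic and suits
Timer::Elapsed(), while system_clock tracks adjustable wall time:

    #include <chrono>
    #include <iostream>
    #include <thread>

    int main() {
      // Monotonic base for measuring a duration, as Timer does.
      const auto start = std::chrono::steady_clock::now();
      std::this_thread::sleep_for(std::chrono::milliseconds(50));
      const auto elapsed_ms =
          std::chrono::duration_cast<std::chrono::milliseconds>(
              std::chrono::steady_clock::now() - start)
              .count();
      std::cout << "elapsed: " << elapsed_ms << " ms\n";  // ~50

      // Wall-clock timestamp, as GetTimeInMillis returns; may jump if the
      // system clock is adjusted.
      const auto now_ms =
          std::chrono::duration_cast<std::chrono::milliseconds>(
              std::chrono::system_clock::now().time_since_epoch())
              .count();
      std::cout << "since epoch: " << now_ms << " ms\n";
    }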
// Utilities
@@ -1012,12 +1145,12 @@ TimeInMillis GetTimeInMillis() {
// memory using new. The caller is responsible for deleting the return
// value using delete[]. Returns the wide string, or NULL if the
// input is NULL.
-LPCWSTR String::AnsiToUtf16(const char *ansi) {
+LPCWSTR String::AnsiToUtf16(const char* ansi) {
if (!ansi) return nullptr;
const int length = strlen(ansi);
const int unicode_length =
MultiByteToWideChar(CP_ACP, 0, ansi, length, nullptr, 0);
- WCHAR *unicode = new WCHAR[unicode_length + 1];
+ WCHAR* unicode = new WCHAR[unicode_length + 1];
MultiByteToWideChar(CP_ACP, 0, ansi, length, unicode, unicode_length);
unicode[unicode_length] = 0;
return unicode;
@@ -1027,11 +1160,11 @@ LPCWSTR String::AnsiToUtf16(const char *ansi) {
// memory using new. The caller is responsible for deleting the return
// value using delete[]. Returns the ANSI string, or NULL if the
// input is NULL.
-const char *String::Utf16ToAnsi(LPCWSTR utf16_str) {
+const char* String::Utf16ToAnsi(LPCWSTR utf16_str) {
if (!utf16_str) return nullptr;
const int ansi_length = WideCharToMultiByte(CP_ACP, 0, utf16_str, -1, nullptr,
0, nullptr, nullptr);
- char *ansi = new char[ansi_length + 1];
+ char* ansi = new char[ansi_length + 1];
WideCharToMultiByte(CP_ACP, 0, utf16_str, -1, ansi, ansi_length, nullptr,
nullptr);
ansi[ansi_length] = 0;
@@ -1046,7 +1179,7 @@ const char *String::Utf16ToAnsi(LPCWSTR utf16_str) {
// Unlike strcmp(), this function can handle NULL argument(s). A NULL
// C string is considered different to any non-NULL C string,
// including the empty string.
-bool String::CStringEquals(const char *lhs, const char *rhs) {
+bool String::CStringEquals(const char* lhs, const char* rhs) {
if (lhs == nullptr) return rhs == nullptr;
if (rhs == nullptr) return false;
@@ -1058,8 +1191,8 @@ bool String::CStringEquals(const char *lhs, const char *rhs) {
// Converts an array of wide chars to a narrow string using the UTF-8
// encoding, and streams the result to the given Message object.
-static void StreamWideCharsToMessage(const wchar_t *wstr, size_t length,
- Message *msg) {
+static void StreamWideCharsToMessage(const wchar_t* wstr, size_t length,
+ Message* msg) {
for (size_t i = 0; i != length;) { // NOLINT
if (wstr[i] != L'\0') {
*msg << WideStringToUtf8(wstr + i, static_cast<int>(length - i));
@@ -1073,8 +1206,8 @@ static void StreamWideCharsToMessage(const wchar_t *wstr, size_t length,
#endif // GTEST_HAS_STD_WSTRING
-void SplitString(const ::std::string &str, char delimiter,
- ::std::vector< ::std::string> *dest) {
+void SplitString(const ::std::string& str, char delimiter,
+ ::std::vector< ::std::string>* dest) {
::std::vector< ::std::string> parsed;
::std::string::size_type pos = 0;
while (::testing::internal::AlwaysTrue()) {
@@ -1105,17 +1238,17 @@ Message::Message() : ss_(new ::std::stringstream) {
// These two overloads allow streaming a wide C string to a Message
// using the UTF-8 encoding.
-Message &Message::operator<<(const wchar_t *wide_c_str) {
+Message& Message::operator<<(const wchar_t* wide_c_str) {
return *this << internal::String::ShowWideCString(wide_c_str);
}
-Message &Message::operator<<(wchar_t *wide_c_str) {
+Message& Message::operator<<(wchar_t* wide_c_str) {
return *this << internal::String::ShowWideCString(wide_c_str);
}
#if GTEST_HAS_STD_WSTRING
// Converts the given wide string to a narrow string using the UTF-8
// encoding, and streams the result to this Message object.
-Message &Message::operator<<(const ::std::wstring &wstr) {
+Message& Message::operator<<(const ::std::wstring& wstr) {
internal::StreamWideCharsToMessage(wstr.c_str(), wstr.length(), this);
return *this;
}
@@ -1127,45 +1260,11 @@ std::string Message::GetString() const {
return internal::StringStreamToString(ss_.get());
}
-// AssertionResult constructors.
-// Used in EXPECT_TRUE/FALSE(assertion_result).
-AssertionResult::AssertionResult(const AssertionResult &other)
- : success_(other.success_),
- message_(other.message_.get() != nullptr
- ? new ::std::string(*other.message_)
- : static_cast< ::std::string *>(nullptr)) {}
-
-// Swaps two AssertionResults.
-void AssertionResult::swap(AssertionResult &other) {
- using std::swap;
- swap(success_, other.success_);
- swap(message_, other.message_);
-}
-
-// Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE.
-AssertionResult AssertionResult::operator!() const {
- AssertionResult negation(!success_);
- if (message_.get() != nullptr) negation << *message_;
- return negation;
-}
-
-// Makes a successful assertion result.
-AssertionResult AssertionSuccess() { return AssertionResult(true); }
-
-// Makes a failed assertion result.
-AssertionResult AssertionFailure() { return AssertionResult(false); }
-
-// Makes a failed assertion result with the given failure message.
-// Deprecated; use AssertionFailure() << message.
-AssertionResult AssertionFailure(const Message &message) {
- return AssertionFailure() << message;
-}
-
namespace internal {
namespace edit_distance {
-std::vector<EditType> CalculateOptimalEdits(const std::vector<size_t> &left,
- const std::vector<size_t> &right) {
+std::vector<EditType> CalculateOptimalEdits(const std::vector<size_t>& left,
+ const std::vector<size_t>& right) {
std::vector<std::vector<double> > costs(
left.size() + 1, std::vector<double>(right.size() + 1));
std::vector<std::vector<EditType> > best_move(
@@ -1226,7 +1325,7 @@ namespace {
// Helper class to convert string into ids with deduplication.
class InternalStrings {
public:
- size_t GetId(const std::string &str) {
+ size_t GetId(const std::string& str) {
IdMap::iterator it = ids_.find(str);
if (it != ids_.end()) return it->second;
size_t id = ids_.size();
@@ -1241,8 +1340,8 @@ class InternalStrings {
} // namespace
std::vector<EditType> CalculateOptimalEdits(
- const std::vector<std::string> &left,
- const std::vector<std::string> &right) {
+ const std::vector<std::string>& left,
+ const std::vector<std::string>& right) {
std::vector<size_t> left_ids, right_ids;
{
InternalStrings intern_table;
@@ -1265,10 +1364,13 @@ namespace {
class Hunk {
public:
Hunk(size_t left_start, size_t right_start)
- : left_start_(left_start), right_start_(right_start), adds_(), removes_(),
+ : left_start_(left_start),
+ right_start_(right_start),
+ adds_(),
+ removes_(),
common_() {}
- void PushLine(char edit, const char *line) {
+ void PushLine(char edit, const char* line) {
switch (edit) {
case ' ':
++common_;
@@ -1286,10 +1388,10 @@ class Hunk {
}
}
- void PrintTo(std::ostream *os) {
+ void PrintTo(std::ostream* os) {
PrintHeader(os);
FlushEdits();
- for (std::list<std::pair<char, const char *> >::const_iterator it =
+ for (std::list<std::pair<char, const char*> >::const_iterator it =
hunk_.begin();
it != hunk_.end(); ++it) {
*os << it->first << it->second << "\n";
@@ -1308,7 +1410,7 @@ class Hunk {
// The format is
// "@@ -<left_start>,<left_length> +<right_start>,<right_length> @@"
// where the left/right parts are omitted if unnecessary.
- void PrintHeader(std::ostream *ss) const {
+ void PrintHeader(std::ostream* ss) const {
*ss << "@@ ";
if (removes_) {
*ss << "-" << left_start_ << "," << (removes_ + common_);
@@ -1324,7 +1426,7 @@ class Hunk {
size_t left_start_, right_start_;
size_t adds_, removes_, common_;
- std::list<std::pair<char, const char *> > hunk_, hunk_adds_, hunk_removes_;
+ std::list<std::pair<char, const char*> > hunk_, hunk_adds_, hunk_removes_;
};
} // namespace
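
Worked example of the header format above: a hunk with one line of shared
context, two removed lines, and one added line, starting at line 3 on both
sides, prints "@@ -3,3 +3,2 @@", since the left length is
removes_ + common_ = 3 and the right length is adds_ + common_ = 2.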
@@ -1336,8 +1438,8 @@ class Hunk {
// 'context' represents the desired unchanged prefix/suffix around the diff.
// If two hunks are close enough that their contexts overlap, then they are
// joined into one hunk.
-std::string CreateUnifiedDiff(const std::vector<std::string> &left,
- const std::vector<std::string> &right,
+std::string CreateUnifiedDiff(const std::vector<std::string>& left,
+ const std::vector<std::string>& right,
size_t context) {
const std::vector<EditType> edits = CalculateOptimalEdits(left, right);
@@ -1406,7 +1508,7 @@ namespace {
// The string representation of the values received in EqFailure() are already
// escaped. Split them on escaped '\n' boundaries. Leave all other escaped
// characters the same.
-std::vector<std::string> SplitEscapedString(const std::string &str) {
+std::vector<std::string> SplitEscapedString(const std::string& str) {
std::vector<std::string> lines;
size_t start = 0, end = str.size();
if (end > 2 && str[0] == '"' && str[end - 1] == '"') {
@@ -1446,10 +1548,10 @@ std::vector<std::string> SplitEscapedString(const std::string &str) {
// The ignoring_case parameter is true if and only if the assertion is a
// *_STRCASEEQ*. When it's true, the string "Ignoring case" will
// be inserted into the message.
-AssertionResult EqFailure(const char *lhs_expression,
- const char *rhs_expression,
- const std::string &lhs_value,
- const std::string &rhs_value, bool ignoring_case) {
+AssertionResult EqFailure(const char* lhs_expression,
+ const char* rhs_expression,
+ const std::string& lhs_value,
+ const std::string& rhs_value, bool ignoring_case) {
Message msg;
msg << "Expected equality of these values:";
msg << "\n " << lhs_expression;
@@ -1479,9 +1581,9 @@ AssertionResult EqFailure(const char *lhs_expression,
// Constructs a failure message for Boolean assertions such as EXPECT_TRUE.
std::string GetBoolAssertionFailureMessage(
- const AssertionResult &assertion_result, const char *expression_text,
- const char *actual_predicate_value, const char *expected_predicate_value) {
- const char *actual_message = assertion_result.message();
+ const AssertionResult& assertion_result, const char* expression_text,
+ const char* actual_predicate_value, const char* expected_predicate_value) {
+ const char* actual_message = assertion_result.message();
Message msg;
msg << "Value of: " << expression_text
<< "\n Actual: " << actual_predicate_value;
@@ -1491,12 +1593,37 @@ std::string GetBoolAssertionFailureMessage(
}
// Helper function for implementing ASSERT_NEAR.
-AssertionResult DoubleNearPredFormat(const char *expr1, const char *expr2,
- const char *abs_error_expr, double val1,
+AssertionResult DoubleNearPredFormat(const char* expr1, const char* expr2,
+ const char* abs_error_expr, double val1,
double val2, double abs_error) {
const double diff = fabs(val1 - val2);
if (diff <= abs_error) return AssertionSuccess();
+ // Find the value which is closest to zero.
+ const double min_abs = std::min(fabs(val1), fabs(val2));
+ // Find the distance to the next double from that value.
+ const double epsilon =
+ nextafter(min_abs, std::numeric_limits<double>::infinity()) - min_abs;
+ // Detect the case where abs_error is so small that EXPECT_NEAR is
+ // effectively the same as EXPECT_EQUAL, and give an informative error
+ // message so that the situation can be more easily understood without
+ // requiring exotic floating-point knowledge.
+ // Don't do an epsilon check if abs_error is zero because that implies
+ // that an equality check was actually intended.
+ if (!(std::isnan)(val1) && !(std::isnan)(val2) && abs_error > 0 &&
+ abs_error < epsilon) {
+ return AssertionFailure()
+ << "The difference between " << expr1 << " and " << expr2 << " is "
+ << diff << ", where\n"
+ << expr1 << " evaluates to " << val1 << ",\n"
+ << expr2 << " evaluates to " << val2 << ".\nThe abs_error parameter "
+ << abs_error_expr << " evaluates to " << abs_error
+ << " which is smaller than the minimum distance between doubles for "
+ "numbers of this magnitude which is "
+ << epsilon
+ << ", thus making this EXPECT_NEAR check equivalent to "
+ "EXPECT_EQUAL. Consider using EXPECT_DOUBLE_EQ instead.";
+ }
return AssertionFailure()
<< "The difference between " << expr1 << " and " << expr2 << " is "
<< diff << ", which exceeds " << abs_error_expr << ", where\n"
@@ -1507,7 +1634,7 @@ AssertionResult DoubleNearPredFormat(const char *expr1, const char *expr2,
// Helper template for implementing FloatLE() and DoubleLE().
template <typename RawType>
-AssertionResult FloatingPointLE(const char *expr1, const char *expr2,
+AssertionResult FloatingPointLE(const char* expr1, const char* expr2,
RawType val1, RawType val2) {
// Returns success if val1 is less than val2,
if (val1 < val2) {
@@ -1542,72 +1669,24 @@ AssertionResult FloatingPointLE(const char *expr1, const char *expr2,
// Asserts that val1 is less than, or almost equal to, val2. Fails
// otherwise. In particular, it fails if either val1 or val2 is NaN.
-AssertionResult FloatLE(const char *expr1, const char *expr2, float val1,
+AssertionResult FloatLE(const char* expr1, const char* expr2, float val1,
float val2) {
return internal::FloatingPointLE<float>(expr1, expr2, val1, val2);
}
// Asserts that val1 is less than, or almost equal to, val2. Fails
// otherwise. In particular, it fails if either val1 or val2 is NaN.
-AssertionResult DoubleLE(const char *expr1, const char *expr2, double val1,
+AssertionResult DoubleLE(const char* expr1, const char* expr2, double val1,
double val2) {
return internal::FloatingPointLE<double>(expr1, expr2, val1, val2);
}
namespace internal {
-// The helper function for {ASSERT|EXPECT}_EQ with int or enum
-// arguments.
-AssertionResult CmpHelperEQ(const char *lhs_expression,
- const char *rhs_expression, BiggestInt lhs,
- BiggestInt rhs) {
- if (lhs == rhs) {
- return AssertionSuccess();
- }
-
- return EqFailure(lhs_expression, rhs_expression,
- FormatForComparisonFailureMessage(lhs, rhs),
- FormatForComparisonFailureMessage(rhs, lhs), false);
-}
-
-// A macro for implementing the helper functions needed to implement
-// ASSERT_?? and EXPECT_?? with integer or enum arguments. It is here
-// just to avoid copy-and-paste of similar code.
-#define GTEST_IMPL_CMP_HELPER_(op_name, op) \
- AssertionResult CmpHelper##op_name(const char *expr1, const char *expr2, \
- BiggestInt val1, BiggestInt val2) { \
- if (val1 op val2) { \
- return AssertionSuccess(); \
- } else { \
- return AssertionFailure() \
- << "Expected: (" << expr1 << ") " #op " (" << expr2 \
- << "), actual: " << FormatForComparisonFailureMessage(val1, val2) \
- << " vs " << FormatForComparisonFailureMessage(val2, val1); \
- } \
- }
-
-// Implements the helper function for {ASSERT|EXPECT}_NE with int or
-// enum arguments.
-GTEST_IMPL_CMP_HELPER_(NE, !=)
-// Implements the helper function for {ASSERT|EXPECT}_LE with int or
-// enum arguments.
-GTEST_IMPL_CMP_HELPER_(LE, <=)
-// Implements the helper function for {ASSERT|EXPECT}_LT with int or
-// enum arguments.
-GTEST_IMPL_CMP_HELPER_(LT, <)
-// Implements the helper function for {ASSERT|EXPECT}_GE with int or
-// enum arguments.
-GTEST_IMPL_CMP_HELPER_(GE, >=)
-// Implements the helper function for {ASSERT|EXPECT}_GT with int or
-// enum arguments.
-GTEST_IMPL_CMP_HELPER_(GT, >)
-
-#undef GTEST_IMPL_CMP_HELPER_
-
// The helper function for {ASSERT|EXPECT}_STREQ.
-AssertionResult CmpHelperSTREQ(const char *lhs_expression,
- const char *rhs_expression, const char *lhs,
- const char *rhs) {
+AssertionResult CmpHelperSTREQ(const char* lhs_expression,
+ const char* rhs_expression, const char* lhs,
+ const char* rhs) {
if (String::CStringEquals(lhs, rhs)) {
return AssertionSuccess();
}
@@ -1617,9 +1696,9 @@ AssertionResult CmpHelperSTREQ(const char *lhs_expression,
}
// The helper function for {ASSERT|EXPECT}_STRCASEEQ.
-AssertionResult CmpHelperSTRCASEEQ(const char *lhs_expression,
- const char *rhs_expression, const char *lhs,
- const char *rhs) {
+AssertionResult CmpHelperSTRCASEEQ(const char* lhs_expression,
+ const char* rhs_expression, const char* lhs,
+ const char* rhs) {
if (String::CaseInsensitiveCStringEquals(lhs, rhs)) {
return AssertionSuccess();
}
@@ -1629,9 +1708,9 @@ AssertionResult CmpHelperSTRCASEEQ(const char *lhs_expression,
}
// The helper function for {ASSERT|EXPECT}_STRNE.
-AssertionResult CmpHelperSTRNE(const char *s1_expression,
- const char *s2_expression, const char *s1,
- const char *s2) {
+AssertionResult CmpHelperSTRNE(const char* s1_expression,
+ const char* s2_expression, const char* s1,
+ const char* s2) {
if (!String::CStringEquals(s1, s2)) {
return AssertionSuccess();
} else {
@@ -1642,9 +1721,9 @@ AssertionResult CmpHelperSTRNE(const char *s1_expression,
}
// The helper function for {ASSERT|EXPECT}_STRCASENE.
-AssertionResult CmpHelperSTRCASENE(const char *s1_expression,
- const char *s2_expression, const char *s1,
- const char *s2) {
+AssertionResult CmpHelperSTRCASENE(const char* s1_expression,
+ const char* s2_expression, const char* s1,
+ const char* s2) {
if (!String::CaseInsensitiveCStringEquals(s1, s2)) {
return AssertionSuccess();
} else {
@@ -1664,13 +1743,13 @@ namespace {
// is a substring of haystack. NULL is considered a substring of
// itself only.
-bool IsSubstringPred(const char *needle, const char *haystack) {
+bool IsSubstringPred(const char* needle, const char* haystack) {
if (needle == nullptr || haystack == nullptr) return needle == haystack;
return strstr(haystack, needle) != nullptr;
}
-bool IsSubstringPred(const wchar_t *needle, const wchar_t *haystack) {
+bool IsSubstringPred(const wchar_t* needle, const wchar_t* haystack) {
if (needle == nullptr || haystack == nullptr) return needle == haystack;
return wcsstr(haystack, needle) != nullptr;
@@ -1678,7 +1757,7 @@ bool IsSubstringPred(const wchar_t *needle, const wchar_t *haystack) {
// StringType here can be either ::std::string or ::std::wstring.
template <typename StringType>
-bool IsSubstringPred(const StringType &needle, const StringType &haystack) {
+bool IsSubstringPred(const StringType& needle, const StringType& haystack) {
return haystack.find(needle) != StringType::npos;
}
@@ -1688,15 +1767,15 @@ bool IsSubstringPred(const StringType &needle, const StringType &haystack) {
// or ::std::wstring.
template <typename StringType>
AssertionResult IsSubstringImpl(bool expected_to_be_substring,
- const char *needle_expr,
- const char *haystack_expr,
- const StringType &needle,
- const StringType &haystack) {
+ const char* needle_expr,
+ const char* haystack_expr,
+ const StringType& needle,
+ const StringType& haystack) {
if (IsSubstringPred(needle, haystack) == expected_to_be_substring)
return AssertionSuccess();
const bool is_wide_string = sizeof(needle[0]) > 1;
- const char *const begin_string_quote = is_wide_string ? "L\"" : "\"";
+ const char* const begin_string_quote = is_wide_string ? "L\"" : "\"";
return AssertionFailure()
<< "Value of: " << needle_expr << "\n"
<< " Actual: " << begin_string_quote << needle << "\"\n"
@@ -1711,52 +1790,52 @@ AssertionResult IsSubstringImpl(bool expected_to_be_substring,
// substring of haystack (NULL is considered a substring of itself
// only), and return an appropriate error message when they fail.
-AssertionResult IsSubstring(const char *needle_expr, const char *haystack_expr,
- const char *needle, const char *haystack) {
+AssertionResult IsSubstring(const char* needle_expr, const char* haystack_expr,
+ const char* needle, const char* haystack) {
return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
}
-AssertionResult IsSubstring(const char *needle_expr, const char *haystack_expr,
- const wchar_t *needle, const wchar_t *haystack) {
+AssertionResult IsSubstring(const char* needle_expr, const char* haystack_expr,
+ const wchar_t* needle, const wchar_t* haystack) {
return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
}
-AssertionResult IsNotSubstring(const char *needle_expr,
- const char *haystack_expr, const char *needle,
- const char *haystack) {
+AssertionResult IsNotSubstring(const char* needle_expr,
+ const char* haystack_expr, const char* needle,
+ const char* haystack) {
return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
}
-AssertionResult IsNotSubstring(const char *needle_expr,
- const char *haystack_expr, const wchar_t *needle,
- const wchar_t *haystack) {
+AssertionResult IsNotSubstring(const char* needle_expr,
+ const char* haystack_expr, const wchar_t* needle,
+ const wchar_t* haystack) {
return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
}
-AssertionResult IsSubstring(const char *needle_expr, const char *haystack_expr,
- const ::std::string &needle,
- const ::std::string &haystack) {
+AssertionResult IsSubstring(const char* needle_expr, const char* haystack_expr,
+ const ::std::string& needle,
+ const ::std::string& haystack) {
return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
}
-AssertionResult IsNotSubstring(const char *needle_expr,
- const char *haystack_expr,
- const ::std::string &needle,
- const ::std::string &haystack) {
+AssertionResult IsNotSubstring(const char* needle_expr,
+ const char* haystack_expr,
+ const ::std::string& needle,
+ const ::std::string& haystack) {
return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
}
#if GTEST_HAS_STD_WSTRING
-AssertionResult IsSubstring(const char *needle_expr, const char *haystack_expr,
- const ::std::wstring &needle,
- const ::std::wstring &haystack) {
+AssertionResult IsSubstring(const char* needle_expr, const char* haystack_expr,
+ const ::std::wstring& needle,
+ const ::std::wstring& haystack) {
return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack);
}
-AssertionResult IsNotSubstring(const char *needle_expr,
- const char *haystack_expr,
- const ::std::wstring &needle,
- const ::std::wstring &haystack) {
+AssertionResult IsNotSubstring(const char* needle_expr,
+ const char* haystack_expr,
+ const ::std::wstring& needle,
+ const ::std::wstring& haystack) {
return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack);
}
#endif // GTEST_HAS_STD_WSTRING
@@ -1768,7 +1847,7 @@ namespace internal {
namespace {
// Helper function for IsHRESULT{SuccessFailure} predicates
-AssertionResult HRESULTFailureHelper(const char *expr, const char *expected,
+AssertionResult HRESULTFailureHelper(const char* expr, const char* expected,
long hr) { // NOLINT
#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_TV_TITLE
@@ -1784,7 +1863,7 @@ AssertionResult HRESULTFailureHelper(const char *expr, const char *expected,
FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS;
const DWORD kBufSize = 4096;
// Gets the system's human readable message string for this HRESULT.
- char error_text[kBufSize] = { '\0' };
+ char error_text[kBufSize] = {'\0'};
DWORD message_length = ::FormatMessageA(kFlags,
0, // no source, we're asking system
static_cast<DWORD>(hr), // the error
@@ -1808,14 +1887,14 @@ AssertionResult HRESULTFailureHelper(const char *expr, const char *expected,
} // namespace
-AssertionResult IsHRESULTSuccess(const char *expr, long hr) { // NOLINT
+AssertionResult IsHRESULTSuccess(const char* expr, long hr) { // NOLINT
if (SUCCEEDED(hr)) {
return AssertionSuccess();
}
return HRESULTFailureHelper(expr, "succeeds", hr);
}
-AssertionResult IsHRESULTFailure(const char *expr, long hr) { // NOLINT
+AssertionResult IsHRESULTFailure(const char* expr, long hr) { // NOLINT
if (FAILED(hr)) {
return AssertionSuccess();
}
@@ -1853,7 +1932,7 @@ constexpr uint32_t kMaxCodePoint4 =
// Chops off the n lowest bits from a bit pattern. Returns the n
// lowest bits. As a side effect, the original bit pattern will be
// shifted to the right by n bits.
-inline uint32_t ChopLowBits(uint32_t *bits, int n) {
+inline uint32_t ChopLowBits(uint32_t* bits, int n) {
const uint32_t low_bits = *bits & ((static_cast<uint32_t>(1) << n) - 1);
*bits >>= n;
return low_bits;
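
Worked example (as used by the UTF-8 encoder here): encoding U+00E9 starts
with bits = 0xE9 (0b11101001); ChopLowBits(&bits, 6) returns 0b101001 (0x29)
and leaves bits = 0b11, giving the UTF-8 bytes 0xC3 (0xC0 | 0x03) and
0xA9 (0x80 | 0x29).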
@@ -1932,7 +2011,7 @@ inline uint32_t CreateCodePointFromUtf16SurrogatePair(wchar_t first,
// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF16 encoding
// and contains invalid UTF-16 surrogate pairs, values in those pairs
// will be encoded as individual Unicode characters from the Basic
// Multilingual Plane.
-std::string WideStringToUtf8(const wchar_t *str, int num_chars) {
+std::string WideStringToUtf8(const wchar_t* str, int num_chars) {
if (num_chars == -1) num_chars = static_cast<int>(wcslen(str));
::std::stringstream stream;
@@ -1956,7 +2035,7 @@ std::string WideStringToUtf8(const wchar_t *str, int num_chars) {
// Converts a wide C string to an std::string using the UTF-8 encoding.
// NULL will be converted to "(null)".
-std::string String::ShowWideCString(const wchar_t *wide_c_str) {
+std::string String::ShowWideCString(const wchar_t* wide_c_str) {
if (wide_c_str == nullptr) return "(null)";
return internal::WideStringToUtf8(wide_c_str, -1);
@@ -1968,7 +2047,7 @@ std::string String::ShowWideCString(const wchar_t *wide_c_str) {
// Unlike wcscmp(), this function can handle NULL argument(s). A NULL
// C string is considered different to any non-NULL C string,
// including the empty string.
-bool String::WideCStringEquals(const wchar_t *lhs, const wchar_t *rhs) {
+bool String::WideCStringEquals(const wchar_t* lhs, const wchar_t* rhs) {
if (lhs == nullptr) return rhs == nullptr;
if (rhs == nullptr) return false;
@@ -1977,9 +2056,9 @@ bool String::WideCStringEquals(const wchar_t *lhs, const wchar_t *rhs) {
}
// Helper function for *_STREQ on wide strings.
-AssertionResult CmpHelperSTREQ(const char *lhs_expression,
- const char *rhs_expression, const wchar_t *lhs,
- const wchar_t *rhs) {
+AssertionResult CmpHelperSTREQ(const char* lhs_expression,
+ const char* rhs_expression, const wchar_t* lhs,
+ const wchar_t* rhs) {
if (String::WideCStringEquals(lhs, rhs)) {
return AssertionSuccess();
}
@@ -1989,9 +2068,9 @@ AssertionResult CmpHelperSTREQ(const char *lhs_expression,
}
// Helper function for *_STRNE on wide strings.
-AssertionResult CmpHelperSTRNE(const char *s1_expression,
- const char *s2_expression, const wchar_t *s1,
- const wchar_t *s2) {
+AssertionResult CmpHelperSTRNE(const char* s1_expression,
+ const char* s2_expression, const wchar_t* s1,
+ const wchar_t* s2) {
if (!String::WideCStringEquals(s1, s2)) {
return AssertionSuccess();
}
@@ -2007,7 +2086,7 @@ AssertionResult CmpHelperSTRNE(const char *s1_expression,
// Unlike strcasecmp(), this function can handle NULL argument(s). A
// NULL C string is considered different to any non-NULL C string,
// including the empty string.
-bool String::CaseInsensitiveCStringEquals(const char *lhs, const char *rhs) {
+bool String::CaseInsensitiveCStringEquals(const char* lhs, const char* rhs) {
if (lhs == nullptr) return rhs == nullptr;
if (rhs == nullptr) return false;
return posix::StrCaseCmp(lhs, rhs) == 0;
@@ -2025,8 +2104,8 @@ bool String::CaseInsensitiveCStringEquals(const char *lhs, const char *rhs) {
// which compares according to LC_CTYPE category of the current locale.
// On MacOS X, it uses towlower, which also uses LC_CTYPE category of the
// current locale.
-bool String::CaseInsensitiveWideCStringEquals(const wchar_t *lhs,
- const wchar_t *rhs) {
+bool String::CaseInsensitiveWideCStringEquals(const wchar_t* lhs,
+ const wchar_t* rhs) {
if (lhs == nullptr) return rhs == nullptr;
if (rhs == nullptr) return false;
@@ -2049,8 +2128,8 @@ bool String::CaseInsensitiveWideCStringEquals(const wchar_t *lhs,
// Returns true if and only if str ends with the given suffix, ignoring case.
// Any string is considered to end with an empty suffix.
-bool String::EndsWithCaseInsensitive(const std::string &str,
- const std::string &suffix) {
+bool String::EndsWithCaseInsensitive(const std::string& str,
+ const std::string& suffix) {
const size_t str_len = str.length();
const size_t suffix_len = suffix.length();
return (str_len >= suffix_len) &&
@@ -2060,8 +2139,13 @@ bool String::EndsWithCaseInsensitive(const std::string &str,
// Formats an int value as "%02d".
std::string String::FormatIntWidth2(int value) {
+ return FormatIntWidthN(value, 2);
+}
+
+// Formats an int value to a given width with leading zeros.
+std::string String::FormatIntWidthN(int value, int width) {
std::stringstream ss;
- ss << std::setfill('0') << std::setw(2) << value;
+ ss << std::setfill('0') << std::setw(width) << value;
return ss.str();
}
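
A quick self-contained check of the widened helper (standard library only; the asserts document the expected behavior, including that std::setw sets a minimum width rather than truncating):

#include <cassert>
#include <iomanip>
#include <sstream>
#include <string>

// Sketch equivalent of String::FormatIntWidthN above.
std::string FormatIntWidthN(int value, int width) {
  std::stringstream ss;
  ss << std::setfill('0') << std::setw(width) << value;
  return ss.str();
}

int main() {
  assert(FormatIntWidthN(7, 2) == "07");     // the old FormatIntWidth2 case
  assert(FormatIntWidthN(7, 4) == "0007");
  assert(FormatIntWidthN(123, 2) == "123");  // setw() is a floor, not a cap
  return 0;
}
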
@@ -2087,14 +2171,14 @@ std::string String::FormatByte(unsigned char value) {
// Converts the buffer in a stringstream to an std::string, converting NUL
// bytes to "\\0" along the way.
-std::string StringStreamToString(::std::stringstream *ss) {
- const ::std::string &str = ss->str();
- const char *const start = str.c_str();
- const char *const end = start + str.length();
+std::string StringStreamToString(::std::stringstream* ss) {
+ const ::std::string& str = ss->str();
+ const char* const start = str.c_str();
+ const char* const end = start + str.length();
std::string result;
result.reserve(static_cast<size_t>(2 * (end - start)));
- for (const char *ch = start; ch != end; ++ch) {
+ for (const char* ch = start; ch != end; ++ch) {
if (*ch == '\0') {
result += "\\0"; // Replaces NUL with "\\0";
} else {
@@ -2106,14 +2190,16 @@ std::string StringStreamToString(::std::stringstream *ss) {
}
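
To see why the NUL replacement matters, a minimal sketch of the same escaping loop run over a buffer with an embedded NUL byte (names here are illustrative, not gtest's):

#include <cassert>
#include <sstream>
#include <string>

// Illustrative re-implementation of the escaping loop above.
std::string EscapeNuls(const std::string& str) {
  std::string result;
  result.reserve(2 * str.size());
  for (const char ch : str) {
    if (ch == '\0') {
      result += "\\0";  // An embedded NUL would otherwise truncate C-string output.
    } else {
      result += ch;
    }
  }
  return result;
}

int main() {
  std::stringstream ss;
  ss << "ab" << '\0' << "cd";
  assert(EscapeNuls(ss.str()) == "ab\\0cd");  // NUL becomes the two chars '\' '0'
  return 0;
}
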
// Appends the user-supplied message to the Google-Test-generated message.
-std::string AppendUserMessage(const std::string &gtest_msg,
- const Message &user_msg) {
+std::string AppendUserMessage(const std::string& gtest_msg,
+ const Message& user_msg) {
// Appends the user message if it's non-empty.
const std::string user_msg_string = user_msg.GetString();
if (user_msg_string.empty()) {
return gtest_msg;
}
-
+ if (gtest_msg.empty()) {
+ return user_msg_string;
+ }
return gtest_msg + "\n" + user_msg_string;
}
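
The new early return avoids emitting a stray leading newline when the framework message is empty; a compact sketch of the resulting concatenation rules:

#include <cassert>
#include <string>

// Sketch of the rules implemented above (illustrative name).
std::string AppendUserMessageSketch(const std::string& gtest_msg,
                                    const std::string& user_msg) {
  if (user_msg.empty()) return gtest_msg;
  if (gtest_msg.empty()) return user_msg;  // new branch: no leading "\n"
  return gtest_msg + "\n" + user_msg;
}

int main() {
  assert(AppendUserMessageSketch("", "user note") == "user note");
  assert(AppendUserMessageSketch("expected 1, got 2", "user note") ==
         "expected 1, got 2\nuser note");
  return 0;
}
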
@@ -2131,7 +2217,7 @@ TestResult::~TestResult() {}
// Returns the i-th test part result among all the results. i can
// range from 0 to total_part_count() - 1. If i is not in that range,
// aborts the program.
-const TestPartResult &TestResult::GetTestPartResult(int i) const {
+const TestPartResult& TestResult::GetTestPartResult(int i) const {
if (i < 0 || i >= total_part_count()) internal::posix::Abort();
return test_part_results_.at(static_cast<size_t>(i));
}
@@ -2139,7 +2225,7 @@ const TestPartResult &TestResult::GetTestPartResult(int i) const {
// Returns the i-th test property. i can range from 0 to
// test_property_count() - 1. If i is not in that range, aborts the
// program.
-const TestProperty &TestResult::GetTestProperty(int i) const {
+const TestProperty& TestResult::GetTestProperty(int i) const {
if (i < 0 || i >= test_property_count()) internal::posix::Abort();
return test_properties_.at(static_cast<size_t>(i));
}
@@ -2148,19 +2234,19 @@ const TestProperty &TestResult::GetTestProperty(int i) const {
void TestResult::ClearTestPartResults() { test_part_results_.clear(); }
// Adds a test part result to the list.
-void TestResult::AddTestPartResult(const TestPartResult &test_part_result) {
+void TestResult::AddTestPartResult(const TestPartResult& test_part_result) {
test_part_results_.push_back(test_part_result);
}
// Adds a test property to the list. If a property with the same key as the
// supplied property is already represented, the value of this test_property
// replaces the old value for that key.
-void TestResult::RecordProperty(const std::string &xml_element,
- const TestProperty &test_property) {
+void TestResult::RecordProperty(const std::string& xml_element,
+ const TestProperty& test_property) {
if (!ValidateTestProperty(xml_element, test_property)) {
return;
}
- internal::MutexLock lock(&test_properites_mutex_);
+ internal::MutexLock lock(&test_properties_mutex_);
const std::vector<TestProperty>::iterator property_with_matching_key =
std::find_if(test_properties_.begin(), test_properties_.end(),
internal::TestPropertyKeyIs(test_property.key()));
@@ -2173,37 +2259,34 @@ void TestResult::RecordProperty(const std::string &xml_element,
// The list of reserved attributes used in the <testsuites> element of XML
// output.
-static const char *const kReservedTestSuitesAttributes[] = {
- "disabled", "errors", "failures", "name",
- "random_seed", "tests", "time", "timestamp"
-};
+static const char* const kReservedTestSuitesAttributes[] = {
+ "disabled", "errors", "failures", "name",
+ "random_seed", "tests", "time", "timestamp"};
// The list of reserved attributes used in the <testsuite> element of XML
// output.
-static const char *const kReservedTestSuiteAttributes[] = {
- "disabled", "errors", "failures", "name", "tests", "time", "timestamp"
-};
+static const char* const kReservedTestSuiteAttributes[] = {
+ "disabled", "errors", "failures", "name",
+ "tests", "time", "timestamp", "skipped"};
// The list of reserved attributes used in the <testcase> element of XML output.
-static const char *const kReservedTestCaseAttributes[] = {
- "classname", "name", "status", "time",
- "type_param", "value_param", "file", "line"
-};
+static const char* const kReservedTestCaseAttributes[] = {
+ "classname", "name", "status", "time",
+ "type_param", "value_param", "file", "line"};
// Use a slightly different set for allowed output to ensure existing tests can
// still RecordProperty("result") or RecordProperty("timestamp").
-static const char *const kReservedOutputTestCaseAttributes[] = {
- "classname", "name", "status", "time", "type_param",
- "value_param", "file", "line", "result", "timestamp"
-};
+static const char* const kReservedOutputTestCaseAttributes[] = {
+ "classname", "name", "status", "time", "type_param",
+ "value_param", "file", "line", "result", "timestamp"};
-template <int kSize>
-std::vector<std::string> ArrayAsVector(const char *const (&array)[kSize]) {
+template <size_t kSize>
+std::vector<std::string> ArrayAsVector(const char* const (&array)[kSize]) {
return std::vector<std::string>(array, array + kSize);
}
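
The array reference parameter lets the compiler deduce the element count, now as size_t so the vector constructor sees no signed/unsigned conversion; for example:

#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

// Same shape as ArrayAsVector above; kSize is deduced from the array type.
template <std::size_t kSize>
std::vector<std::string> ArrayAsVector(const char* const (&array)[kSize]) {
  return std::vector<std::string>(array, array + kSize);
}

int main() {
  static const char* const kNames[] = {"alpha", "beta", "gamma"};
  const std::vector<std::string> v = ArrayAsVector(kNames);  // kSize == 3
  assert(v.size() == 3 && v[1] == "beta");
  return 0;
}
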
static std::vector<std::string> GetReservedAttributesForElement(
- const std::string &xml_element) {
+ const std::string& xml_element) {
if (xml_element == "testsuites") {
return ArrayAsVector(kReservedTestSuitesAttributes);
} else if (xml_element == "testsuite") {
@@ -2219,7 +2302,7 @@ static std::vector<std::string> GetReservedAttributesForElement(
// TODO(jdesprez): Merge the two getReserved attributes once skip is improved
static std::vector<std::string> GetReservedOutputAttributesForElement(
- const std::string &xml_element) {
+ const std::string& xml_element) {
if (xml_element == "testsuites") {
return ArrayAsVector(kReservedTestSuitesAttributes);
} else if (xml_element == "testsuite") {
@@ -2233,7 +2316,7 @@ static std::vector<std::string> GetReservedOutputAttributesForElement(
return std::vector<std::string>();
}
-static std::string FormatWordList(const std::vector<std::string> &words) {
+static std::string FormatWordList(const std::vector<std::string>& words) {
Message word_list;
for (size_t i = 0; i < words.size(); ++i) {
if (i > 0 && words.size() > 2) {
@@ -2248,8 +2331,8 @@ static std::string FormatWordList(const std::vector<std::string> &words) {
}
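
FormatWordList renders the reserved-name lists with commas and a final "and"; a standalone approximation (Message is replaced by std::string so the sketch compiles on its own, and the exact spacing may differ from gtest's output):

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

std::string FormatWordListSketch(const std::vector<std::string>& words) {
  std::string out;
  for (std::size_t i = 0; i < words.size(); ++i) {
    if (i > 0 && words.size() > 2) out += ", ";
    if (i == words.size() - 1 && words.size() > 1) out += "and ";
    out += "'" + words[i] + "'";
  }
  return out;
}

int main() {
  std::cout << FormatWordListSketch({"name"}) << "\n";  // 'name'
  std::cout << FormatWordListSketch({"name", "time", "file"})
            << "\n";  // 'name', 'time', and 'file'
  return 0;
}
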
static bool ValidateTestPropertyName(
- const std::string &property_name,
- const std::vector<std::string> &reserved_names) {
+ const std::string& property_name,
+ const std::vector<std::string>& reserved_names) {
if (std::find(reserved_names.begin(), reserved_names.end(), property_name) !=
reserved_names.end()) {
ADD_FAILURE() << "Reserved key used in RecordProperty(): " << property_name
@@ -2262,8 +2345,8 @@ static bool ValidateTestPropertyName(
// Adds a failure if the key is a reserved attribute of the element named
// xml_element. Returns true if the property is valid.
-bool TestResult::ValidateTestProperty(const std::string &xml_element,
- const TestProperty &test_property) {
+bool TestResult::ValidateTestProperty(const std::string& xml_element,
+ const TestProperty& test_property) {
return ValidateTestPropertyName(test_property.key(),
GetReservedAttributesForElement(xml_element));
}
@@ -2277,7 +2360,7 @@ void TestResult::Clear() {
}
// Returns true if and only if the test part was skipped.
-static bool TestPartSkipped(const TestPartResult &result) {
+static bool TestPartSkipped(const TestPartResult& result) {
return result.skipped();
}
@@ -2295,7 +2378,7 @@ bool TestResult::Failed() const {
}
// Returns true if and only if the test part fatally failed.
-static bool TestPartFatallyFailed(const TestPartResult &result) {
+static bool TestPartFatallyFailed(const TestPartResult& result) {
return result.fatally_failed();
}
@@ -2305,7 +2388,7 @@ bool TestResult::HasFatalFailure() const {
}
// Returns true if and only if the test part non-fatally failed.
-static bool TestPartNonfatallyFailed(const TestPartResult &result) {
+static bool TestPartNonfatallyFailed(const TestPartResult& result) {
return result.nonfatally_failed();
}
@@ -2348,12 +2431,12 @@ void Test::SetUp() {}
void Test::TearDown() {}
// Allows user supplied key value pairs to be recorded for later output.
-void Test::RecordProperty(const std::string &key, const std::string &value) {
+void Test::RecordProperty(const std::string& key, const std::string& value) {
UnitTest::GetInstance()->RecordProperty(key, value);
}
// Allows user supplied key value pairs to be recorded for later output.
-void Test::RecordProperty(const std::string &key, int value) {
+void Test::RecordProperty(const std::string& key, int value) {
Message value_message;
value_message << value;
RecordProperty(key, value_message.GetString().c_str());
@@ -2362,7 +2445,7 @@ void Test::RecordProperty(const std::string &key, int value) {
namespace internal {
void ReportFailureInUnknownLocation(TestPartResult::Type result_type,
- const std::string &message) {
+ const std::string& message) {
// This function is a friend of UnitTest and as such has access to
// AddTestPartResult.
UnitTest::GetInstance()->AddTestPartResult(
@@ -2381,18 +2464,18 @@ void ReportFailureInUnknownLocation(TestPartResult::Type result_type,
// yes, it returns true; otherwise it generates a Google Test failure and
// returns false.
bool Test::HasSameFixtureClass() {
- internal::UnitTestImpl *const impl = internal::GetUnitTestImpl();
- const TestSuite *const test_suite = impl->current_test_suite();
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
+ const TestSuite* const test_suite = impl->current_test_suite();
// Info about the first test in the current test suite.
- const TestInfo *const first_test_info = test_suite->test_info_list()[0];
+ const TestInfo* const first_test_info = test_suite->test_info_list()[0];
const internal::TypeId first_fixture_id = first_test_info->fixture_class_id_;
- const char *const first_test_name = first_test_info->name();
+ const char* const first_test_name = first_test_info->name();
// Info about the current test.
- const TestInfo *const this_test_info = impl->current_test_info();
+ const TestInfo* const this_test_info = impl->current_test_info();
const internal::TypeId this_fixture_id = this_test_info->fixture_class_id_;
- const char *const this_test_name = this_test_info->name();
+ const char* const this_test_name = this_test_info->name();
if (this_fixture_id != first_fixture_id) {
// Is the first test defined using TEST?
@@ -2407,9 +2490,9 @@ bool Test::HasSameFixtureClass() {
// Gets the name of the TEST and the name of the TEST_F. Note
// that first_is_TEST and this_is_TEST cannot both be true, as
// the fixture IDs are different for the two tests.
- const char *const TEST_name =
+ const char* const TEST_name =
first_is_TEST ? first_test_name : this_test_name;
- const char *const TEST_F_name =
+ const char* const TEST_F_name =
first_is_TEST ? this_test_name : first_test_name;
ADD_FAILURE()
@@ -2447,8 +2530,8 @@ bool Test::HasSameFixtureClass() {
// function returns its result via an output parameter pointer because VC++
// prohibits creation of objects with destructors on stack in functions
// using __try (see error C2712).
-static std::string *FormatSehExceptionMessage(DWORD exception_code,
- const char *location) {
+static std::string* FormatSehExceptionMessage(DWORD exception_code,
+ const char* location) {
Message message;
message << "SEH exception with code 0x" << std::setbase(16) << exception_code
<< std::setbase(10) << " thrown in " << location << ".";
@@ -2463,8 +2546,8 @@ namespace internal {
#if GTEST_HAS_EXCEPTIONS
// Adds an "exception thrown" fatal failure to the current test.
-static std::string FormatCxxExceptionMessage(const char *description,
- const char *location) {
+static std::string FormatCxxExceptionMessage(const char* description,
+ const char* location) {
Message message;
if (description != nullptr) {
message << "C++ exception with description \"" << description << "\"";
@@ -2477,10 +2560,10 @@ static std::string FormatCxxExceptionMessage(const char *description,
}
static std::string PrintTestPartResultToString(
- const TestPartResult &test_part_result);
+ const TestPartResult& test_part_result);
GoogleTestFailureException::GoogleTestFailureException(
- const TestPartResult &failure)
+ const TestPartResult& failure)
: ::std::runtime_error(PrintTestPartResultToString(failure).c_str()) {}
#endif // GTEST_HAS_EXCEPTIONS
@@ -2494,8 +2577,8 @@ GoogleTestFailureException::GoogleTestFailureException(
// exceptions in the same function. Therefore, we provide a separate
// wrapper function for handling SEH exceptions.)
template <class T, typename Result>
-Result HandleSehExceptionsInMethodIfSupported(T *object, Result (T::*method)(),
- const char *location) {
+Result HandleSehExceptionsInMethodIfSupported(T* object, Result (T::*method)(),
+ const char* location) {
#if GTEST_HAS_SEH
__try {
return (object->*method)();
@@ -2504,7 +2587,7 @@ Result HandleSehExceptionsInMethodIfSupported(T *object, Result (T::*method)(),
// We create the exception message on the heap because VC++ prohibits
// creation of objects with destructors on stack in functions using __try
// (see error C2712).
- std::string *exception_message =
+ std::string* exception_message =
FormatSehExceptionMessage(GetExceptionCode(), location);
internal::ReportFailureInUnknownLocation(TestPartResult::kFatalFailure,
*exception_message);
@@ -2521,8 +2604,8 @@ Result HandleSehExceptionsInMethodIfSupported(T *object, Result (T::*method)(),
// exceptions, if they are supported; returns the 0-value for type
// Result in case of an SEH exception.
template <class T, typename Result>
-Result HandleExceptionsInMethodIfSupported(T *object, Result (T::*method)(),
- const char *location) {
+Result HandleExceptionsInMethodIfSupported(T* object, Result (T::*method)(),
+ const char* location) {
// NOTE: The user code can affect the way in which Google Test handles
// exceptions by setting GTEST_FLAG(catch_exceptions), but only before
// RUN_ALL_TESTS() starts. It is technically possible to check the flag
@@ -2532,7 +2615,7 @@ Result HandleExceptionsInMethodIfSupported(T *object, Result (T::*method)(),
// try {
// // Perform the test method.
// } catch (...) {
- // if (GTEST_FLAG(catch_exceptions))
+ // if (GTEST_FLAG_GET(catch_exceptions))
// // Report the exception as failure.
// else
// throw; // Re-throws the original exception.
@@ -2550,14 +2633,14 @@ Result HandleExceptionsInMethodIfSupported(T *object, Result (T::*method)(),
#if GTEST_HAS_EXCEPTIONS
try {
return HandleSehExceptionsInMethodIfSupported(object, method, location);
- } catch (const AssertionException &) { // NOLINT
+ } catch (const AssertionException&) { // NOLINT
// This failure was reported already.
- } catch (const internal::GoogleTestFailureException &) { // NOLINT
+ } catch (const internal::GoogleTestFailureException&) { // NOLINT
// This exception type can only be thrown by a failed Google
// Test assertion with the intention of letting another testing
// framework catch it. Therefore we just re-throw it.
throw;
- } catch (const std::exception &e) { // NOLINT
+ } catch (const std::exception& e) { // NOLINT
internal::ReportFailureInUnknownLocation(
TestPartResult::kFatalFailure,
FormatCxxExceptionMessage(e.what(), location));
@@ -2581,7 +2664,7 @@ Result HandleExceptionsInMethodIfSupported(T *object, Result (T::*method)(),
void Test::Run() {
if (!HasSameFixtureClass()) return;
- internal::UnitTestImpl *const impl = internal::GetUnitTestImpl();
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
impl->os_stack_trace_getter()->UponLeavingGTest();
internal::HandleExceptionsInMethodIfSupported(this, &Test::SetUp, "SetUp()");
// We will run the test only if SetUp() was successful and didn't call
@@ -2621,18 +2704,24 @@ bool Test::IsSkipped() {
// Constructs a TestInfo object. It assumes ownership of the test factory
// object.
-TestInfo::TestInfo(const std::string &a_test_suite_name,
- const std::string &a_name, const char *a_type_param,
- const char *a_value_param,
+TestInfo::TestInfo(const std::string& a_test_suite_name,
+ const std::string& a_name, const char* a_type_param,
+ const char* a_value_param,
internal::CodeLocation a_code_location,
internal::TypeId fixture_class_id,
- internal::TestFactoryBase *factory)
- : test_suite_name_(a_test_suite_name), name_(a_name),
+ internal::TestFactoryBase* factory)
+ : test_suite_name_(a_test_suite_name),
+ name_(a_name),
type_param_(a_type_param ? new std::string(a_type_param) : nullptr),
value_param_(a_value_param ? new std::string(a_value_param) : nullptr),
- location_(a_code_location), fixture_class_id_(fixture_class_id),
- should_run_(false), is_disabled_(false), matches_filter_(false),
- factory_(factory), result_() {}
+ location_(a_code_location),
+ fixture_class_id_(fixture_class_id),
+ should_run_(false),
+ is_disabled_(false),
+ matches_filter_(false),
+ is_in_another_shard_(false),
+ factory_(factory),
+ result_() {}
// Destructs a TestInfo object.
TestInfo::~TestInfo() { delete factory_; }
@@ -2644,7 +2733,7 @@ namespace internal {
//
// Arguments:
//
-// test_suite_name: name of the test suite
+// test_suite_name: name of the test suite
// name: name of the test
// type_param: the name of the test's type parameter, or NULL if
// this is not a typed or a type-parameterized test.
@@ -2657,19 +2746,19 @@ namespace internal {
// factory: pointer to the factory that creates a test object.
// The newly created TestInfo instance will assume
// ownership of the factory object.
-TestInfo *MakeAndRegisterTestInfo(
- const char *test_suite_name, const char *name, const char *type_param,
- const char *value_param, CodeLocation code_location,
+TestInfo* MakeAndRegisterTestInfo(
+ const char* test_suite_name, const char* name, const char* type_param,
+ const char* value_param, CodeLocation code_location,
TypeId fixture_class_id, SetUpTestSuiteFunc set_up_tc,
- TearDownTestSuiteFunc tear_down_tc, TestFactoryBase *factory) {
- TestInfo *const test_info =
+ TearDownTestSuiteFunc tear_down_tc, TestFactoryBase* factory) {
+ TestInfo* const test_info =
new TestInfo(test_suite_name, name, type_param, value_param,
code_location, fixture_class_id, factory);
GetUnitTestImpl()->AddTestInfo(set_up_tc, tear_down_tc, test_info);
return test_info;
}
-void ReportInvalidTestSuiteType(const char *test_suite_name,
+void ReportInvalidTestSuiteType(const char* test_suite_name,
CodeLocation code_location) {
Message errors;
errors
@@ -2703,10 +2792,10 @@ class TestNameIs {
// Constructor.
//
// TestNameIs has NO default constructor.
- explicit TestNameIs(const char *name) : name_(name) {}
+ explicit TestNameIs(const char* name) : name_(name) {}
// Returns true if and only if the test name of test_info matches name_.
- bool operator()(const TestInfo *test_info) const {
+ bool operator()(const TestInfo* test_info) const {
return test_info && test_info->name() == name_;
}
@@ -2734,23 +2823,24 @@ void UnitTestImpl::RegisterParameterizedTests() {
// Creates the test object, runs it, records its result, and then
// deletes it.
void TestInfo::Run() {
- if (!should_run_) return;
+ TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater();
+ if (!should_run_) {
+ if (is_disabled_ && matches_filter_) repeater->OnTestDisabled(*this);
+ return;
+ }
// Tells UnitTest where to store test result.
- internal::UnitTestImpl *const impl = internal::GetUnitTestImpl();
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
impl->set_current_test_info(this);
- TestEventListener *repeater = UnitTest::GetInstance()->listeners().repeater();
-
// Notifies the unit test event listeners that a test is about to start.
repeater->OnTestStart(*this);
-
- const TimeInMillis start = internal::GetTimeInMillis();
-
+ result_.set_start_timestamp(internal::GetTimeInMillis());
+ internal::Timer timer;
impl->os_stack_trace_getter()->UponLeavingGTest();
// Creates the test object.
- Test *const test = internal::HandleExceptionsInMethodIfSupported(
+ Test* const test = internal::HandleExceptionsInMethodIfSupported(
factory_, &internal::TestFactoryBase::CreateTest,
"the test fixture's constructor");
@@ -2770,8 +2860,7 @@ void TestInfo::Run() {
test, &Test::DeleteSelf_, "the test fixture's destructor");
}
- result_.set_start_timestamp(start);
- result_.set_elapsed_time(internal::GetTimeInMillis() - start);
+ result_.set_elapsed_time(timer.Elapsed());
// Notifies the unit test event listener that a test has just finished.
repeater->OnTestEnd(*this);
@@ -2781,6 +2870,28 @@ void TestInfo::Run() {
impl->set_current_test_info(nullptr);
}
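
Run() now takes its elapsed time from internal::Timer instead of differencing two GetTimeInMillis() calls. A minimal sketch of such a timer over <chrono> (a hypothetical stand-in; the real internal::Timer lives in gtest's internal headers):

#include <chrono>
#include <cstdint>

// Hypothetical stand-in for internal::Timer, for illustration only.
class Timer {
 public:
  Timer() : start_(std::chrono::steady_clock::now()) {}

  // Milliseconds elapsed since construction, measured on a monotonic
  // clock that is immune to wall-clock adjustments.
  int64_t Elapsed() const {
    return std::chrono::duration_cast<std::chrono::milliseconds>(
               std::chrono::steady_clock::now() - start_)
        .count();
  }

 private:
  std::chrono::steady_clock::time_point start_;
};
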
+// Skips and records a skipped test result for this object.
+void TestInfo::Skip() {
+ if (!should_run_) return;
+
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
+ impl->set_current_test_info(this);
+
+ TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater();
+
+ // Notifies the unit test event listeners that a test is about to start.
+ repeater->OnTestStart(*this);
+
+ const TestPartResult test_part_result =
+ TestPartResult(TestPartResult::kSkip, this->file(), this->line(), "");
+ impl->GetTestPartResultReporterForCurrentThread()->ReportTestPartResult(
+ test_part_result);
+
+ // Notifies the unit test event listener that a test has just finished.
+ repeater->OnTestEnd(*this);
+ impl->set_current_test_info(nullptr);
+}
+
// class TestSuite
// Gets the number of successful tests in this test suite.
@@ -2827,18 +2938,21 @@ int TestSuite::total_test_count() const {
//
// Arguments:
//
-// name: name of the test suite
+// a_name: name of the test suite
// a_type_param: the name of the test suite's type parameter, or NULL if
// this is not a typed or a type-parameterized test suite.
// set_up_tc: pointer to the function that sets up the test suite
// tear_down_tc: pointer to the function that tears down the test suite
-TestSuite::TestSuite(const char *a_name, const char *a_type_param,
+TestSuite::TestSuite(const char* a_name, const char* a_type_param,
internal::SetUpTestSuiteFunc set_up_tc,
internal::TearDownTestSuiteFunc tear_down_tc)
: name_(a_name),
type_param_(a_type_param ? new std::string(a_type_param) : nullptr),
- set_up_tc_(set_up_tc), tear_down_tc_(tear_down_tc), should_run_(false),
- start_timestamp_(0), elapsed_time_(0) {}
+ set_up_tc_(set_up_tc),
+ tear_down_tc_(tear_down_tc),
+ should_run_(false),
+ start_timestamp_(0),
+ elapsed_time_(0) {}
// Destructor of TestSuite.
TestSuite::~TestSuite() {
@@ -2848,21 +2962,21 @@ TestSuite::~TestSuite() {
// Returns the i-th test among all the tests. i can range from 0 to
// total_test_count() - 1. If i is not in that range, returns NULL.
-const TestInfo *TestSuite::GetTestInfo(int i) const {
+const TestInfo* TestSuite::GetTestInfo(int i) const {
const int index = GetElementOr(test_indices_, i, -1);
return index < 0 ? nullptr : test_info_list_[static_cast<size_t>(index)];
}
// Returns the i-th test among all the tests. i can range from 0 to
// total_test_count() - 1. If i is not in that range, returns NULL.
-TestInfo *TestSuite::GetMutableTestInfo(int i) {
+TestInfo* TestSuite::GetMutableTestInfo(int i) {
const int index = GetElementOr(test_indices_, i, -1);
return index < 0 ? nullptr : test_info_list_[static_cast<size_t>(index)];
}
// Adds a test to this test suite. Will delete the test upon
// destruction of the TestSuite object.
-void TestSuite::AddTestInfo(TestInfo *test_info) {
+void TestSuite::AddTestInfo(TestInfo* test_info) {
test_info_list_.push_back(test_info);
test_indices_.push_back(static_cast<int>(test_indices_.size()));
}
@@ -2871,27 +2985,41 @@ void TestSuite::AddTestInfo(TestInfo *test_info) {
void TestSuite::Run() {
if (!should_run_) return;
- internal::UnitTestImpl *const impl = internal::GetUnitTestImpl();
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
impl->set_current_test_suite(this);
- TestEventListener *repeater = UnitTest::GetInstance()->listeners().repeater();
+ TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater();
// Call both legacy and the new API
repeater->OnTestSuiteStart(*this);
// Legacy API is deprecated but still available
-#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
repeater->OnTestCaseStart(*this);
-#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
impl->os_stack_trace_getter()->UponLeavingGTest();
internal::HandleExceptionsInMethodIfSupported(
this, &TestSuite::RunSetUpTestSuite, "SetUpTestSuite()");
+ const bool skip_all = ad_hoc_test_result().Failed();
+
start_timestamp_ = internal::GetTimeInMillis();
+ internal::Timer timer;
for (int i = 0; i < total_test_count(); i++) {
- GetMutableTestInfo(i)->Run();
+ if (skip_all) {
+ GetMutableTestInfo(i)->Skip();
+ } else {
+ GetMutableTestInfo(i)->Run();
+ }
+ if (GTEST_FLAG_GET(fail_fast) &&
+ GetMutableTestInfo(i)->result()->Failed()) {
+ for (int j = i + 1; j < total_test_count(); j++) {
+ GetMutableTestInfo(j)->Skip();
+ }
+ break;
+ }
}
- elapsed_time_ = internal::GetTimeInMillis() - start_timestamp_;
+ elapsed_time_ = timer.Elapsed();
impl->os_stack_trace_getter()->UponLeavingGTest();
internal::HandleExceptionsInMethodIfSupported(
@@ -2900,9 +3028,39 @@ void TestSuite::Run() {
// Call both legacy and the new API
repeater->OnTestSuiteEnd(*this);
// Legacy API is deprecated but still available
-#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
repeater->OnTestCaseEnd(*this);
-#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+ impl->set_current_test_suite(nullptr);
+}
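
The new control flow above: if SetUpTestSuite() recorded a failure, every test is Skip()ped; otherwise tests run in order, and once one fails under the fail_fast flag the remainder are skipped. A hypothetical demo from the user's side (link against gtest_main):

// With --gtest_fail_fast (or GTEST_FAIL_FAST=1 in the environment), once
// B fails, C is reported as [ SKIPPED ] instead of running.
#include "gtest/gtest.h"

TEST(FailFastDemo, A) { SUCCEED(); }
TEST(FailFastDemo, B) { FAIL() << "first failure stops the rest of the suite"; }
TEST(FailFastDemo, C) { /* skipped under fail_fast */ }
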
+
+// Skips all tests under this TestSuite.
+void TestSuite::Skip() {
+ if (!should_run_) return;
+
+ internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
+ impl->set_current_test_suite(this);
+
+ TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater();
+
+ // Call both legacy and the new API
+ repeater->OnTestSuiteStart(*this);
+// Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ repeater->OnTestCaseStart(*this);
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+ for (int i = 0; i < total_test_count(); i++) {
+ GetMutableTestInfo(i)->Skip();
+ }
+
+ // Call both legacy and the new API
+ repeater->OnTestSuiteEnd(*this);
+ // Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ repeater->OnTestCaseEnd(*this);
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
impl->set_current_test_suite(nullptr);
}
@@ -2914,7 +3072,7 @@ void TestSuite::ClearResult() {
}
// Shuffles the tests in this test suite.
-void TestSuite::ShuffleTests(internal::Random *random) {
+void TestSuite::ShuffleTests(internal::Random* random) {
Shuffle(random, &test_indices_);
}
@@ -2930,8 +3088,8 @@ void TestSuite::UnshuffleTests() {
//
// FormatCountableNoun(1, "formula", "formulae") returns "1 formula".
// FormatCountableNoun(5, "book", "books") returns "5 books".
-static std::string FormatCountableNoun(int count, const char *singular_form,
- const char *plural_form) {
+static std::string FormatCountableNoun(int count, const char* singular_form,
+ const char* plural_form) {
return internal::StreamableToString(count) + " " +
(count == 1 ? singular_form : plural_form);
}
@@ -2950,10 +3108,12 @@ static std::string FormatTestSuiteCount(int test_suite_count) {
// representation. Both kNonFatalFailure and kFatalFailure are translated
// to "Failure", as the user usually doesn't care about the difference
// between the two when viewing the test result.
-static const char *TestPartResultTypeToString(TestPartResult::Type type) {
+static const char* TestPartResultTypeToString(TestPartResult::Type type) {
switch (type) {
- case TestPartResult::kSkip: return "Skipped";
- case TestPartResult::kSuccess: return "Success";
+ case TestPartResult::kSkip:
+ return "Skipped\n";
+ case TestPartResult::kSuccess:
+ return "Success";
case TestPartResult::kNonFatalFailure:
case TestPartResult::kFatalFailure:
@@ -2962,15 +3122,19 @@ static const char *TestPartResultTypeToString(TestPartResult::Type type) {
#else
return "Failure\n";
#endif
- default: return "Unknown result type";
+ default:
+ return "Unknown result type";
}
}
namespace internal {
+namespace {
+enum class GTestColor { kDefault, kRed, kGreen, kYellow };
+} // namespace
// Prints a TestPartResult to an std::string.
static std::string PrintTestPartResultToString(
- const TestPartResult &test_part_result) {
+ const TestPartResult& test_part_result) {
return (Message() << internal::FormatFileLocation(
test_part_result.file_name(),
test_part_result.line_number())
@@ -2981,8 +3145,8 @@ static std::string PrintTestPartResultToString(
}
// Prints a TestPartResult.
-static void PrintTestPartResult(const TestPartResult &test_part_result) {
- const std::string &result = PrintTestPartResultToString(test_part_result);
+static void PrintTestPartResult(const TestPartResult& test_part_result) {
+ const std::string& result = PrintTestPartResultToString(test_part_result);
printf("%s\n", result.c_str());
fflush(stdout);
// If the test program runs in Visual Studio or a debugger, the
@@ -3005,10 +3169,14 @@ static void PrintTestPartResult(const TestPartResult &test_part_result) {
// Returns the character attribute for the given color.
static WORD GetColorAttribute(GTestColor color) {
switch (color) {
- case COLOR_RED: return FOREGROUND_RED;
- case COLOR_GREEN: return FOREGROUND_GREEN;
- case COLOR_YELLOW: return FOREGROUND_RED | FOREGROUND_GREEN;
- default: return 0;
+ case GTestColor::kRed:
+ return FOREGROUND_RED;
+ case GTestColor::kGreen:
+ return FOREGROUND_GREEN;
+ case GTestColor::kYellow:
+ return FOREGROUND_RED | FOREGROUND_GREEN;
+ default:
+ return 0;
}
}
@@ -3045,14 +3213,18 @@ static WORD GetNewColor(GTestColor color, WORD old_color_attrs) {
#else
-// Returns the ANSI color code for the given color. COLOR_DEFAULT is
+// Returns the ANSI color code for the given color. GTestColor::kDefault is
// an invalid input.
-static const char *GetAnsiColorCode(GTestColor color) {
+static const char* GetAnsiColorCode(GTestColor color) {
switch (color) {
- case COLOR_RED: return "1";
- case COLOR_GREEN: return "2";
- case COLOR_YELLOW: return "3";
- default: return nullptr;
+ case GTestColor::kRed:
+ return "1";
+ case GTestColor::kGreen:
+ return "2";
+ case GTestColor::kYellow:
+ return "3";
+ default:
+ return nullptr;
}
}
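
These codes plug into standard SGR escape sequences; a self-contained sketch of the shape ColoredPrintf emits (assumes a VT100-compatible terminal):

#include <cstdio>

// "\033[0;3Nm" selects foreground color N (1 = red, 2 = green); "\033[m"
// resets the terminal to its default attributes.
int main() {
  std::printf("\033[0;3%sm%s\033[m\n", "2", "[  PASSED  ] demo");  // green
  std::printf("\033[0;3%sm%s\033[m\n", "1", "[  FAILED  ] demo");  // red
  return 0;
}
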
@@ -3060,7 +3232,8 @@ static const char *GetAnsiColorCode(GTestColor color) {
// Returns true if and only if Google Test should use colors in the output.
bool ShouldUseColor(bool stdout_is_tty) {
- const char *const gtest_color = GTEST_FLAG(color).c_str();
+ std::string c = GTEST_FLAG_GET(color);
+ const char* const gtest_color = c.c_str();
if (String::CaseInsensitiveCStringEquals(gtest_color, "auto")) {
#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MINGW
@@ -3069,7 +3242,7 @@ bool ShouldUseColor(bool stdout_is_tty) {
return stdout_is_tty;
#else
// On non-Windows platforms, we rely on the TERM variable.
- const char *const term = posix::GetEnv("TERM");
+ const char* const term = posix::GetEnv("TERM");
const bool term_supports_color =
String::CStringEquals(term, "xterm") ||
String::CStringEquals(term, "xterm-color") ||
@@ -3099,18 +3272,15 @@ bool ShouldUseColor(bool stdout_is_tty) {
// cannot simply emit special characters and have the terminal change colors.
// This routine must actually emit the characters rather than return a string
// that would be colored when printed, as can be done on Linux.
-void ColoredPrintf(GTestColor color, const char *fmt, ...) {
+
+GTEST_ATTRIBUTE_PRINTF_(2, 3)
+static void ColoredPrintf(GTestColor color, const char* fmt, ...) {
va_list args;
va_start(args, fmt);
-#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_ZOS || GTEST_OS_IOS || \
- GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT || defined(ESP_PLATFORM)
- const bool use_color = AlwaysFalse();
-#else
static const bool in_color_mode =
ShouldUseColor(posix::IsATTY(posix::FileNo(stdout)) != 0);
- const bool use_color = in_color_mode && (color != COLOR_DEFAULT);
-#endif // GTEST_OS_WINDOWS_MOBILE || GTEST_OS_ZOS
+ const bool use_color = in_color_mode && (color != GTestColor::kDefault);
if (!use_color) {
vprintf(fmt, args);
@@ -3152,9 +3322,9 @@ void ColoredPrintf(GTestColor color, const char *fmt, ...) {
static const char kTypeParamLabel[] = "TypeParam";
static const char kValueParamLabel[] = "GetParam()";
-static void PrintFullTestCommentIfPresent(const TestInfo &test_info) {
- const char *const type_param = test_info.type_param();
- const char *const value_param = test_info.value_param();
+static void PrintFullTestCommentIfPresent(const TestInfo& test_info) {
+ const char* const type_param = test_info.type_param();
+ const char* const value_param = test_info.value_param();
if (type_param != nullptr || value_param != nullptr) {
printf(", where ");
@@ -3174,70 +3344,73 @@ static void PrintFullTestCommentIfPresent(const TestInfo &test_info) {
class PrettyUnitTestResultPrinter : public TestEventListener {
public:
PrettyUnitTestResultPrinter() {}
- static void PrintTestName(const char *test_suite, const char *test) {
+ static void PrintTestName(const char* test_suite, const char* test) {
printf("%s.%s", test_suite, test);
}
// The following methods override what's in the TestEventListener class.
- void OnTestProgramStart(const UnitTest & /*unit_test*/) override {}
- void OnTestIterationStart(const UnitTest &unit_test, int iteration) override;
- void OnEnvironmentsSetUpStart(const UnitTest &unit_test) override;
- void OnEnvironmentsSetUpEnd(const UnitTest & /*unit_test*/) override {}
+ void OnTestProgramStart(const UnitTest& /*unit_test*/) override {}
+ void OnTestIterationStart(const UnitTest& unit_test, int iteration) override;
+ void OnEnvironmentsSetUpStart(const UnitTest& unit_test) override;
+ void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) override {}
#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
- void OnTestCaseStart(const TestCase &test_case) override;
+ void OnTestCaseStart(const TestCase& test_case) override;
#else
- void OnTestSuiteStart(const TestSuite &test_suite) override;
+ void OnTestSuiteStart(const TestSuite& test_suite) override;
#endif // OnTestCaseStart
- void OnTestStart(const TestInfo &test_info) override;
+ void OnTestStart(const TestInfo& test_info) override;
+ void OnTestDisabled(const TestInfo& test_info) override;
- void OnTestPartResult(const TestPartResult &result) override;
- void OnTestEnd(const TestInfo &test_info) override;
+ void OnTestPartResult(const TestPartResult& result) override;
+ void OnTestEnd(const TestInfo& test_info) override;
#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
- void OnTestCaseEnd(const TestCase &test_case) override;
+ void OnTestCaseEnd(const TestCase& test_case) override;
#else
- void OnTestSuiteEnd(const TestSuite &test_suite) override;
+ void OnTestSuiteEnd(const TestSuite& test_suite) override;
#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
- void OnEnvironmentsTearDownStart(const UnitTest &unit_test) override;
- void OnEnvironmentsTearDownEnd(const UnitTest & /*unit_test*/) override {}
- void OnTestIterationEnd(const UnitTest &unit_test, int iteration) override;
- void OnTestProgramEnd(const UnitTest & /*unit_test*/) override {}
+ void OnEnvironmentsTearDownStart(const UnitTest& unit_test) override;
+ void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) override {}
+ void OnTestIterationEnd(const UnitTest& unit_test, int iteration) override;
+ void OnTestProgramEnd(const UnitTest& /*unit_test*/) override {}
private:
- static void PrintFailedTests(const UnitTest &unit_test);
- static void PrintFailedTestSuites(const UnitTest &unit_test);
- static void PrintSkippedTests(const UnitTest &unit_test);
+ static void PrintFailedTests(const UnitTest& unit_test);
+ static void PrintFailedTestSuites(const UnitTest& unit_test);
+ static void PrintSkippedTests(const UnitTest& unit_test);
};
// Fired before each iteration of tests starts.
void PrettyUnitTestResultPrinter::OnTestIterationStart(
- const UnitTest &unit_test, int iteration) {
- if (GTEST_FLAG(repeat) != 1)
+ const UnitTest& unit_test, int iteration) {
+ if (GTEST_FLAG_GET(repeat) != 1)
printf("\nRepeating all tests (iteration %d) . . .\n\n", iteration + 1);
- const char *const filter = GTEST_FLAG(filter).c_str();
+ std::string f = GTEST_FLAG_GET(filter);
+ const char* const filter = f.c_str();
// Prints the filter if it's not *. This reminds the user that some
// tests may be skipped.
if (!String::CStringEquals(filter, kUniversalFilter)) {
- ColoredPrintf(COLOR_YELLOW, "Note: %s filter = %s\n", GTEST_NAME_, filter);
+ ColoredPrintf(GTestColor::kYellow, "Note: %s filter = %s\n", GTEST_NAME_,
+ filter);
}
if (internal::ShouldShard(kTestTotalShards, kTestShardIndex, false)) {
const int32_t shard_index = Int32FromEnvOrDie(kTestShardIndex, -1);
- ColoredPrintf(COLOR_YELLOW, "Note: This is test shard %d of %s.\n",
+ ColoredPrintf(GTestColor::kYellow, "Note: This is test shard %d of %s.\n",
static_cast<int>(shard_index) + 1,
internal::posix::GetEnv(kTestTotalShards));
}
- if (GTEST_FLAG(shuffle)) {
- ColoredPrintf(COLOR_YELLOW,
+ if (GTEST_FLAG_GET(shuffle)) {
+ ColoredPrintf(GTestColor::kYellow,
"Note: Randomizing tests' orders with a seed of %d .\n",
unit_test.random_seed());
}
- ColoredPrintf(COLOR_GREEN, "[==========] ");
+ ColoredPrintf(GTestColor::kGreen, "[==========] ");
printf("Running %s from %s.\n",
FormatTestCount(unit_test.test_to_run_count()).c_str(),
FormatTestSuiteCount(unit_test.test_suite_to_run_count()).c_str());
@@ -3245,17 +3418,17 @@ void PrettyUnitTestResultPrinter::OnTestIterationStart(
}
void PrettyUnitTestResultPrinter::OnEnvironmentsSetUpStart(
- const UnitTest & /*unit_test*/) {
- ColoredPrintf(COLOR_GREEN, "[----------] ");
+ const UnitTest& /*unit_test*/) {
+ ColoredPrintf(GTestColor::kGreen, "[----------] ");
printf("Global test environment set-up.\n");
fflush(stdout);
}
#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
-void PrettyUnitTestResultPrinter::OnTestCaseStart(const TestCase &test_case) {
+void PrettyUnitTestResultPrinter::OnTestCaseStart(const TestCase& test_case) {
const std::string counts =
FormatCountableNoun(test_case.test_to_run_count(), "test", "tests");
- ColoredPrintf(COLOR_GREEN, "[----------] ");
+ ColoredPrintf(GTestColor::kGreen, "[----------] ");
printf("%s from %s", counts.c_str(), test_case.name());
if (test_case.type_param() == nullptr) {
printf("\n");
@@ -3266,10 +3439,10 @@ void PrettyUnitTestResultPrinter::OnTestCaseStart(const TestCase &test_case) {
}
#else
void PrettyUnitTestResultPrinter::OnTestSuiteStart(
- const TestSuite &test_suite) {
+ const TestSuite& test_suite) {
const std::string counts =
FormatCountableNoun(test_suite.test_to_run_count(), "test", "tests");
- ColoredPrintf(COLOR_GREEN, "[----------] ");
+ ColoredPrintf(GTestColor::kGreen, "[----------] ");
printf("%s from %s", counts.c_str(), test_suite.name());
if (test_suite.type_param() == nullptr) {
printf("\n");
@@ -3280,8 +3453,15 @@ void PrettyUnitTestResultPrinter::OnTestSuiteStart(
}
#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
-void PrettyUnitTestResultPrinter::OnTestStart(const TestInfo &test_info) {
- ColoredPrintf(COLOR_GREEN, "[ RUN ] ");
+void PrettyUnitTestResultPrinter::OnTestStart(const TestInfo& test_info) {
+ ColoredPrintf(GTestColor::kGreen, "[ RUN ] ");
+ PrintTestName(test_info.test_suite_name(), test_info.name());
+ printf("\n");
+ fflush(stdout);
+}
+
+void PrettyUnitTestResultPrinter::OnTestDisabled(const TestInfo& test_info) {
+ ColoredPrintf(GTestColor::kYellow, "[ DISABLED ] ");
PrintTestName(test_info.test_suite_name(), test_info.name());
printf("\n");
fflush(stdout);
@@ -3289,10 +3469,11 @@ void PrettyUnitTestResultPrinter::OnTestStart(const TestInfo &test_info) {
// Called after an assertion failure.
void PrettyUnitTestResultPrinter::OnTestPartResult(
- const TestPartResult &result) {
+ const TestPartResult& result) {
switch (result.type()) {
// If the test part succeeded, we don't need to do anything.
- case TestPartResult::kSuccess: return;
+ case TestPartResult::kSuccess:
+ return;
default:
// Print failure message from the assertion
// (e.g. expected this and got that).
@@ -3301,18 +3482,18 @@ void PrettyUnitTestResultPrinter::OnTestPartResult(
}
}
-void PrettyUnitTestResultPrinter::OnTestEnd(const TestInfo &test_info) {
+void PrettyUnitTestResultPrinter::OnTestEnd(const TestInfo& test_info) {
if (test_info.result()->Passed()) {
- ColoredPrintf(COLOR_GREEN, "[ OK ] ");
+ ColoredPrintf(GTestColor::kGreen, "[ OK ] ");
} else if (test_info.result()->Skipped()) {
- ColoredPrintf(COLOR_GREEN, "[ SKIPPED ] ");
+ ColoredPrintf(GTestColor::kGreen, "[ SKIPPED ] ");
} else {
- ColoredPrintf(COLOR_RED, "[ FAILED ] ");
+ ColoredPrintf(GTestColor::kRed, "[ FAILED ] ");
}
PrintTestName(test_info.test_suite_name(), test_info.name());
if (test_info.result()->Failed()) PrintFullTestCommentIfPresent(test_info);
- if (GTEST_FLAG(print_time)) {
+ if (GTEST_FLAG_GET(print_time)) {
printf(" (%s ms)\n",
internal::StreamableToString(test_info.result()->elapsed_time())
.c_str());
@@ -3323,23 +3504,23 @@ void PrettyUnitTestResultPrinter::OnTestEnd(const TestInfo &test_info) {
}
#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
-void PrettyUnitTestResultPrinter::OnTestCaseEnd(const TestCase &test_case) {
- if (!GTEST_FLAG(print_time)) return;
+void PrettyUnitTestResultPrinter::OnTestCaseEnd(const TestCase& test_case) {
+ if (!GTEST_FLAG_GET(print_time)) return;
const std::string counts =
FormatCountableNoun(test_case.test_to_run_count(), "test", "tests");
- ColoredPrintf(COLOR_GREEN, "[----------] ");
+ ColoredPrintf(GTestColor::kGreen, "[----------] ");
printf("%s from %s (%s ms total)\n\n", counts.c_str(), test_case.name(),
internal::StreamableToString(test_case.elapsed_time()).c_str());
fflush(stdout);
}
#else
-void PrettyUnitTestResultPrinter::OnTestSuiteEnd(const TestSuite &test_suite) {
- if (!GTEST_FLAG(print_time)) return;
+void PrettyUnitTestResultPrinter::OnTestSuiteEnd(const TestSuite& test_suite) {
+ if (!GTEST_FLAG_GET(print_time)) return;
const std::string counts =
FormatCountableNoun(test_suite.test_to_run_count(), "test", "tests");
- ColoredPrintf(COLOR_GREEN, "[----------] ");
+ ColoredPrintf(GTestColor::kGreen, "[----------] ");
printf("%s from %s (%s ms total)\n\n", counts.c_str(), test_suite.name(),
internal::StreamableToString(test_suite.elapsed_time()).c_str());
fflush(stdout);
@@ -3347,29 +3528,29 @@ void PrettyUnitTestResultPrinter::OnTestSuiteEnd(const TestSuite &test_suite) {
#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
void PrettyUnitTestResultPrinter::OnEnvironmentsTearDownStart(
- const UnitTest & /*unit_test*/) {
- ColoredPrintf(COLOR_GREEN, "[----------] ");
+ const UnitTest& /*unit_test*/) {
+ ColoredPrintf(GTestColor::kGreen, "[----------] ");
printf("Global test environment tear-down\n");
fflush(stdout);
}
// Internal helper for printing the list of failed tests.
-void PrettyUnitTestResultPrinter::PrintFailedTests(const UnitTest &unit_test) {
+void PrettyUnitTestResultPrinter::PrintFailedTests(const UnitTest& unit_test) {
const int failed_test_count = unit_test.failed_test_count();
- ColoredPrintf(COLOR_RED, "[ FAILED ] ");
+ ColoredPrintf(GTestColor::kRed, "[ FAILED ] ");
printf("%s, listed below:\n", FormatTestCount(failed_test_count).c_str());
for (int i = 0; i < unit_test.total_test_suite_count(); ++i) {
- const TestSuite &test_suite = *unit_test.GetTestSuite(i);
+ const TestSuite& test_suite = *unit_test.GetTestSuite(i);
if (!test_suite.should_run() || (test_suite.failed_test_count() == 0)) {
continue;
}
for (int j = 0; j < test_suite.total_test_count(); ++j) {
- const TestInfo &test_info = *test_suite.GetTestInfo(j);
+ const TestInfo& test_info = *test_suite.GetTestInfo(j);
if (!test_info.should_run() || !test_info.result()->Failed()) {
continue;
}
- ColoredPrintf(COLOR_RED, "[ FAILED ] ");
+ ColoredPrintf(GTestColor::kRed, "[ FAILED ] ");
printf("%s.%s", test_suite.name(), test_info.name());
PrintFullTestCommentIfPresent(test_info);
printf("\n");
@@ -3382,15 +3563,15 @@ void PrettyUnitTestResultPrinter::PrintFailedTests(const UnitTest &unit_test) {
// Internal helper for printing the list of test suite failures not covered by
// PrintFailedTests.
void PrettyUnitTestResultPrinter::PrintFailedTestSuites(
- const UnitTest &unit_test) {
+ const UnitTest& unit_test) {
int suite_failure_count = 0;
for (int i = 0; i < unit_test.total_test_suite_count(); ++i) {
- const TestSuite &test_suite = *unit_test.GetTestSuite(i);
+ const TestSuite& test_suite = *unit_test.GetTestSuite(i);
if (!test_suite.should_run()) {
continue;
}
if (test_suite.ad_hoc_test_result().Failed()) {
- ColoredPrintf(COLOR_RED, "[ FAILED ] ");
+ ColoredPrintf(GTestColor::kRed, "[ FAILED ] ");
printf("%s: SetUpTestSuite or TearDownTestSuite\n", test_suite.name());
++suite_failure_count;
}
@@ -3402,46 +3583,46 @@ void PrettyUnitTestResultPrinter::PrintFailedTestSuites(
}
// Internal helper for printing the list of skipped tests.
-void PrettyUnitTestResultPrinter::PrintSkippedTests(const UnitTest &unit_test) {
+void PrettyUnitTestResultPrinter::PrintSkippedTests(const UnitTest& unit_test) {
const int skipped_test_count = unit_test.skipped_test_count();
if (skipped_test_count == 0) {
return;
}
for (int i = 0; i < unit_test.total_test_suite_count(); ++i) {
- const TestSuite &test_suite = *unit_test.GetTestSuite(i);
+ const TestSuite& test_suite = *unit_test.GetTestSuite(i);
if (!test_suite.should_run() || (test_suite.skipped_test_count() == 0)) {
continue;
}
for (int j = 0; j < test_suite.total_test_count(); ++j) {
- const TestInfo &test_info = *test_suite.GetTestInfo(j);
+ const TestInfo& test_info = *test_suite.GetTestInfo(j);
if (!test_info.should_run() || !test_info.result()->Skipped()) {
continue;
}
- ColoredPrintf(COLOR_GREEN, "[ SKIPPED ] ");
+ ColoredPrintf(GTestColor::kGreen, "[ SKIPPED ] ");
printf("%s.%s", test_suite.name(), test_info.name());
printf("\n");
}
}
}
-void PrettyUnitTestResultPrinter::OnTestIterationEnd(const UnitTest &unit_test,
+void PrettyUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
int /*iteration*/) {
- ColoredPrintf(COLOR_GREEN, "[==========] ");
+ ColoredPrintf(GTestColor::kGreen, "[==========] ");
printf("%s from %s ran.",
FormatTestCount(unit_test.test_to_run_count()).c_str(),
FormatTestSuiteCount(unit_test.test_suite_to_run_count()).c_str());
- if (GTEST_FLAG(print_time)) {
+ if (GTEST_FLAG_GET(print_time)) {
printf(" (%s ms total)",
internal::StreamableToString(unit_test.elapsed_time()).c_str());
}
printf("\n");
- ColoredPrintf(COLOR_GREEN, "[ PASSED ] ");
+ ColoredPrintf(GTestColor::kGreen, "[ PASSED ] ");
printf("%s.\n", FormatTestCount(unit_test.successful_test_count()).c_str());
const int skipped_test_count = unit_test.skipped_test_count();
if (skipped_test_count > 0) {
- ColoredPrintf(COLOR_GREEN, "[ SKIPPED ] ");
+ ColoredPrintf(GTestColor::kGreen, "[ SKIPPED ] ");
printf("%s, listed below:\n", FormatTestCount(skipped_test_count).c_str());
PrintSkippedTests(unit_test);
}
@@ -3452,12 +3633,12 @@ void PrettyUnitTestResultPrinter::OnTestIterationEnd(const UnitTest &unit_test,
}
int num_disabled = unit_test.reportable_disabled_test_count();
- if (num_disabled && !GTEST_FLAG(also_run_disabled_tests)) {
+ if (num_disabled && !GTEST_FLAG_GET(also_run_disabled_tests)) {
if (unit_test.Passed()) {
printf("\n"); // Add a spacer if no FAILURE banner is displayed.
}
- ColoredPrintf(COLOR_YELLOW, " YOU HAVE %d DISABLED %s\n\n", num_disabled,
- num_disabled == 1 ? "TEST" : "TESTS");
+ ColoredPrintf(GTestColor::kYellow, " YOU HAVE %d DISABLED %s\n\n",
+ num_disabled, num_disabled == 1 ? "TEST" : "TESTS");
}
// Ensure that Google Test output is printed before, e.g., heapchecker output.
fflush(stdout);
@@ -3465,6 +3646,111 @@ void PrettyUnitTestResultPrinter::OnTestIterationEnd(const UnitTest &unit_test,
// End PrettyUnitTestResultPrinter
+// This class implements the TestEventListener interface.
+//
+// Class BriefUnitTestResultPrinter is copyable.
+class BriefUnitTestResultPrinter : public TestEventListener {
+ public:
+ BriefUnitTestResultPrinter() {}
+ static void PrintTestName(const char* test_suite, const char* test) {
+ printf("%s.%s", test_suite, test);
+ }
+
+ // The following methods override what's in the TestEventListener class.
+ void OnTestProgramStart(const UnitTest& /*unit_test*/) override {}
+ void OnTestIterationStart(const UnitTest& /*unit_test*/,
+ int /*iteration*/) override {}
+ void OnEnvironmentsSetUpStart(const UnitTest& /*unit_test*/) override {}
+ void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) override {}
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ void OnTestCaseStart(const TestCase& /*test_case*/) override {}
+#else
+ void OnTestSuiteStart(const TestSuite& /*test_suite*/) override {}
+#endif // OnTestCaseStart
+
+ void OnTestStart(const TestInfo& /*test_info*/) override {}
+ void OnTestDisabled(const TestInfo& /*test_info*/) override {}
+
+ void OnTestPartResult(const TestPartResult& result) override;
+ void OnTestEnd(const TestInfo& test_info) override;
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+ void OnTestCaseEnd(const TestCase& /*test_case*/) override {}
+#else
+ void OnTestSuiteEnd(const TestSuite& /*test_suite*/) override {}
+#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+ void OnEnvironmentsTearDownStart(const UnitTest& /*unit_test*/) override {}
+ void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) override {}
+ void OnTestIterationEnd(const UnitTest& unit_test, int iteration) override;
+ void OnTestProgramEnd(const UnitTest& /*unit_test*/) override {}
+};
+
+// Called after an assertion failure.
+void BriefUnitTestResultPrinter::OnTestPartResult(
+ const TestPartResult& result) {
+ switch (result.type()) {
+ // If the test part succeeded, we don't need to do anything.
+ case TestPartResult::kSuccess:
+ return;
+ default:
+ // Print failure message from the assertion
+ // (e.g. expected this and got that).
+ PrintTestPartResult(result);
+ fflush(stdout);
+ }
+}
+
+void BriefUnitTestResultPrinter::OnTestEnd(const TestInfo& test_info) {
+ if (test_info.result()->Failed()) {
+ ColoredPrintf(GTestColor::kRed, "[ FAILED ] ");
+ PrintTestName(test_info.test_suite_name(), test_info.name());
+ PrintFullTestCommentIfPresent(test_info);
+
+ if (GTEST_FLAG_GET(print_time)) {
+ printf(" (%s ms)\n",
+ internal::StreamableToString(test_info.result()->elapsed_time())
+ .c_str());
+ } else {
+ printf("\n");
+ }
+ fflush(stdout);
+ }
+}
+
+void BriefUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
+ int /*iteration*/) {
+ ColoredPrintf(GTestColor::kGreen, "[==========] ");
+ printf("%s from %s ran.",
+ FormatTestCount(unit_test.test_to_run_count()).c_str(),
+ FormatTestSuiteCount(unit_test.test_suite_to_run_count()).c_str());
+ if (GTEST_FLAG_GET(print_time)) {
+ printf(" (%s ms total)",
+ internal::StreamableToString(unit_test.elapsed_time()).c_str());
+ }
+ printf("\n");
+ ColoredPrintf(GTestColor::kGreen, "[ PASSED ] ");
+ printf("%s.\n", FormatTestCount(unit_test.successful_test_count()).c_str());
+
+ const int skipped_test_count = unit_test.skipped_test_count();
+ if (skipped_test_count > 0) {
+ ColoredPrintf(GTestColor::kGreen, "[ SKIPPED ] ");
+ printf("%s.\n", FormatTestCount(skipped_test_count).c_str());
+ }
+
+ int num_disabled = unit_test.reportable_disabled_test_count();
+ if (num_disabled && !GTEST_FLAG_GET(also_run_disabled_tests)) {
+ if (unit_test.Passed()) {
+ printf("\n"); // Add a spacer if no FAILURE banner is displayed.
+ }
+ ColoredPrintf(GTestColor::kYellow, " YOU HAVE %d DISABLED %s\n\n",
+ num_disabled, num_disabled == 1 ? "TEST" : "TESTS");
+ }
+ // Ensure that Google Test output is printed before, e.g., heapchecker output.
+ fflush(stdout);
+}
+
+// End BriefUnitTestResultPrinter
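
BriefUnitTestResultPrinter is wired up internally (it backs the brief output flag), but the listener machinery it plugs into is public. A hedged sketch of installing a custom printer in place of the default one, following the documented Release/Append pattern (QuietPrinter is hypothetical):

#include <cstdio>
#include "gtest/gtest.h"

// Hypothetical minimal listener: only failures reach the console.
class QuietPrinter : public testing::EmptyTestEventListener {
  void OnTestPartResult(const testing::TestPartResult& result) override {
    if (result.failed()) {
      std::fprintf(stderr, "%s:%d: %s\n",
                   result.file_name() ? result.file_name() : "unknown file",
                   result.line_number(), result.summary());
    }
  }
};

int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  testing::TestEventListeners& listeners =
      testing::UnitTest::GetInstance()->listeners();
  delete listeners.Release(listeners.default_result_printer());
  listeners.Append(new QuietPrinter);  // the listener list takes ownership
  return RUN_ALL_TESTS();
}
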
+
// class TestEventRepeater
//
// This class forwards events to other event listeners.
@@ -3472,55 +3758,57 @@ class TestEventRepeater : public TestEventListener {
public:
TestEventRepeater() : forwarding_enabled_(true) {}
~TestEventRepeater() override;
- void Append(TestEventListener *listener);
- TestEventListener *Release(TestEventListener *listener);
+ void Append(TestEventListener* listener);
+ TestEventListener* Release(TestEventListener* listener);
// Controls whether events will be forwarded to listeners_. Set to false
// in death test child processes.
bool forwarding_enabled() const { return forwarding_enabled_; }
void set_forwarding_enabled(bool enable) { forwarding_enabled_ = enable; }
- void OnTestProgramStart(const UnitTest &unit_test) override;
- void OnTestIterationStart(const UnitTest &unit_test, int iteration) override;
- void OnEnvironmentsSetUpStart(const UnitTest &unit_test) override;
- void OnEnvironmentsSetUpEnd(const UnitTest &unit_test) override;
+ void OnTestProgramStart(const UnitTest& unit_test) override;
+ void OnTestIterationStart(const UnitTest& unit_test, int iteration) override;
+ void OnEnvironmentsSetUpStart(const UnitTest& unit_test) override;
+ void OnEnvironmentsSetUpEnd(const UnitTest& unit_test) override;
// Legacy API is deprecated but still available
#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
- void OnTestCaseStart(const TestSuite &parameter) override;
+ void OnTestCaseStart(const TestSuite& parameter) override;
#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
- void OnTestSuiteStart(const TestSuite &parameter) override;
- void OnTestStart(const TestInfo &test_info) override;
- void OnTestPartResult(const TestPartResult &result) override;
- void OnTestEnd(const TestInfo &test_info) override;
+ void OnTestSuiteStart(const TestSuite& parameter) override;
+ void OnTestStart(const TestInfo& test_info) override;
+ void OnTestDisabled(const TestInfo& test_info) override;
+ void OnTestPartResult(const TestPartResult& result) override;
+ void OnTestEnd(const TestInfo& test_info) override;
// Legacy API is deprecated but still available
#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
- void OnTestCaseEnd(const TestCase &parameter) override;
+ void OnTestCaseEnd(const TestCase& parameter) override;
#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
- void OnTestSuiteEnd(const TestSuite &parameter) override;
- void OnEnvironmentsTearDownStart(const UnitTest &unit_test) override;
- void OnEnvironmentsTearDownEnd(const UnitTest &unit_test) override;
- void OnTestIterationEnd(const UnitTest &unit_test, int iteration) override;
- void OnTestProgramEnd(const UnitTest &unit_test) override;
+ void OnTestSuiteEnd(const TestSuite& parameter) override;
+ void OnEnvironmentsTearDownStart(const UnitTest& unit_test) override;
+ void OnEnvironmentsTearDownEnd(const UnitTest& unit_test) override;
+ void OnTestIterationEnd(const UnitTest& unit_test, int iteration) override;
+ void OnTestProgramEnd(const UnitTest& unit_test) override;
private:
// Controls whether events will be forwarded to listeners_. Set to false
// in death test child processes.
bool forwarding_enabled_;
// The list of listeners that receive events.
- std::vector<TestEventListener *> listeners_;
+ std::vector<TestEventListener*> listeners_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventRepeater);
+ TestEventRepeater(const TestEventRepeater&) = delete;
+ TestEventRepeater& operator=(const TestEventRepeater&) = delete;
};
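The change just above recurs throughout this patch: the GTEST_DISALLOW_COPY_AND_ASSIGN_ macro is dropped in favor of spelling the deleted copy operations out inline. A minimal stand-alone illustration of the resulting idiom:

    // C++11 idiom: explicitly deleted copy operations make any attempted
    // copy a compile-time error, with no macro indirection.
    class NonCopyable {
     public:
      NonCopyable() = default;
      NonCopyable(const NonCopyable&) = delete;             // no copy ctor
      NonCopyable& operator=(const NonCopyable&) = delete;  // no copy assign
    };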
TestEventRepeater::~TestEventRepeater() {
ForEach(listeners_, Delete<TestEventListener>);
}
-void TestEventRepeater::Append(TestEventListener *listener) {
+void TestEventRepeater::Append(TestEventListener* listener) {
listeners_.push_back(listener);
}
-TestEventListener *TestEventRepeater::Release(TestEventListener *listener) {
+TestEventListener* TestEventRepeater::Release(TestEventListener* listener) {
for (size_t i = 0; i < listeners_.size(); ++i) {
if (listeners_[i] == listener) {
listeners_.erase(listeners_.begin() + static_cast<int>(i));
@@ -3534,7 +3822,7 @@ TestEventListener *TestEventRepeater::Release(TestEventListener *listener) {
// Since most methods are very similar, use macros to reduce boilerplate.
// This defines a member that forwards the call to all listeners.
#define GTEST_REPEATER_METHOD_(Name, Type) \
- void TestEventRepeater::Name(const Type &parameter) { \
+ void TestEventRepeater::Name(const Type& parameter) { \
if (forwarding_enabled_) { \
for (size_t i = 0; i < listeners_.size(); i++) { \
listeners_[i]->Name(parameter); \
@@ -3544,7 +3832,7 @@ TestEventListener *TestEventRepeater::Release(TestEventListener *listener) {
// This defines a member that forwards the call to all listeners in reverse
// order.
#define GTEST_REVERSE_REPEATER_METHOD_(Name, Type) \
- void TestEventRepeater::Name(const Type &parameter) { \
+ void TestEventRepeater::Name(const Type& parameter) { \
if (forwarding_enabled_) { \
for (size_t i = listeners_.size(); i != 0; i--) { \
listeners_[i - 1]->Name(parameter); \
@@ -3560,6 +3848,7 @@ GTEST_REPEATER_METHOD_(OnTestCaseStart, TestSuite)
#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
GTEST_REPEATER_METHOD_(OnTestSuiteStart, TestSuite)
GTEST_REPEATER_METHOD_(OnTestStart, TestInfo)
+GTEST_REPEATER_METHOD_(OnTestDisabled, TestInfo)
GTEST_REPEATER_METHOD_(OnTestPartResult, TestPartResult)
GTEST_REPEATER_METHOD_(OnEnvironmentsTearDownStart, UnitTest)
GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsSetUpEnd, UnitTest)
@@ -3575,7 +3864,7 @@ GTEST_REVERSE_REPEATER_METHOD_(OnTestProgramEnd, UnitTest)
#undef GTEST_REPEATER_METHOD_
#undef GTEST_REVERSE_REPEATER_METHOD_
-void TestEventRepeater::OnTestIterationStart(const UnitTest &unit_test,
+void TestEventRepeater::OnTestIterationStart(const UnitTest& unit_test,
int iteration) {
if (forwarding_enabled_) {
for (size_t i = 0; i < listeners_.size(); i++) {
@@ -3584,7 +3873,7 @@ void TestEventRepeater::OnTestIterationStart(const UnitTest &unit_test,
}
}
-void TestEventRepeater::OnTestIterationEnd(const UnitTest &unit_test,
+void TestEventRepeater::OnTestIterationEnd(const UnitTest& unit_test,
int iteration) {
if (forwarding_enabled_) {
for (size_t i = listeners_.size(); i > 0; i--) {
@@ -3598,24 +3887,25 @@ void TestEventRepeater::OnTestIterationEnd(const UnitTest &unit_test,
// This class generates an XML output file.
class XmlUnitTestResultPrinter : public EmptyTestEventListener {
public:
- explicit XmlUnitTestResultPrinter(const char *output_file);
+ explicit XmlUnitTestResultPrinter(const char* output_file);
- void OnTestIterationEnd(const UnitTest &unit_test, int iteration) override;
- void ListTestsMatchingFilter(const std::vector<TestSuite *> &test_suites);
+ void OnTestIterationEnd(const UnitTest& unit_test, int iteration) override;
+ void ListTestsMatchingFilter(const std::vector<TestSuite*>& test_suites);
// Prints an XML summary of all unit tests.
- static void PrintXmlTestsList(std::ostream *stream,
- const std::vector<TestSuite *> &test_suites);
+ static void PrintXmlTestsList(std::ostream* stream,
+ const std::vector<TestSuite*>& test_suites);
private:
// Is c a whitespace character that is normalized to a space character
// when it appears in an XML attribute value?
- static bool IsNormalizableWhitespace(char c) {
- return c == 0x9 || c == 0xA || c == 0xD;
+ static bool IsNormalizableWhitespace(unsigned char c) {
+ return c == '\t' || c == '\n' || c == '\r';
}
// May c appear in a well-formed XML document?
- static bool IsValidXmlCharacter(char c) {
+ // https://www.w3.org/TR/REC-xml/#charsets
+ static bool IsValidXmlCharacter(unsigned char c) {
return IsNormalizableWhitespace(c) || c >= 0x20;
}
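Widening the parameter from char to unsigned char matters on platforms where plain char is signed: bytes at or above 0x80 (for example UTF-8 continuation bytes) compare as negative there, so the old `c >= 0x20` test wrongly rejected them. A small demonstration, hand-written for this note:

    #include <cstdio>

    int main() {
      const char raw = static_cast<char>(0xC3);  // first byte of UTF-8 "é"
      const signed char s = raw;                 // plain char on most ABIs
      const unsigned char u = static_cast<unsigned char>(raw);
      std::printf("as signed: %d, as unsigned: %d\n", s, u);  // -61 vs 195
      std::printf("passes c >= 0x20? signed: %d, unsigned: %d\n",
                  s >= 0x20, u >= 0x20);                      // 0 vs 1
    }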
@@ -3623,63 +3913,74 @@ class XmlUnitTestResultPrinter : public EmptyTestEventListener {
// is_attribute is true, the text is meant to appear as an attribute
// value, and normalizable whitespace is preserved by replacing it
// with character references.
- static std::string EscapeXml(const std::string &str, bool is_attribute);
+ static std::string EscapeXml(const std::string& str, bool is_attribute);
// Returns the given string with all characters invalid in XML removed.
- static std::string RemoveInvalidXmlCharacters(const std::string &str);
+ static std::string RemoveInvalidXmlCharacters(const std::string& str);
// Convenience wrapper around EscapeXml when str is an attribute value.
- static std::string EscapeXmlAttribute(const std::string &str) {
+ static std::string EscapeXmlAttribute(const std::string& str) {
return EscapeXml(str, true);
}
// Convenience wrapper around EscapeXml when str is not an attribute value.
- static std::string EscapeXmlText(const char *str) {
+ static std::string EscapeXmlText(const char* str) {
return EscapeXml(str, false);
}
// Verifies that the given attribute belongs to the given element and
// streams the attribute as XML.
- static void OutputXmlAttribute(std::ostream *stream,
- const std::string &element_name,
- const std::string &name,
- const std::string &value);
+ static void OutputXmlAttribute(std::ostream* stream,
+ const std::string& element_name,
+ const std::string& name,
+ const std::string& value);
// Streams an XML CDATA section, escaping invalid CDATA sequences as needed.
- static void OutputXmlCDataSection(::std::ostream *stream, const char *data);
+ static void OutputXmlCDataSection(::std::ostream* stream, const char* data);
+
+ // Streams a test suite XML stanza containing the given test result.
+ //
+ // Requires: result.Failed()
+ static void OutputXmlTestSuiteForTestResult(::std::ostream* stream,
+ const TestResult& result);
+
+ // Streams an XML representation of a TestResult object.
+ static void OutputXmlTestResult(::std::ostream* stream,
+ const TestResult& result);
// Streams an XML representation of a TestInfo object.
- static void OutputXmlTestInfo(::std::ostream *stream,
- const char *test_suite_name,
- const TestInfo &test_info);
+ static void OutputXmlTestInfo(::std::ostream* stream,
+ const char* test_suite_name,
+ const TestInfo& test_info);
// Prints an XML representation of a TestSuite object
- static void PrintXmlTestSuite(::std::ostream *stream,
- const TestSuite &test_suite);
+ static void PrintXmlTestSuite(::std::ostream* stream,
+ const TestSuite& test_suite);
// Prints an XML summary of unit_test to output stream out.
- static void PrintXmlUnitTest(::std::ostream *stream,
- const UnitTest &unit_test);
+ static void PrintXmlUnitTest(::std::ostream* stream,
+ const UnitTest& unit_test);
// Produces a string representing the test properties in a result as space
// delimited XML attributes based on the property key="value" pairs.
// When the std::string is not empty, it includes a space at the beginning,
// to delimit this attribute from prior attributes.
- static std::string TestPropertiesAsXmlAttributes(const TestResult &result);
+ static std::string TestPropertiesAsXmlAttributes(const TestResult& result);
// Streams an XML representation of the test properties of a TestResult
// object.
- static void OutputXmlTestProperties(std::ostream *stream,
- const TestResult &result);
+ static void OutputXmlTestProperties(std::ostream* stream,
+ const TestResult& result);
// The output file.
const std::string output_file_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(XmlUnitTestResultPrinter);
+ XmlUnitTestResultPrinter(const XmlUnitTestResultPrinter&) = delete;
+ XmlUnitTestResultPrinter& operator=(const XmlUnitTestResultPrinter&) = delete;
};
// Creates a new XmlUnitTestResultPrinter.
-XmlUnitTestResultPrinter::XmlUnitTestResultPrinter(const char *output_file)
+XmlUnitTestResultPrinter::XmlUnitTestResultPrinter(const char* output_file)
: output_file_(output_file) {
if (output_file_.empty()) {
GTEST_LOG_(FATAL) << "XML output file may not be null";
@@ -3687,9 +3988,9 @@ XmlUnitTestResultPrinter::XmlUnitTestResultPrinter(const char *output_file)
}
// Called after the unit test ends.
-void XmlUnitTestResultPrinter::OnTestIterationEnd(const UnitTest &unit_test,
+void XmlUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
int /*iteration*/) {
- FILE *xmlout = OpenFileForWriting(output_file_);
+ FILE* xmlout = OpenFileForWriting(output_file_);
std::stringstream stream;
PrintXmlUnitTest(&stream, unit_test);
fprintf(xmlout, "%s", StringStreamToString(&stream).c_str());
@@ -3697,8 +3998,8 @@ void XmlUnitTestResultPrinter::OnTestIterationEnd(const UnitTest &unit_test,
}
void XmlUnitTestResultPrinter::ListTestsMatchingFilter(
- const std::vector<TestSuite *> &test_suites) {
- FILE *xmlout = OpenFileForWriting(output_file_);
+ const std::vector<TestSuite*>& test_suites) {
+ FILE* xmlout = OpenFileForWriting(output_file_);
std::stringstream stream;
PrintXmlTestsList(&stream, test_suites);
fprintf(xmlout, "%s", StringStreamToString(&stream).c_str());
@@ -3715,16 +4016,22 @@ void XmlUnitTestResultPrinter::ListTestsMatchingFilter(
// module will consist of ordinary English text.
// If this module is ever modified to produce version 1.1 XML output,
// most invalid characters can be retained using character references.
-std::string XmlUnitTestResultPrinter::EscapeXml(const std::string &str,
+std::string XmlUnitTestResultPrinter::EscapeXml(const std::string& str,
bool is_attribute) {
Message m;
for (size_t i = 0; i < str.size(); ++i) {
const char ch = str[i];
switch (ch) {
- case '<': m << "&lt;"; break;
- case '>': m << "&gt;"; break;
- case '&': m << "&amp;"; break;
+ case '<':
+ m << "&lt;";
+ break;
+ case '>':
+ m << "&gt;";
+ break;
+ case '&':
+ m << "&amp;";
+ break;
case '\'':
if (is_attribute)
m << "&apos;";
@@ -3738,8 +4045,9 @@ std::string XmlUnitTestResultPrinter::EscapeXml(const std::string &str,
m << '"';
break;
default:
- if (IsValidXmlCharacter(ch)) {
- if (is_attribute && IsNormalizableWhitespace(ch))
+ if (IsValidXmlCharacter(static_cast<unsigned char>(ch))) {
+ if (is_attribute &&
+ IsNormalizableWhitespace(static_cast<unsigned char>(ch)))
m << "&#x" << String::FormatByte(static_cast<unsigned char>(ch))
<< ";";
else
@@ -3756,18 +4064,18 @@ std::string XmlUnitTestResultPrinter::EscapeXml(const std::string &str,
// Currently invalid characters are dropped from the string. An
// alternative is to replace them with certain characters such as . or ?.
std::string XmlUnitTestResultPrinter::RemoveInvalidXmlCharacters(
- const std::string &str) {
+ const std::string& str) {
std::string output;
output.reserve(str.size());
for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
- if (IsValidXmlCharacter(*it)) output.push_back(*it);
+ if (IsValidXmlCharacter(static_cast<unsigned char>(*it)))
+ output.push_back(*it);
return output;
}
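Illustrative expectations for the two helpers above, hand-derived from their logic:

    // EscapeXml("<&>", /*is_attribute=*/false) -> "&lt;&amp;&gt;"
    // EscapeXml("a\tb", /*is_attribute=*/true) -> "a&#x09;b"
    //   (normalizable whitespace in attributes becomes a character reference)
    // RemoveInvalidXmlCharacters("a" + byte 0x01 + "b") -> "ab"
    //   (invalid control bytes are silently dropped)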
// The following routines generate an XML representation of a UnitTest
// object.
-// GOOGLETEST_CM0009 DO NOT DELETE
//
// This is how Google Test concepts map to the DTD:
//
@@ -3789,16 +4097,20 @@ std::string FormatTimeInMillisAsSeconds(TimeInMillis ms) {
return ss.str();
}
-static bool PortableLocaltime(time_t seconds, struct tm *out) {
+static bool PortableLocaltime(time_t seconds, struct tm* out) {
#if defined(_MSC_VER)
return localtime_s(out, &seconds) == 0;
#elif defined(__MINGW32__) || defined(__MINGW64__)
// MINGW <time.h> provides neither localtime_r nor localtime_s, but uses
// Windows' localtime(), which has a thread-local tm buffer.
- struct tm *tm_ptr = localtime(&seconds); // NOLINT
+ struct tm* tm_ptr = localtime(&seconds); // NOLINT
if (tm_ptr == nullptr) return false;
*out = *tm_ptr;
return true;
+#elif defined(__STDC_LIB_EXT1__)
+ // Uses localtime_s when available, as localtime_r is only available from
+ // the C23 standard onward.
+ return localtime_s(&seconds, out) != nullptr;
#else
return localtime_r(&seconds, out) != nullptr;
#endif
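The three thread-safe localtime flavors used above differ in argument order and return type, which is easy to misread. A hedged summary (from the C and POSIX specifications, not from this patch):

    //   MSVC:        errno_t    localtime_s(struct tm* out, const time_t* t);
    //   C11 Annex K: struct tm* localtime_s(const time_t* t, struct tm* out);
    //   POSIX:       struct tm* localtime_r(const time_t* t, struct tm* out);

Hence the new __STDC_LIB_EXT1__ branch passes (&seconds, out) and compares the result against nullptr, while the MSC branch compares its errno_t result against 0.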
@@ -3810,22 +4122,23 @@ std::string FormatEpochTimeInMillisAsIso8601(TimeInMillis ms) {
struct tm time_struct;
if (!PortableLocaltime(static_cast<time_t>(ms / 1000), &time_struct))
return "";
- // YYYY-MM-DDThh:mm:ss
+ // YYYY-MM-DDThh:mm:ss.sss
return StreamableToString(time_struct.tm_year + 1900) + "-" +
String::FormatIntWidth2(time_struct.tm_mon + 1) + "-" +
String::FormatIntWidth2(time_struct.tm_mday) + "T" +
String::FormatIntWidth2(time_struct.tm_hour) + ":" +
String::FormatIntWidth2(time_struct.tm_min) + ":" +
- String::FormatIntWidth2(time_struct.tm_sec);
+ String::FormatIntWidth2(time_struct.tm_sec) + "." +
+ String::FormatIntWidthN(static_cast<int>(ms % 1000), 3);
}
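The ISO 8601 stamp now carries millisecond precision. The suffix is simply the millisecond remainder of the epoch value, zero-padded to three digits; a tiny hand-rolled check of the padding behavior:

    #include <cstdio>

    int main() {
      const long long stamps_ms[] = {1670636076123LL, 1670636076007LL};
      for (const long long ms : stamps_ms) {
        // FormatIntWidthN(ms % 1000, 3) yields "123" and "007" respectively;
        // the patch appends "." plus that to the YYYY-MM-DDThh:mm:ss stamp.
        std::printf(".%03d\n", static_cast<int>(ms % 1000));
      }
    }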
// Streams an XML CDATA section, escaping invalid CDATA sequences as needed.
-void XmlUnitTestResultPrinter::OutputXmlCDataSection(::std::ostream *stream,
- const char *data) {
- const char *segment = data;
+void XmlUnitTestResultPrinter::OutputXmlCDataSection(::std::ostream* stream,
+ const char* data) {
+ const char* segment = data;
*stream << "<![CDATA[";
for (;;) {
- const char *const next_segment = strstr(segment, "]]>");
+ const char* const next_segment = strstr(segment, "]]>");
if (next_segment != nullptr) {
stream->write(segment,
static_cast<std::streamsize>(next_segment - segment));
@@ -3840,9 +4153,9 @@ void XmlUnitTestResultPrinter::OutputXmlCDataSection(::std::ostream *stream,
}
void XmlUnitTestResultPrinter::OutputXmlAttribute(
- std::ostream *stream, const std::string &element_name,
- const std::string &name, const std::string &value) {
- const std::vector<std::string> &allowed_names =
+ std::ostream* stream, const std::string& element_name,
+ const std::string& name, const std::string& value) {
+ const std::vector<std::string>& allowed_names =
GetReservedOutputAttributesForElement(element_name);
GTEST_CHECK_(std::find(allowed_names.begin(), allowed_names.end(), name) !=
@@ -3853,11 +4166,48 @@ void XmlUnitTestResultPrinter::OutputXmlAttribute(
*stream << " " << name << "=\"" << EscapeXmlAttribute(value) << "\"";
}
+// Streams a test suite XML stanza containing the given test result.
+void XmlUnitTestResultPrinter::OutputXmlTestSuiteForTestResult(
+ ::std::ostream* stream, const TestResult& result) {
+ // Output the boilerplate for a minimal test suite with one test.
+ *stream << " <testsuite";
+ OutputXmlAttribute(stream, "testsuite", "name", "NonTestSuiteFailure");
+ OutputXmlAttribute(stream, "testsuite", "tests", "1");
+ OutputXmlAttribute(stream, "testsuite", "failures", "1");
+ OutputXmlAttribute(stream, "testsuite", "disabled", "0");
+ OutputXmlAttribute(stream, "testsuite", "skipped", "0");
+ OutputXmlAttribute(stream, "testsuite", "errors", "0");
+ OutputXmlAttribute(stream, "testsuite", "time",
+ FormatTimeInMillisAsSeconds(result.elapsed_time()));
+ OutputXmlAttribute(
+ stream, "testsuite", "timestamp",
+ FormatEpochTimeInMillisAsIso8601(result.start_timestamp()));
+ *stream << ">";
+
+ // Output the boilerplate for a minimal test case with a single test.
+ *stream << " <testcase";
+ OutputXmlAttribute(stream, "testcase", "name", "");
+ OutputXmlAttribute(stream, "testcase", "status", "run");
+ OutputXmlAttribute(stream, "testcase", "result", "completed");
+ OutputXmlAttribute(stream, "testcase", "classname", "");
+ OutputXmlAttribute(stream, "testcase", "time",
+ FormatTimeInMillisAsSeconds(result.elapsed_time()));
+ OutputXmlAttribute(
+ stream, "testcase", "timestamp",
+ FormatEpochTimeInMillisAsIso8601(result.start_timestamp()));
+
+ // Output the actual test result.
+ OutputXmlTestResult(stream, result);
+
+ // Complete the test suite.
+ *stream << " </testsuite>\n";
+}
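For a failure recorded outside any test suite (for example in a global test environment), the stanza emitted above looks roughly like the following; attribute values are illustrative, and the nested <failure> element comes from OutputXmlTestResult:

      <testsuite name="NonTestSuiteFailure" tests="1" failures="1" disabled="0"
                 skipped="0" errors="0" time="0.003" timestamp="...">
        <testcase name="" status="run" result="completed" classname=""
                  time="0.003" timestamp="...">
          <failure message="..." type=""><![CDATA[...]]></failure>
        </testcase>
      </testsuite>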
+
// Prints an XML representation of a TestInfo object.
-void XmlUnitTestResultPrinter::OutputXmlTestInfo(::std::ostream *stream,
- const char *test_suite_name,
- const TestInfo &test_info) {
- const TestResult &result = *test_info.result();
+void XmlUnitTestResultPrinter::OutputXmlTestInfo(::std::ostream* stream,
+ const char* test_suite_name,
+ const TestInfo& test_info) {
+ const TestResult& result = *test_info.result();
const std::string kTestsuite = "testcase";
if (test_info.is_in_another_shard()) {
@@ -3875,10 +4225,11 @@ void XmlUnitTestResultPrinter::OutputXmlTestInfo(::std::ostream *stream,
OutputXmlAttribute(stream, kTestsuite, "type_param",
test_info.type_param());
}
- if (GTEST_FLAG(list_tests)) {
- OutputXmlAttribute(stream, kTestsuite, "file", test_info.file());
- OutputXmlAttribute(stream, kTestsuite, "line",
- StreamableToString(test_info.line()));
+
+ OutputXmlAttribute(stream, kTestsuite, "file", test_info.file());
+ OutputXmlAttribute(stream, kTestsuite, "line",
+ StreamableToString(test_info.line()));
+ if (GTEST_FLAG_GET(list_tests)) {
*stream << " />\n";
return;
}
@@ -3896,29 +4247,48 @@ void XmlUnitTestResultPrinter::OutputXmlTestInfo(::std::ostream *stream,
FormatEpochTimeInMillisAsIso8601(result.start_timestamp()));
OutputXmlAttribute(stream, kTestsuite, "classname", test_suite_name);
+ OutputXmlTestResult(stream, result);
+}
+
+void XmlUnitTestResultPrinter::OutputXmlTestResult(::std::ostream* stream,
+ const TestResult& result) {
int failures = 0;
+ int skips = 0;
for (int i = 0; i < result.total_part_count(); ++i) {
- const TestPartResult &part = result.GetTestPartResult(i);
+ const TestPartResult& part = result.GetTestPartResult(i);
if (part.failed()) {
- if (++failures == 1) {
+ if (++failures == 1 && skips == 0) {
*stream << ">\n";
}
const std::string location =
internal::FormatCompilerIndependentFileLocation(part.file_name(),
part.line_number());
const std::string summary = location + "\n" + part.summary();
- *stream << " <failure message=\""
- << EscapeXmlAttribute(summary.c_str()) << "\" type=\"\">";
+ *stream << " <failure message=\"" << EscapeXmlAttribute(summary)
+ << "\" type=\"\">";
const std::string detail = location + "\n" + part.message();
OutputXmlCDataSection(stream, RemoveInvalidXmlCharacters(detail).c_str());
*stream << "</failure>\n";
+ } else if (part.skipped()) {
+ if (++skips == 1 && failures == 0) {
+ *stream << ">\n";
+ }
+ const std::string location =
+ internal::FormatCompilerIndependentFileLocation(part.file_name(),
+ part.line_number());
+ const std::string summary = location + "\n" + part.summary();
+ *stream << " <skipped message=\""
+ << EscapeXmlAttribute(summary.c_str()) << "\">";
+ const std::string detail = location + "\n" + part.message();
+ OutputXmlCDataSection(stream, RemoveInvalidXmlCharacters(detail).c_str());
+ *stream << "</skipped>\n";
}
}
- if (failures == 0 && result.test_property_count() == 0) {
+ if (failures == 0 && skips == 0 && result.test_property_count() == 0) {
*stream << " />\n";
} else {
- if (failures == 0) {
+ if (failures == 0 && skips == 0) {
*stream << ">\n";
}
OutputXmlTestProperties(stream, result);
@@ -3927,20 +4297,24 @@ void XmlUnitTestResultPrinter::OutputXmlTestInfo(::std::ostream *stream,
}
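One subtlety in OutputXmlTestResult above: the guards `++failures == 1 && skips == 0` and `++skips == 1 && failures == 0` terminate the enclosing element's opening tag with ">" exactly once, on the first failed or skipped part encountered; if no part fails or skips and no properties were recorded, the element is self-closed with " />" instead.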
// Prints an XML representation of a TestSuite object
-void XmlUnitTestResultPrinter::PrintXmlTestSuite(std::ostream *stream,
- const TestSuite &test_suite) {
+void XmlUnitTestResultPrinter::PrintXmlTestSuite(std::ostream* stream,
+ const TestSuite& test_suite) {
const std::string kTestsuite = "testsuite";
*stream << " <" << kTestsuite;
OutputXmlAttribute(stream, kTestsuite, "name", test_suite.name());
OutputXmlAttribute(stream, kTestsuite, "tests",
StreamableToString(test_suite.reportable_test_count()));
- if (!GTEST_FLAG(list_tests)) {
+ if (!GTEST_FLAG_GET(list_tests)) {
OutputXmlAttribute(stream, kTestsuite, "failures",
StreamableToString(test_suite.failed_test_count()));
OutputXmlAttribute(
stream, kTestsuite, "disabled",
StreamableToString(test_suite.reportable_disabled_test_count()));
+ OutputXmlAttribute(stream, kTestsuite, "skipped",
+ StreamableToString(test_suite.skipped_test_count()));
+
OutputXmlAttribute(stream, kTestsuite, "errors", "0");
+
OutputXmlAttribute(stream, kTestsuite, "time",
FormatTimeInMillisAsSeconds(test_suite.elapsed_time()));
OutputXmlAttribute(
@@ -3957,8 +4331,8 @@ void XmlUnitTestResultPrinter::PrintXmlTestSuite(std::ostream *stream,
}
// Prints an XML summary of unit_test to output stream out.
-void XmlUnitTestResultPrinter::PrintXmlUnitTest(std::ostream *stream,
- const UnitTest &unit_test) {
+void XmlUnitTestResultPrinter::PrintXmlUnitTest(std::ostream* stream,
+ const UnitTest& unit_test) {
const std::string kTestsuites = "testsuites";
*stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
@@ -3978,7 +4352,7 @@ void XmlUnitTestResultPrinter::PrintXmlUnitTest(std::ostream *stream,
stream, kTestsuites, "timestamp",
FormatEpochTimeInMillisAsIso8601(unit_test.start_timestamp()));
- if (GTEST_FLAG(shuffle)) {
+ if (GTEST_FLAG_GET(shuffle)) {
OutputXmlAttribute(stream, kTestsuites, "random_seed",
StreamableToString(unit_test.random_seed()));
}
@@ -3991,11 +4365,18 @@ void XmlUnitTestResultPrinter::PrintXmlUnitTest(std::ostream *stream,
if (unit_test.GetTestSuite(i)->reportable_test_count() > 0)
PrintXmlTestSuite(stream, *unit_test.GetTestSuite(i));
}
+
+ // If there was a test failure outside of one of the test suites (like in a
+ // test environment), include that in the output.
+ if (unit_test.ad_hoc_test_result().Failed()) {
+ OutputXmlTestSuiteForTestResult(stream, unit_test.ad_hoc_test_result());
+ }
+
*stream << "</" << kTestsuites << ">\n";
}
void XmlUnitTestResultPrinter::PrintXmlTestsList(
- std::ostream *stream, const std::vector<TestSuite *> &test_suites) {
+ std::ostream* stream, const std::vector<TestSuite*>& test_suites) {
const std::string kTestsuites = "testsuites";
*stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
@@ -4019,10 +4400,10 @@ void XmlUnitTestResultPrinter::PrintXmlTestsList(
// Produces a string representing the test properties in a result as space
// delimited XML attributes based on the property key="value" pairs.
std::string XmlUnitTestResultPrinter::TestPropertiesAsXmlAttributes(
- const TestResult &result) {
+ const TestResult& result) {
Message attributes;
for (int i = 0; i < result.test_property_count(); ++i) {
- const TestProperty &property = result.GetTestProperty(i);
+ const TestProperty& property = result.GetTestProperty(i);
attributes << " " << property.key() << "="
<< "\"" << EscapeXmlAttribute(property.value()) << "\"";
}
@@ -4030,7 +4411,7 @@ std::string XmlUnitTestResultPrinter::TestPropertiesAsXmlAttributes(
}
void XmlUnitTestResultPrinter::OutputXmlTestProperties(
- std::ostream *stream, const TestResult &result) {
+ std::ostream* stream, const TestResult& result) {
const std::string kProperties = "properties";
const std::string kProperty = "property";
@@ -4038,15 +4419,15 @@ void XmlUnitTestResultPrinter::OutputXmlTestProperties(
return;
}
- *stream << "<" << kProperties << ">\n";
+ *stream << " <" << kProperties << ">\n";
for (int i = 0; i < result.test_property_count(); ++i) {
- const TestProperty &property = result.GetTestProperty(i);
- *stream << "<" << kProperty;
+ const TestProperty& property = result.GetTestProperty(i);
+ *stream << " <" << kProperty;
*stream << " name=\"" << EscapeXmlAttribute(property.key()) << "\"";
*stream << " value=\"" << EscapeXmlAttribute(property.value()) << "\"";
*stream << "/>\n";
}
- *stream << "</" << kProperties << ">\n";
+ *stream << " </" << kProperties << ">\n";
}
// End XmlUnitTestResultPrinter
@@ -4054,64 +4435,76 @@ void XmlUnitTestResultPrinter::OutputXmlTestProperties(
// This class generates a JSON output file.
class JsonUnitTestResultPrinter : public EmptyTestEventListener {
public:
- explicit JsonUnitTestResultPrinter(const char *output_file);
+ explicit JsonUnitTestResultPrinter(const char* output_file);
- void OnTestIterationEnd(const UnitTest &unit_test, int iteration) override;
+ void OnTestIterationEnd(const UnitTest& unit_test, int iteration) override;
// Prints a JSON summary of all unit tests.
- static void PrintJsonTestList(::std::ostream *stream,
- const std::vector<TestSuite *> &test_suites);
+ static void PrintJsonTestList(::std::ostream* stream,
+ const std::vector<TestSuite*>& test_suites);
private:
// Returns a JSON-escaped copy of the input string str.
- static std::string EscapeJson(const std::string &str);
+ static std::string EscapeJson(const std::string& str);
// Verifies that the given attribute belongs to the given element and
// streams the attribute as JSON.
- static void OutputJsonKey(std::ostream *stream,
- const std::string &element_name,
- const std::string &name, const std::string &value,
- const std::string &indent, bool comma = true);
- static void OutputJsonKey(std::ostream *stream,
- const std::string &element_name,
- const std::string &name, int value,
- const std::string &indent, bool comma = true);
+ static void OutputJsonKey(std::ostream* stream,
+ const std::string& element_name,
+ const std::string& name, const std::string& value,
+ const std::string& indent, bool comma = true);
+ static void OutputJsonKey(std::ostream* stream,
+ const std::string& element_name,
+ const std::string& name, int value,
+ const std::string& indent, bool comma = true);
+
+ // Streams a test suite JSON stanza containing the given test result.
+ //
+ // Requires: result.Failed()
+ static void OutputJsonTestSuiteForTestResult(::std::ostream* stream,
+ const TestResult& result);
+
+ // Streams a JSON representation of a TestResult object.
+ static void OutputJsonTestResult(::std::ostream* stream,
+ const TestResult& result);
// Streams a JSON representation of a TestInfo object.
- static void OutputJsonTestInfo(::std::ostream *stream,
- const char *test_suite_name,
- const TestInfo &test_info);
+ static void OutputJsonTestInfo(::std::ostream* stream,
+ const char* test_suite_name,
+ const TestInfo& test_info);
// Prints a JSON representation of a TestSuite object
- static void PrintJsonTestSuite(::std::ostream *stream,
- const TestSuite &test_suite);
+ static void PrintJsonTestSuite(::std::ostream* stream,
+ const TestSuite& test_suite);
// Prints a JSON summary of unit_test to output stream out.
- static void PrintJsonUnitTest(::std::ostream *stream,
- const UnitTest &unit_test);
+ static void PrintJsonUnitTest(::std::ostream* stream,
+ const UnitTest& unit_test);
// Produces a string representing the test properties in a result as
// a JSON dictionary.
- static std::string TestPropertiesAsJson(const TestResult &result,
- const std::string &indent);
+ static std::string TestPropertiesAsJson(const TestResult& result,
+ const std::string& indent);
// The output file.
const std::string output_file_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(JsonUnitTestResultPrinter);
+ JsonUnitTestResultPrinter(const JsonUnitTestResultPrinter&) = delete;
+ JsonUnitTestResultPrinter& operator=(const JsonUnitTestResultPrinter&) =
+ delete;
};
// Creates a new JsonUnitTestResultPrinter.
-JsonUnitTestResultPrinter::JsonUnitTestResultPrinter(const char *output_file)
+JsonUnitTestResultPrinter::JsonUnitTestResultPrinter(const char* output_file)
: output_file_(output_file) {
if (output_file_.empty()) {
GTEST_LOG_(FATAL) << "JSON output file may not be null";
}
}
-void JsonUnitTestResultPrinter::OnTestIterationEnd(const UnitTest &unit_test,
+void JsonUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
int /*iteration*/) {
- FILE *jsonout = OpenFileForWriting(output_file_);
+ FILE* jsonout = OpenFileForWriting(output_file_);
std::stringstream stream;
PrintJsonUnitTest(&stream, unit_test);
fprintf(jsonout, "%s", StringStreamToString(&stream).c_str());
@@ -4119,7 +4512,7 @@ void JsonUnitTestResultPrinter::OnTestIterationEnd(const UnitTest &unit_test,
}
// Returns a JSON-escaped copy of the input string str.
-std::string JsonUnitTestResultPrinter::EscapeJson(const std::string &str) {
+std::string JsonUnitTestResultPrinter::EscapeJson(const std::string& str) {
Message m;
for (size_t i = 0; i < str.size(); ++i) {
@@ -4127,12 +4520,24 @@ std::string JsonUnitTestResultPrinter::EscapeJson(const std::string &str) {
switch (ch) {
case '\\':
case '"':
- case '/': m << '\\' << ch; break;
- case '\b': m << "\\b"; break;
- case '\t': m << "\\t"; break;
- case '\n': m << "\\n"; break;
- case '\f': m << "\\f"; break;
- case '\r': m << "\\r"; break;
+ case '/':
+ m << '\\' << ch;
+ break;
+ case '\b':
+ m << "\\b";
+ break;
+ case '\t':
+ m << "\\t";
+ break;
+ case '\n':
+ m << "\\n";
+ break;
+ case '\f':
+ m << "\\f";
+ break;
+ case '\r':
+ m << "\\r";
+ break;
default:
if (ch < ' ') {
m << "\\u00" << String::FormatByte(static_cast<unsigned char>(ch));
@@ -4175,13 +4580,13 @@ static inline std::string Indent(size_t width) {
return std::string(width, ' ');
}
-void JsonUnitTestResultPrinter::OutputJsonKey(std::ostream *stream,
- const std::string &element_name,
- const std::string &name,
- const std::string &value,
- const std::string &indent,
+void JsonUnitTestResultPrinter::OutputJsonKey(std::ostream* stream,
+ const std::string& element_name,
+ const std::string& name,
+ const std::string& value,
+ const std::string& indent,
bool comma) {
- const std::vector<std::string> &allowed_names =
+ const std::vector<std::string>& allowed_names =
GetReservedOutputAttributesForElement(element_name);
GTEST_CHECK_(std::find(allowed_names.begin(), allowed_names.end(), name) !=
@@ -4194,9 +4599,9 @@ void JsonUnitTestResultPrinter::OutputJsonKey(std::ostream *stream,
}
void JsonUnitTestResultPrinter::OutputJsonKey(
- std::ostream *stream, const std::string &element_name,
- const std::string &name, int value, const std::string &indent, bool comma) {
- const std::vector<std::string> &allowed_names =
+ std::ostream* stream, const std::string& element_name,
+ const std::string& name, int value, const std::string& indent, bool comma) {
+ const std::vector<std::string>& allowed_names =
GetReservedOutputAttributesForElement(element_name);
GTEST_CHECK_(std::find(allowed_names.begin(), allowed_names.end(), name) !=
@@ -4208,11 +4613,53 @@ void JsonUnitTestResultPrinter::OutputJsonKey(
if (comma) *stream << ",\n";
}
+// Streams a test suite JSON stanza containing the given test result.
+void JsonUnitTestResultPrinter::OutputJsonTestSuiteForTestResult(
+ ::std::ostream* stream, const TestResult& result) {
+ // Output the boilerplate for a new test suite.
+ *stream << Indent(4) << "{\n";
+ OutputJsonKey(stream, "testsuite", "name", "NonTestSuiteFailure", Indent(6));
+ OutputJsonKey(stream, "testsuite", "tests", 1, Indent(6));
+ if (!GTEST_FLAG_GET(list_tests)) {
+ OutputJsonKey(stream, "testsuite", "failures", 1, Indent(6));
+ OutputJsonKey(stream, "testsuite", "disabled", 0, Indent(6));
+ OutputJsonKey(stream, "testsuite", "skipped", 0, Indent(6));
+ OutputJsonKey(stream, "testsuite", "errors", 0, Indent(6));
+ OutputJsonKey(stream, "testsuite", "time",
+ FormatTimeInMillisAsDuration(result.elapsed_time()),
+ Indent(6));
+ OutputJsonKey(stream, "testsuite", "timestamp",
+ FormatEpochTimeInMillisAsRFC3339(result.start_timestamp()),
+ Indent(6));
+ }
+ *stream << Indent(6) << "\"testsuite\": [\n";
+
+ // Output the boilerplate for a new test case.
+ *stream << Indent(8) << "{\n";
+ OutputJsonKey(stream, "testcase", "name", "", Indent(10));
+ OutputJsonKey(stream, "testcase", "status", "RUN", Indent(10));
+ OutputJsonKey(stream, "testcase", "result", "COMPLETED", Indent(10));
+ OutputJsonKey(stream, "testcase", "timestamp",
+ FormatEpochTimeInMillisAsRFC3339(result.start_timestamp()),
+ Indent(10));
+ OutputJsonKey(stream, "testcase", "time",
+ FormatTimeInMillisAsDuration(result.elapsed_time()),
+ Indent(10));
+ OutputJsonKey(stream, "testcase", "classname", "", Indent(10), false);
+ *stream << TestPropertiesAsJson(result, Indent(10));
+
+ // Output the actual test result.
+ OutputJsonTestResult(stream, result);
+
+ // Finish the test suite.
+ *stream << "\n" << Indent(6) << "]\n" << Indent(4) << "}";
+}
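Mirroring the XML path, the JSON stanza produced above nests one synthetic test case inside a "NonTestSuiteFailure" suite; a rough shape of the emitted output (values illustrative, trailing fields elided):

    {
      "name": "NonTestSuiteFailure",
      "tests": 1,
      "failures": 1,
      "disabled": 0,
      "skipped": 0,
      "errors": 0,
      "time": "0.003s",
      "timestamp": "...",
      "testsuite": [
        { "name": "", "status": "RUN", "result": "COMPLETED", ... }
      ]
    }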
+
// Prints a JSON representation of a TestInfo object.
-void JsonUnitTestResultPrinter::OutputJsonTestInfo(::std::ostream *stream,
- const char *test_suite_name,
- const TestInfo &test_info) {
- const TestResult &result = *test_info.result();
+void JsonUnitTestResultPrinter::OutputJsonTestInfo(::std::ostream* stream,
+ const char* test_suite_name,
+ const TestInfo& test_info) {
+ const TestResult& result = *test_info.result();
const std::string kTestsuite = "testcase";
const std::string kIndent = Indent(10);
@@ -4227,11 +4674,14 @@ void JsonUnitTestResultPrinter::OutputJsonTestInfo(::std::ostream *stream,
OutputJsonKey(stream, kTestsuite, "type_param", test_info.type_param(),
kIndent);
}
- if (GTEST_FLAG(list_tests)) {
- OutputJsonKey(stream, kTestsuite, "file", test_info.file(), kIndent);
- OutputJsonKey(stream, kTestsuite, "line", test_info.line(), kIndent, false);
+
+ OutputJsonKey(stream, kTestsuite, "file", test_info.file(), kIndent);
+ OutputJsonKey(stream, kTestsuite, "line", test_info.line(), kIndent, false);
+ if (GTEST_FLAG_GET(list_tests)) {
*stream << "\n" << Indent(8) << "}";
return;
+ } else {
+ *stream << ",\n";
}
OutputJsonKey(stream, kTestsuite, "status",
@@ -4250,9 +4700,16 @@ void JsonUnitTestResultPrinter::OutputJsonTestInfo(::std::ostream *stream,
false);
*stream << TestPropertiesAsJson(result, kIndent);
+ OutputJsonTestResult(stream, result);
+}
+
+void JsonUnitTestResultPrinter::OutputJsonTestResult(::std::ostream* stream,
+ const TestResult& result) {
+ const std::string kIndent = Indent(10);
+
int failures = 0;
for (int i = 0; i < result.total_part_count(); ++i) {
- const TestPartResult &part = result.GetTestPartResult(i);
+ const TestPartResult& part = result.GetTestPartResult(i);
if (part.failed()) {
*stream << ",\n";
if (++failures == 1) {
@@ -4277,7 +4734,7 @@ void JsonUnitTestResultPrinter::OutputJsonTestInfo(::std::ostream *stream,
// Prints a JSON representation of a TestSuite object
void JsonUnitTestResultPrinter::PrintJsonTestSuite(
- std::ostream *stream, const TestSuite &test_suite) {
+ std::ostream* stream, const TestSuite& test_suite) {
const std::string kTestsuite = "testsuite";
const std::string kIndent = Indent(6);
@@ -4285,7 +4742,7 @@ void JsonUnitTestResultPrinter::PrintJsonTestSuite(
OutputJsonKey(stream, kTestsuite, "name", test_suite.name(), kIndent);
OutputJsonKey(stream, kTestsuite, "tests", test_suite.reportable_test_count(),
kIndent);
- if (!GTEST_FLAG(list_tests)) {
+ if (!GTEST_FLAG_GET(list_tests)) {
OutputJsonKey(stream, kTestsuite, "failures",
test_suite.failed_test_count(), kIndent);
OutputJsonKey(stream, kTestsuite, "disabled",
@@ -4319,8 +4776,8 @@ void JsonUnitTestResultPrinter::PrintJsonTestSuite(
}
// Prints a JSON summary of unit_test to output stream out.
-void JsonUnitTestResultPrinter::PrintJsonUnitTest(std::ostream *stream,
- const UnitTest &unit_test) {
+void JsonUnitTestResultPrinter::PrintJsonUnitTest(std::ostream* stream,
+ const UnitTest& unit_test) {
const std::string kTestsuites = "testsuites";
const std::string kIndent = Indent(2);
*stream << "{\n";
@@ -4332,7 +4789,7 @@ void JsonUnitTestResultPrinter::PrintJsonUnitTest(std::ostream *stream,
OutputJsonKey(stream, kTestsuites, "disabled",
unit_test.reportable_disabled_test_count(), kIndent);
OutputJsonKey(stream, kTestsuites, "errors", 0, kIndent);
- if (GTEST_FLAG(shuffle)) {
+ if (GTEST_FLAG_GET(shuffle)) {
OutputJsonKey(stream, kTestsuites, "random_seed", unit_test.random_seed(),
kIndent);
}
@@ -4361,13 +4818,19 @@ void JsonUnitTestResultPrinter::PrintJsonUnitTest(std::ostream *stream,
}
}
+ // If there was a test failure outside of one of the test suites (like in a
+ // test environment), include that in the output.
+ if (unit_test.ad_hoc_test_result().Failed()) {
+ OutputJsonTestSuiteForTestResult(stream, unit_test.ad_hoc_test_result());
+ }
+
*stream << "\n"
<< kIndent << "]\n"
<< "}\n";
}
void JsonUnitTestResultPrinter::PrintJsonTestList(
- std::ostream *stream, const std::vector<TestSuite *> &test_suites) {
+ std::ostream* stream, const std::vector<TestSuite*>& test_suites) {
const std::string kTestsuites = "testsuites";
const std::string kIndent = Indent(2);
*stream << "{\n";
@@ -4394,10 +4857,10 @@ void JsonUnitTestResultPrinter::PrintJsonTestList(
// Produces a string representing the test properties in a result as
// a JSON dictionary.
std::string JsonUnitTestResultPrinter::TestPropertiesAsJson(
- const TestResult &result, const std::string &indent) {
+ const TestResult& result, const std::string& indent) {
Message attributes;
for (int i = 0; i < result.test_property_count(); ++i) {
- const TestProperty &property = result.GetTestProperty(i);
+ const TestProperty& property = result.GetTestProperty(i);
attributes << ",\n"
<< indent << "\"" << property.key() << "\": "
<< "\"" << EscapeJson(property.value()) << "\"";
@@ -4414,7 +4877,7 @@ std::string JsonUnitTestResultPrinter::TestPropertiesAsJson(
// example, replaces "=" with "%3D". This algorithm is O(strlen(str))
// in both time and space -- important as the input str may contain an
// arbitrarily long test failure message and stack trace.
-std::string StreamingListener::UrlEncode(const char *str) {
+std::string StreamingListener::UrlEncode(const char* str) {
std::string result;
result.reserve(strlen(str) + 1);
for (char ch = *str; ch != '\0'; ch = *++str) {
@@ -4425,7 +4888,9 @@ std::string StreamingListener::UrlEncode(const char *str) {
case '\n':
result.append("%" + String::FormatByte(static_cast<unsigned char>(ch)));
break;
- default: result.push_back(ch); break;
+ default:
+ result.push_back(ch);
+ break;
}
}
return result;
@@ -4439,7 +4904,7 @@ void StreamingListener::SocketWriter::MakeConnection() {
memset(&hints, 0, sizeof(hints));
hints.ai_family = AF_UNSPEC; // To allow both IPv4 and IPv6 addresses.
hints.ai_socktype = SOCK_STREAM;
- addrinfo *servinfo = nullptr;
+ addrinfo* servinfo = nullptr;
// Use getaddrinfo() to get a linked list of IP addresses for
// the given host name.
@@ -4451,7 +4916,7 @@ void StreamingListener::SocketWriter::MakeConnection() {
}
// Loop through all the results and connect to the first we can.
- for (addrinfo *cur_addr = servinfo; sockfd_ == -1 && cur_addr != nullptr;
+ for (addrinfo* cur_addr = servinfo; sockfd_ == -1 && cur_addr != nullptr;
cur_addr = cur_addr->ai_next) {
sockfd_ = socket(cur_addr->ai_family, cur_addr->ai_socktype,
cur_addr->ai_protocol);
@@ -4477,7 +4942,7 @@ void StreamingListener::SocketWriter::MakeConnection() {
// class OsStackTraceGetter
-const char *const OsStackTraceGetterInterface::kElidedFramesMarker =
+const char* const OsStackTraceGetterInterface::kElidedFramesMarker =
"... " GTEST_NAME_ " internal frames ...";
std::string OsStackTraceGetter::CurrentStackTrace(int max_depth, int skip_count)
@@ -4491,12 +4956,12 @@ std::string OsStackTraceGetter::CurrentStackTrace(int max_depth, int skip_count)
max_depth = std::min(max_depth, kMaxStackTraceDepth);
- std::vector<void *> raw_stack(max_depth);
+ std::vector<void*> raw_stack(max_depth);
// Skips the frames requested by the caller, plus this function.
const int raw_stack_size =
absl::GetStackTrace(&raw_stack[0], max_depth, skip_count + 1);
- void *caller_frame = nullptr;
+ void* caller_frame = nullptr;
{
MutexLock lock(&mutex_);
caller_frame = caller_frame_;
@@ -4504,14 +4969,14 @@ std::string OsStackTraceGetter::CurrentStackTrace(int max_depth, int skip_count)
for (int i = 0; i < raw_stack_size; ++i) {
if (raw_stack[i] == caller_frame &&
- !GTEST_FLAG(show_internal_stack_frames)) {
+ !GTEST_FLAG_GET(show_internal_stack_frames)) {
// Add a marker to the trace and stop adding frames.
absl::StrAppend(&result, kElidedFramesMarker, "\n");
break;
}
char tmp[1024];
- const char *symbol = "(unknown)";
+ const char* symbol = "(unknown)";
if (absl::Symbolize(raw_stack[i], tmp, sizeof(tmp))) {
symbol = tmp;
}
@@ -4532,7 +4997,7 @@ std::string OsStackTraceGetter::CurrentStackTrace(int max_depth, int skip_count)
void OsStackTraceGetter::UponLeavingGTest() GTEST_LOCK_EXCLUDED_(mutex_) {
#if GTEST_HAS_ABSL
- void *caller_frame = nullptr;
+ void* caller_frame = nullptr;
if (absl::GetStackTrace(&caller_frame, 1, 3) <= 0) {
caller_frame = nullptr;
}
@@ -4546,7 +5011,7 @@ void OsStackTraceGetter::UponLeavingGTest() GTEST_LOCK_EXCLUDED_(mutex_) {
// constructor and deletes the file in its destructor.
class ScopedPrematureExitFile {
public:
- explicit ScopedPrematureExitFile(const char *premature_exit_filepath)
+ explicit ScopedPrematureExitFile(const char* premature_exit_filepath)
: premature_exit_filepath_(
premature_exit_filepath ? premature_exit_filepath : "") {
// If a path to the premature-exit file is specified...
@@ -4554,7 +5019,7 @@ class ScopedPrematureExitFile {
// create the file with a single "0" character in it. I/O
// errors are ignored as there's nothing better we can do and we
// don't want to fail the test because of this.
- FILE *pfile = posix::FOpen(premature_exit_filepath, "w");
+ FILE* pfile = posix::FOpen(premature_exit_filepath_.c_str(), "w");
fwrite("0", 1, 1, pfile);
fclose(pfile);
}
@@ -4576,7 +5041,8 @@ class ScopedPrematureExitFile {
private:
const std::string premature_exit_filepath_;
- GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedPrematureExitFile);
+ ScopedPrematureExitFile(const ScopedPrematureExitFile&) = delete;
+ ScopedPrematureExitFile& operator=(const ScopedPrematureExitFile&) = delete;
};
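The premature-exit file is the handshake RUN_ALL_TESTS() uses to detect a binary that dies before reporting. A hedged sketch of the consuming side, assuming the path comes from the conventional TEST_PREMATURE_EXIT_FILE environment variable:

    #include <cstdio>

    // Returns true if the premature-exit marker still exists, i.e. the test
    // binary exited before control returned from RUN_ALL_TESTS().
    bool TestExitedPrematurely(const char* premature_exit_filepath) {
      std::FILE* f = std::fopen(premature_exit_filepath, "r");
      if (f == nullptr) return false;  // marker deleted: clean shutdown
      std::fclose(f);
      return true;                     // marker survived: premature exit
    }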
} // namespace internal
@@ -4585,7 +5051,8 @@ class ScopedPrematureExitFile {
TestEventListeners::TestEventListeners()
: repeater_(new internal::TestEventRepeater()),
- default_result_printer_(nullptr), default_xml_generator_(nullptr) {}
+ default_result_printer_(nullptr),
+ default_xml_generator_(nullptr) {}
TestEventListeners::~TestEventListeners() { delete repeater_; }
@@ -4593,14 +5060,14 @@ TestEventListeners::~TestEventListeners() { delete repeater_; }
// output. Can be removed from the listeners list to shut down default
// console output. Note that removing this object from the listener list
// with Release transfers its ownership to the user.
-void TestEventListeners::Append(TestEventListener *listener) {
+void TestEventListeners::Append(TestEventListener* listener) {
repeater_->Append(listener);
}
// Removes the given event listener from the list and returns it. It then
// becomes the caller's responsibility to delete the listener. Returns
// NULL if the listener is not found in the list.
-TestEventListener *TestEventListeners::Release(TestEventListener *listener) {
+TestEventListener* TestEventListeners::Release(TestEventListener* listener) {
if (listener == default_result_printer_)
default_result_printer_ = nullptr;
else if (listener == default_xml_generator_)
@@ -4610,14 +5077,14 @@ TestEventListener *TestEventListeners::Release(TestEventListener *listener) {
// Returns repeater that broadcasts the TestEventListener events to all
// subscribers.
-TestEventListener *TestEventListeners::repeater() { return repeater_; }
+TestEventListener* TestEventListeners::repeater() { return repeater_; }
// Sets the default_result_printer attribute to the provided listener.
// The listener is also added to the listener list and previous
// default_result_printer is removed from it and deleted. The listener can
// also be NULL in which case it will not be added to the list. Does
// nothing if the previous and the current listener objects are the same.
-void TestEventListeners::SetDefaultResultPrinter(TestEventListener *listener) {
+void TestEventListeners::SetDefaultResultPrinter(TestEventListener* listener) {
if (default_result_printer_ != listener) {
// It is an error to pass this method a listener that is already in the
// list.
@@ -4632,7 +5099,7 @@ void TestEventListeners::SetDefaultResultPrinter(TestEventListener *listener) {
// default_xml_generator is removed from it and deleted. The listener can
// also be NULL in which case it will not be added to the list. Does
// nothing if the previous and the current listener objects are the same.
-void TestEventListeners::SetDefaultXmlGenerator(TestEventListener *listener) {
+void TestEventListeners::SetDefaultXmlGenerator(TestEventListener* listener) {
if (default_xml_generator_ != listener) {
// It is an error to pass this method a listener that is already in the
// list.
@@ -4661,13 +5128,13 @@ void TestEventListeners::SuppressEventForwarding() {
// We don't protect this under mutex_ as a user is not supposed to
// call this before main() starts, from which point on the return
// value will never change.
-UnitTest *UnitTest::GetInstance() {
+UnitTest* UnitTest::GetInstance() {
// CodeGear C++Builder insists on a public destructor for the
// default implementation. Use this implementation to keep good OO
// design with private destructor.
#if defined(__BORLANDC__)
- static UnitTest *const instance = new UnitTest;
+ static UnitTest* const instance = new UnitTest;
return instance;
#else
static UnitTest instance;
@@ -4767,32 +5234,32 @@ bool UnitTest::Failed() const { return impl()->Failed(); }
// Gets the i-th test suite among all the test suites. i can range from 0 to
// total_test_suite_count() - 1. If i is not in that range, returns NULL.
-const TestSuite *UnitTest::GetTestSuite(int i) const {
+const TestSuite* UnitTest::GetTestSuite(int i) const {
return impl()->GetTestSuite(i);
}
// Legacy API is deprecated but still available
#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
-const TestCase *UnitTest::GetTestCase(int i) const {
+const TestCase* UnitTest::GetTestCase(int i) const {
return impl()->GetTestCase(i);
}
#endif // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
// Returns the TestResult containing information on test failures and
// properties logged outside of individual test suites.
-const TestResult &UnitTest::ad_hoc_test_result() const {
+const TestResult& UnitTest::ad_hoc_test_result() const {
return *impl()->ad_hoc_test_result();
}
// Gets the i-th test suite among all the test suites. i can range from 0 to
// total_test_suite_count() - 1. If i is not in that range, returns NULL.
-TestSuite *UnitTest::GetMutableTestSuite(int i) {
+TestSuite* UnitTest::GetMutableTestSuite(int i) {
return impl()->GetMutableSuiteCase(i);
}
// Returns the list of event listeners that can be used to track events
// inside Google Test.
-TestEventListeners &UnitTest::listeners() { return *impl()->listeners(); }
+TestEventListeners& UnitTest::listeners() { return *impl()->listeners(); }
// Registers and returns a global test environment. When a test
// program is run, all global test environments will be set-up in the
@@ -4804,7 +5271,7 @@ TestEventListeners &UnitTest::listeners() { return *impl()->listeners(); }
//
// We don't protect this under mutex_, as we only support calling it
// from the main thread.
-Environment *UnitTest::AddEnvironment(Environment *env) {
+Environment* UnitTest::AddEnvironment(Environment* env) {
if (env == nullptr) {
return nullptr;
}
@@ -4818,9 +5285,9 @@ Environment *UnitTest::AddEnvironment(Environment *env) {
// this to report their results. The user code should use the
// assertion macros instead of calling this directly.
void UnitTest::AddTestPartResult(TestPartResult::Type result_type,
- const char *file_name, int line_number,
- const std::string &message,
- const std::string &os_stack_trace)
+ const char* file_name, int line_number,
+ const std::string& message,
+ const std::string& os_stack_trace)
GTEST_LOCK_EXCLUDED_(mutex_) {
Message msg;
msg << message;
@@ -4830,7 +5297,7 @@ void UnitTest::AddTestPartResult(TestPartResult::Type result_type,
msg << "\n" << GTEST_NAME_ << " trace:";
for (size_t i = impl_->gtest_trace_stack().size(); i > 0; --i) {
- const internal::TraceInfo &trace = impl_->gtest_trace_stack()[i - 1];
+ const internal::TraceInfo& trace = impl_->gtest_trace_stack()[i - 1];
msg << "\n"
<< internal::FormatFileLocation(trace.file, trace.line) << " "
<< trace.message;
@@ -4853,7 +5320,7 @@ void UnitTest::AddTestPartResult(TestPartResult::Type result_type,
// in the code (perhaps in order to use Google Test assertions
// with another testing framework) and specify the former on the
// command line for debugging.
- if (GTEST_FLAG(break_on_failure)) {
+ if (GTEST_FLAG_GET(break_on_failure)) {
#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
// Using DebugBreak on Windows allows gtest to still break into a debugger
// when a failure happens and both the --gtest_break_on_failure and
@@ -4868,9 +5335,9 @@ void UnitTest::AddTestPartResult(TestPartResult::Type result_type,
// Dereference nullptr through a volatile pointer to prevent the compiler
// from removing. We use this rather than abort() or __builtin_trap() for
// portability: some debuggers don't correctly trap abort().
- *static_cast<volatile int *>(nullptr) = 1;
+ *static_cast<volatile int*>(nullptr) = 1;
#endif // GTEST_OS_WINDOWS
- } else if (GTEST_FLAG(throw_on_failure)) {
+ } else if (GTEST_FLAG_GET(throw_on_failure)) {
#if GTEST_HAS_EXCEPTIONS
throw internal::GoogleTestFailureException(result);
#else
@@ -4887,8 +5354,8 @@ void UnitTest::AddTestPartResult(TestPartResult::Type result_type,
// from SetUpTestSuite or TearDownTestSuite, or to the global property set
// when invoked elsewhere. If the result already contains a property with
// the same key, the value will be updated.
-void UnitTest::RecordProperty(const std::string &key,
- const std::string &value) {
+void UnitTest::RecordProperty(const std::string& key,
+ const std::string& value) {
impl_->RecordProperty(TestProperty(key, value));
}
@@ -4899,7 +5366,7 @@ void UnitTest::RecordProperty(const std::string &key,
// from the main thread.
int UnitTest::Run() {
const bool in_death_test_child_process =
- internal::GTEST_FLAG(internal_run_death_test).length() > 0;
+ GTEST_FLAG_GET(internal_run_death_test).length() > 0;
// Google Test implements this protocol for catching that a test
// program exits before returning control to Google Test:
@@ -4929,7 +5396,7 @@ int UnitTest::Run() {
// Captures the value of GTEST_FLAG(catch_exceptions). This value will be
// used for the duration of the program.
- impl()->set_catch_exceptions(GTEST_FLAG(catch_exceptions));
+ impl()->set_catch_exceptions(GTEST_FLAG_GET(catch_exceptions));
#if GTEST_OS_WINDOWS
// Either the user wants Google Test to catch exceptions thrown by the
@@ -4956,7 +5423,7 @@ int UnitTest::Run() {
// this dialog or it will pop up for every EXPECT/ASSERT_DEATH statement
// executed. Google Test will notify the user of any unexpected
// failure via stderr.
- if (!GTEST_FLAG(break_on_failure))
+ if (!GTEST_FLAG_GET(break_on_failure))
_set_abort_behavior(
0x0, // Clear the following flags:
_WRITE_ABORT_MSG | _CALL_REPORTFAULT); // pop-up window, core dump.
@@ -4983,13 +5450,13 @@ int UnitTest::Run() {
// Returns the working directory when the first TEST() or TEST_F() was
// executed.
-const char *UnitTest::original_working_dir() const {
+const char* UnitTest::original_working_dir() const {
return impl_->original_working_dir_.c_str();
}
// Returns the TestSuite object for the test that's currently running,
// or NULL if no test is running.
-const TestSuite *UnitTest::current_test_suite() const
+const TestSuite* UnitTest::current_test_suite() const
GTEST_LOCK_EXCLUDED_(mutex_) {
internal::MutexLock lock(&mutex_);
return impl_->current_test_suite();
@@ -4997,7 +5464,7 @@ const TestSuite *UnitTest::current_test_suite() const
// Legacy API is still available but deprecated
#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
-const TestCase *UnitTest::current_test_case() const
+const TestCase* UnitTest::current_test_case() const
GTEST_LOCK_EXCLUDED_(mutex_) {
internal::MutexLock lock(&mutex_);
return impl_->current_test_suite();
@@ -5006,7 +5473,7 @@ const TestCase *UnitTest::current_test_case() const
// Returns the TestInfo object for the test that's currently running,
// or NULL if no test is running.
-const TestInfo *UnitTest::current_test_info() const
+const TestInfo* UnitTest::current_test_info() const
GTEST_LOCK_EXCLUDED_(mutex_) {
internal::MutexLock lock(&mutex_);
return impl_->current_test_info();
@@ -5017,7 +5484,7 @@ int UnitTest::random_seed() const { return impl_->random_seed(); }
// Returns ParameterizedTestSuiteRegistry object used to keep track of
// value-parameterized tests and instantiate and register them.
-internal::ParameterizedTestSuiteRegistry &
+internal::ParameterizedTestSuiteRegistry&
UnitTest::parameterized_test_registry() GTEST_LOCK_EXCLUDED_(mutex_) {
return impl_->parameterized_test_registry();
}
@@ -5030,7 +5497,7 @@ UnitTest::~UnitTest() { delete impl_; }
// Pushes a trace defined by SCOPED_TRACE() on to the per-thread
// Google Test trace stack.
-void UnitTest::PushGTestTrace(const internal::TraceInfo &trace)
+void UnitTest::PushGTestTrace(const internal::TraceInfo& trace)
GTEST_LOCK_EXCLUDED_(mutex_) {
internal::MutexLock lock(&mutex_);
impl_->gtest_trace_stack().push_back(trace);
@@ -5044,7 +5511,7 @@ void UnitTest::PopGTestTrace() GTEST_LOCK_EXCLUDED_(mutex_) {
namespace internal {
-UnitTestImpl::UnitTestImpl(UnitTest *parent)
+UnitTestImpl::UnitTestImpl(UnitTest* parent)
: parent_(parent),
GTEST_DISABLE_MSC_WARNINGS_PUSH_(4355 /* using this in initializer */)
default_global_test_part_result_reporter_(this),
@@ -5053,13 +5520,18 @@ UnitTestImpl::UnitTestImpl(UnitTest *parent)
&default_global_test_part_result_reporter_),
per_thread_test_part_result_reporter_(
&default_per_thread_test_part_result_reporter_),
- parameterized_test_registry_(), parameterized_tests_registered_(false),
- last_death_test_suite_(-1), current_test_suite_(nullptr),
- current_test_info_(nullptr), ad_hoc_test_result_(),
- os_stack_trace_getter_(nullptr), post_flag_parse_init_performed_(false),
+ parameterized_test_registry_(),
+ parameterized_tests_registered_(false),
+ last_death_test_suite_(-1),
+ current_test_suite_(nullptr),
+ current_test_info_(nullptr),
+ ad_hoc_test_result_(),
+ os_stack_trace_getter_(nullptr),
+ post_flag_parse_init_performed_(false),
random_seed_(0), // Will be overridden by the flag before first use.
random_(0), // Will be reseeded before first use.
- start_timestamp_(0), elapsed_time_(0),
+ start_timestamp_(0),
+ elapsed_time_(0),
#if GTEST_HAS_DEATH_TEST
death_test_factory_(new DefaultDeathTestFactory),
#endif
@@ -5083,9 +5555,9 @@ UnitTestImpl::~UnitTestImpl() {
// from SetUpTestSuite/TearDownTestSuite, or to the global property set
// otherwise. If the result already contains a property with the same key,
// the value will be updated.
-void UnitTestImpl::RecordProperty(const TestProperty &test_property) {
+void UnitTestImpl::RecordProperty(const TestProperty& test_property) {
std::string xml_element;
- TestResult *test_result; // TestResult appropriate for property recording.
+ TestResult* test_result; // TestResult appropriate for property recording.
if (current_test_info_ != nullptr) {
xml_element = "testcase";
@@ -5112,7 +5584,7 @@ void UnitTestImpl::SuppressTestEventsIfInSubprocess() {
// Initializes event listeners performing XML output as specified by
// UnitTestOptions. Must not be called before InitGoogleTest.
void UnitTestImpl::ConfigureXmlOutput() {
- const std::string &output_format = UnitTestOptions::GetOutputFormat();
+ const std::string& output_format = UnitTestOptions::GetOutputFormat();
if (output_format == "xml") {
listeners()->SetDefaultXmlGenerator(new XmlUnitTestResultPrinter(
UnitTestOptions::GetAbsolutePathToOutputFile().c_str()));
@@ -5129,7 +5601,7 @@ void UnitTestImpl::ConfigureXmlOutput() {
// Initializes event listeners for streaming test results in string form.
// Must not be called before InitGoogleTest.
void UnitTestImpl::ConfigureStreamingOutput() {
- const std::string &target = GTEST_FLAG(stream_result_to);
+ const std::string& target = GTEST_FLAG_GET(stream_result_to);
if (!target.empty()) {
const size_t pos = target.find(':');
if (pos != std::string::npos) {
@@ -5172,13 +5644,17 @@ void UnitTestImpl::PostFlagParsingInit() {
// to shut down the default XML output before invoking RUN_ALL_TESTS.
ConfigureXmlOutput();
+ if (GTEST_FLAG_GET(brief)) {
+ listeners()->SetDefaultResultPrinter(new BriefUnitTestResultPrinter);
+ }
+
#if GTEST_CAN_STREAM_RESULTS_
// Configures listeners for streaming test results to the specified server.
ConfigureStreamingOutput();
#endif // GTEST_CAN_STREAM_RESULTS_
#if GTEST_HAS_ABSL
- if (GTEST_FLAG(install_failure_signal_handler)) {
+ if (GTEST_FLAG_GET(install_failure_signal_handler)) {
absl::FailureSignalHandlerOptions options;
absl::InstallFailureSignalHandler(options);
}
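The brief printer registered above can be enabled on the command line, via the usual GTEST_* environment variables, or in code; a sketch:

    // On the command line:  --gtest_brief=1
    // In the environment:   GTEST_BRIEF=1
    // Programmatically, before InitGoogleTest():
    GTEST_FLAG_SET(brief, true);  // only failures are printed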
@@ -5197,10 +5673,10 @@ void UnitTestImpl::PostFlagParsingInit() {
class TestSuiteNameIs {
public:
// Constructor.
- explicit TestSuiteNameIs(const std::string &name) : name_(name) {}
+ explicit TestSuiteNameIs(const std::string& name) : name_(name) {}
// Returns true if and only if the name of test_suite matches name_.
- bool operator()(const TestSuite *test_suite) const {
+ bool operator()(const TestSuite* test_suite) const {
return test_suite != nullptr &&
strcmp(test_suite->name(), name_.c_str()) == 0;
}
@@ -5217,12 +5693,12 @@ class TestSuiteNameIs {
// Arguments:
//
// test_suite_name: name of the test suite
-// type_param: the name of the test suite's type parameter, or NULL if
-// this is not a typed or a type-parameterized test suite.
-// set_up_tc: pointer to the function that sets up the test suite
-// tear_down_tc: pointer to the function that tears down the test suite
-TestSuite *UnitTestImpl::GetTestSuite(
- const char *test_suite_name, const char *type_param,
+// type_param: the name of the test suite's type parameter, or NULL if
+// this is not a typed or a type-parameterized test suite.
+// set_up_tc: pointer to the function that sets up the test suite
+// tear_down_tc: pointer to the function that tears down the test suite
+TestSuite* UnitTestImpl::GetTestSuite(
+ const char* test_suite_name, const char* type_param,
internal::SetUpTestSuiteFunc set_up_tc,
internal::TearDownTestSuiteFunc tear_down_tc) {
// Can we find a TestSuite with the given name?
@@ -5233,12 +5709,12 @@ TestSuite *UnitTestImpl::GetTestSuite(
if (test_suite != test_suites_.rend()) return *test_suite;
// No. Let's create one.
- auto *const new_test_suite =
+ auto* const new_test_suite =
new TestSuite(test_suite_name, type_param, set_up_tc, tear_down_tc);
+ const UnitTestFilter death_test_suite_filter(kDeathTestSuiteFilter);
// Is this a death test suite?
- if (internal::UnitTestOptions::MatchesFilter(test_suite_name,
- kDeathTestSuiteFilter)) {
+ if (death_test_suite_filter.MatchesName(test_suite_name)) {
// Yes. Inserts the test suite after the last death test suite
// defined so far. This only works when the test suites haven't
// been shuffled. Otherwise we may end up running a death test
@@ -5257,8 +5733,8 @@ TestSuite *UnitTestImpl::GetTestSuite(
// Helpers for setting up / tearing down the given environment. They
// are for use in the ForEach() function.
-static void SetUpEnvironment(Environment *env) { env->SetUp(); }
-static void TearDownEnvironment(Environment *env) { env->TearDown(); }
+static void SetUpEnvironment(Environment* env) { env->SetUp(); }
+static void TearDownEnvironment(Environment* env) { env->TearDown(); }
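These helpers iterate over the environments registered by the user; a minimal sketch of such an environment (the class name and resource are illustrative), whose per-iteration lifetime is governed by the recreate_environments_when_repeating logic shown below:

    #include "gtest/gtest.h"

    class DecoderEnvironment : public ::testing::Environment {
     public:
      void SetUp() override { /* e.g. allocate a shared decoder context */ }
      void TearDown() override { /* release it */ }
    };

    // Typically registered in main() before RUN_ALL_TESTS(); gtest takes
    // ownership of the pointer.
    ::testing::Environment* const decoder_env =
        ::testing::AddGlobalTestEnvironment(new DecoderEnvironment);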
// Runs all tests in this UnitTest object, prints the result, and
// returns true if all tests are successful. If any exception is
@@ -5310,37 +5786,48 @@ bool UnitTestImpl::RunAllTests() {
: IGNORE_SHARDING_PROTOCOL) > 0;
// Lists the tests and exits if the --gtest_list_tests flag was specified.
- if (GTEST_FLAG(list_tests)) {
+ if (GTEST_FLAG_GET(list_tests)) {
// This must be called *after* FilterTests() has been called.
ListTestsMatchingFilter();
return true;
}
- random_seed_ =
- GTEST_FLAG(shuffle) ? GetRandomSeedFromFlag(GTEST_FLAG(random_seed)) : 0;
+ random_seed_ = GetRandomSeedFromFlag(GTEST_FLAG_GET(random_seed));
// True if and only if at least one test has failed.
bool failed = false;
- TestEventListener *repeater = listeners()->repeater();
+ TestEventListener* repeater = listeners()->repeater();
start_timestamp_ = GetTimeInMillis();
repeater->OnTestProgramStart(*parent_);
// How many times to repeat the tests? We don't want to repeat them
// when we are inside the subprocess of a death test.
- const int repeat = in_subprocess_for_death_test ? 1 : GTEST_FLAG(repeat);
+ const int repeat = in_subprocess_for_death_test ? 1 : GTEST_FLAG_GET(repeat);
+
// Repeats forever if the repeat count is negative.
const bool gtest_repeat_forever = repeat < 0;
+
+ // Should test environments be set up and torn down for each repeat, or only
+ // set up on the first and torn down on the last iteration? If there is no
+ // "last" iteration because the tests will repeat forever, always recreate the
+ // environments to avoid leaks in case one of the environments is using
+ // resources that are external to this process. Without this check there would
+ // be no way to clean up those external resources automatically.
+ const bool recreate_environments_when_repeating =
+ GTEST_FLAG_GET(recreate_environments_when_repeating) ||
+ gtest_repeat_forever;
+
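Under the new flag, a repeating run can opt into fresh environments per iteration; forever-repeating runs get that behavior implicitly, as the comment above explains. A sketch:

    // Equivalent to:
    //   --gtest_repeat=1000 --gtest_recreate_environments_when_repeating
    GTEST_FLAG_SET(repeat, 1000);
    GTEST_FLAG_SET(recreate_environments_when_repeating, true);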
for (int i = 0; gtest_repeat_forever || i != repeat; i++) {
// We want to preserve failures generated by ad-hoc test
// assertions executed before RUN_ALL_TESTS().
ClearNonAdHocTestResult();
- const TimeInMillis start = GetTimeInMillis();
+ Timer timer;
// Shuffles test suites and tests if requested.
- if (has_tests_to_run && GTEST_FLAG(shuffle)) {
+ if (has_tests_to_run && GTEST_FLAG_GET(shuffle)) {
random()->Reseed(static_cast<uint32_t>(random_seed_));
// This should be done before calling OnTestIterationStart(),
// such that a test event listener can see the actual test order
@@ -5353,23 +5840,26 @@ bool UnitTestImpl::RunAllTests() {
// Runs each test suite if there is at least one test to run.
if (has_tests_to_run) {
- // Sets up all environments beforehand.
- repeater->OnEnvironmentsSetUpStart(*parent_);
- ForEach(environments_, SetUpEnvironment);
- repeater->OnEnvironmentsSetUpEnd(*parent_);
+ // Sets up all environments beforehand. If test environments aren't
+ // recreated for each iteration, only do so on the first iteration.
+ if (i == 0 || recreate_environments_when_repeating) {
+ repeater->OnEnvironmentsSetUpStart(*parent_);
+ ForEach(environments_, SetUpEnvironment);
+ repeater->OnEnvironmentsSetUpEnd(*parent_);
+ }
// Runs the tests only if there was no fatal failure or skip triggered
// during global set-up.
if (Test::IsSkipped()) {
// Emit diagnostics when global set-up calls skip, as it will not be
// emitted by default.
- TestResult &test_result =
+ TestResult& test_result =
*internal::GetUnitTestImpl()->current_test_result();
for (int j = 0; j < test_result.total_part_count(); ++j) {
- const TestPartResult &test_part_result =
+ const TestPartResult& test_part_result =
test_result.GetTestPartResult(j);
if (test_part_result.type() == TestPartResult::kSkip) {
- const std::string &result = test_part_result.message();
+ const std::string& result = test_part_result.message();
printf("%s\n", result.c_str());
}
}
@@ -5378,17 +5868,36 @@ bool UnitTestImpl::RunAllTests() {
for (int test_index = 0; test_index < total_test_suite_count();
test_index++) {
GetMutableSuiteCase(test_index)->Run();
+ if (GTEST_FLAG_GET(fail_fast) &&
+ GetMutableSuiteCase(test_index)->Failed()) {
+ for (int j = test_index + 1; j < total_test_suite_count(); j++) {
+ GetMutableSuiteCase(j)->Skip();
+ }
+ break;
+ }
+ }
+ } else if (Test::HasFatalFailure()) {
+ // If there was a fatal failure during the global setup then we know we
+ // aren't going to run any tests. Explicitly mark all of the tests as
+ // skipped to make this obvious in the output.
+ for (int test_index = 0; test_index < total_test_suite_count();
+ test_index++) {
+ GetMutableSuiteCase(test_index)->Skip();
}
}
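With fail_fast set, the first failing suite short-circuits the run and every suite after it is reported as skipped; a fatal failure in global set-up now skips all suites the same way rather than silently running nothing. Sketch:

    // Equivalent to --gtest_fail_fast=1 (or GTEST_FAIL_FAST=1 in the
    // environment):
    GTEST_FLAG_SET(fail_fast, true);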
- // Tears down all environments in reverse order afterwards.
- repeater->OnEnvironmentsTearDownStart(*parent_);
- std::for_each(environments_.rbegin(), environments_.rend(),
- TearDownEnvironment);
- repeater->OnEnvironmentsTearDownEnd(*parent_);
+ // Tears down all environments in reverse order afterwards. If test
+ // environments aren't recreated for each iteration, only do so on the
+ // last iteration.
+ if (i == repeat - 1 || recreate_environments_when_repeating) {
+ repeater->OnEnvironmentsTearDownStart(*parent_);
+ std::for_each(environments_.rbegin(), environments_.rend(),
+ TearDownEnvironment);
+ repeater->OnEnvironmentsTearDownEnd(*parent_);
+ }
}
- elapsed_time_ = GetTimeInMillis() - start;
+ elapsed_time_ = timer.Elapsed();
// Tells the unit test event listener that the tests have just finished.
repeater->OnTestIterationEnd(*parent_, i);
@@ -5406,7 +5915,7 @@ bool UnitTestImpl::RunAllTests() {
// (it's always safe to unshuffle the tests).
UnshuffleTests();
- if (GTEST_FLAG(shuffle)) {
+ if (GTEST_FLAG_GET(shuffle)) {
// Picks a new random seed for each iteration.
random_seed_ = GetNextRandomSeed(random_seed_);
}
@@ -5416,14 +5925,14 @@ bool UnitTestImpl::RunAllTests() {
if (!gtest_is_initialized_before_run_all_tests) {
ColoredPrintf(
- COLOR_RED,
+ GTestColor::kRed,
"\nIMPORTANT NOTICE - DO NOT IGNORE:\n"
"This test program did NOT call " GTEST_INIT_GOOGLE_TEST_NAME_
"() before calling RUN_ALL_TESTS(). This is INVALID. Soon " GTEST_NAME_
" will start to enforce the valid usage. "
"Please fix it ASAP, or IT WILL START TO FAIL.\n"); // NOLINT
#if GTEST_FOR_GOOGLE_
- ColoredPrintf(COLOR_RED,
+ ColoredPrintf(GTestColor::kRed,
"For more details, see http://wiki/Main/ValidGUnitMain.\n");
#endif // GTEST_FOR_GOOGLE_
}
@@ -5436,11 +5945,11 @@ bool UnitTestImpl::RunAllTests() {
// function will write over it. If the variable is present, but the file cannot
// be created, prints an error and exits.
void WriteToShardStatusFileIfNeeded() {
- const char *const test_shard_file = posix::GetEnv(kTestShardStatusFile);
+ const char* const test_shard_file = posix::GetEnv(kTestShardStatusFile);
if (test_shard_file != nullptr) {
- FILE *const file = posix::FOpen(test_shard_file, "w");
+ FILE* const file = posix::FOpen(test_shard_file, "w");
if (file == nullptr) {
- ColoredPrintf(COLOR_RED,
+ ColoredPrintf(GTestColor::kRed,
"Could not write to the test shard status file \"%s\" "
"specified by the %s environment variable.\n",
test_shard_file, kTestShardStatusFile);
@@ -5457,7 +5966,7 @@ void WriteToShardStatusFileIfNeeded() {
// an error and exits. If in_subprocess_for_death_test, sharding is
// disabled because it must only be applied to the original test
// process. Otherwise, we could filter out death tests we intended to execute.
-bool ShouldShard(const char *total_shards_env, const char *shard_index_env,
+bool ShouldShard(const char* total_shards_env, const char* shard_index_env,
bool in_subprocess_for_death_test) {
if (in_subprocess_for_death_test) {
return false;
@@ -5473,7 +5982,7 @@ bool ShouldShard(const char *total_shards_env, const char *shard_index_env,
<< kTestShardIndex << " = " << shard_index
<< ", but have left " << kTestTotalShards
<< " unset.\n";
- ColoredPrintf(COLOR_RED, "%s", msg.GetString().c_str());
+ ColoredPrintf(GTestColor::kRed, "%s", msg.GetString().c_str());
fflush(stdout);
exit(EXIT_FAILURE);
} else if (total_shards != -1 && shard_index == -1) {
@@ -5481,7 +5990,7 @@ bool ShouldShard(const char *total_shards_env, const char *shard_index_env,
<< "Invalid environment variables: you have "
<< kTestTotalShards << " = " << total_shards
<< ", but have left " << kTestShardIndex << " unset.\n";
- ColoredPrintf(COLOR_RED, "%s", msg.GetString().c_str());
+ ColoredPrintf(GTestColor::kRed, "%s", msg.GetString().c_str());
fflush(stdout);
exit(EXIT_FAILURE);
} else if (shard_index < 0 || shard_index >= total_shards) {
@@ -5490,7 +5999,7 @@ bool ShouldShard(const char *total_shards_env, const char *shard_index_env,
<< kTestShardIndex << " < " << kTestTotalShards
<< ", but you have " << kTestShardIndex << "=" << shard_index
<< ", " << kTestTotalShards << "=" << total_shards << ".\n";
- ColoredPrintf(COLOR_RED, "%s", msg.GetString().c_str());
+ ColoredPrintf(GTestColor::kRed, "%s", msg.GetString().c_str());
fflush(stdout);
exit(EXIT_FAILURE);
}
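ShouldShard() validates the pair of sharding variables: both must be set together and 0 <= index < total. A launcher-side sketch, assuming the standard GTEST_TOTAL_SHARDS/GTEST_SHARD_INDEX variable names and a hypothetical shard 2 of 5:

    #include <cstdlib>

    void ConfigureShard() {
      // A real harness (e.g. a CI runner) exports these before exec'ing
      // the test binary; gtest then runs only this shard's slice.
      setenv("GTEST_TOTAL_SHARDS", "5", /*overwrite=*/1);
      setenv("GTEST_SHARD_INDEX", "2", /*overwrite=*/1);
    }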
@@ -5501,8 +6010,8 @@ bool ShouldShard(const char *total_shards_env, const char *shard_index_env,
// Parses the environment variable var as an Int32. If it is unset,
// returns default_val. If it is not an Int32, prints an error
// and aborts.
-int32_t Int32FromEnvOrDie(const char *var, int32_t default_val) {
- const char *str_val = posix::GetEnv(var);
+int32_t Int32FromEnvOrDie(const char* var, int32_t default_val) {
+ const char* str_val = posix::GetEnv(var);
if (str_val == nullptr) {
return default_val;
}
@@ -5538,33 +6047,35 @@ int UnitTestImpl::FilterTests(ReactionToSharding shard_tests) {
? Int32FromEnvOrDie(kTestShardIndex, -1)
: -1;
+ const PositiveAndNegativeUnitTestFilter gtest_flag_filter(
+ GTEST_FLAG_GET(filter));
+ const UnitTestFilter disable_test_filter(kDisableTestFilter);
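Here gtest_flag_filter holds the user's --gtest_filter value in its POSITIVE_PATTERNS[-NEGATIVE_PATTERNS] form, while disable_test_filter matches the DISABLED_ naming convention. An illustrative filter value:

    // Run every FooTest except FooTest.Flaky; DISABLED_ tests stay
    // excluded unless also_run_disabled_tests is set.
    GTEST_FLAG_SET(filter, "FooTest.*-FooTest.Flaky");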
// num_runnable_tests are the number of tests that will
// run across all shards (i.e., match filter and are not disabled).
// num_selected_tests are the number of tests to be run on
// this shard.
int num_runnable_tests = 0;
int num_selected_tests = 0;
- for (auto *test_suite : test_suites_) {
- const std::string &test_suite_name = test_suite->name();
+ for (auto* test_suite : test_suites_) {
+ const std::string& test_suite_name = test_suite->name();
test_suite->set_should_run(false);
for (size_t j = 0; j < test_suite->test_info_list().size(); j++) {
- TestInfo *const test_info = test_suite->test_info_list()[j];
+ TestInfo* const test_info = test_suite->test_info_list()[j];
const std::string test_name(test_info->name());
// A test is disabled if test suite name or test name matches
// kDisableTestFilter.
- const bool is_disabled = internal::UnitTestOptions::MatchesFilter(
- test_suite_name, kDisableTestFilter) ||
- internal::UnitTestOptions::MatchesFilter(
- test_name, kDisableTestFilter);
+ const bool is_disabled =
+ disable_test_filter.MatchesName(test_suite_name) ||
+ disable_test_filter.MatchesName(test_name);
test_info->is_disabled_ = is_disabled;
- const bool matches_filter = internal::UnitTestOptions::FilterMatchesTest(
- test_suite_name, test_name);
+ const bool matches_filter =
+ gtest_flag_filter.MatchesTest(test_suite_name, test_name);
test_info->matches_filter_ = matches_filter;
const bool is_runnable =
- (GTEST_FLAG(also_run_disabled_tests) || !is_disabled) &&
+ (GTEST_FLAG_GET(also_run_disabled_tests) || !is_disabled) &&
matches_filter;
const bool is_in_another_shard =
@@ -5587,7 +6098,7 @@ int UnitTestImpl::FilterTests(ReactionToSharding shard_tests) {
// characters with string "\\n". If the output takes more than
// max_length characters, only prints the first max_length characters
// and "...".
-static void PrintOnOneLine(const char *str, int max_length) {
+static void PrintOnOneLine(const char* str, int max_length) {
if (str != nullptr) {
for (int i = 0; *str != '\0'; ++str) {
if (i >= max_length) {
@@ -5610,11 +6121,11 @@ void UnitTestImpl::ListTestsMatchingFilter() {
// Print at most this many characters for each type/value parameter.
const int kMaxParamLength = 250;
- for (auto *test_suite : test_suites_) {
+ for (auto* test_suite : test_suites_) {
bool printed_test_suite_name = false;
for (size_t j = 0; j < test_suite->test_info_list().size(); j++) {
- const TestInfo *const test_info = test_suite->test_info_list()[j];
+ const TestInfo* const test_info = test_suite->test_info_list()[j];
if (test_info->matches_filter_) {
if (!printed_test_suite_name) {
printed_test_suite_name = true;
@@ -5639,9 +6150,9 @@ void UnitTestImpl::ListTestsMatchingFilter() {
}
}
fflush(stdout);
- const std::string &output_format = UnitTestOptions::GetOutputFormat();
+ const std::string& output_format = UnitTestOptions::GetOutputFormat();
if (output_format == "xml" || output_format == "json") {
- FILE *fileout = OpenFileForWriting(
+ FILE* fileout = OpenFileForWriting(
UnitTestOptions::GetAbsolutePathToOutputFile().c_str());
std::stringstream stream;
if (output_format == "xml") {
@@ -5664,7 +6175,7 @@ void UnitTestImpl::ListTestsMatchingFilter() {
// the same; otherwise, deletes the old getter and makes the input the
// current getter.
void UnitTestImpl::set_os_stack_trace_getter(
- OsStackTraceGetterInterface *getter) {
+ OsStackTraceGetterInterface* getter) {
if (os_stack_trace_getter_ != getter) {
delete os_stack_trace_getter_;
os_stack_trace_getter_ = getter;
@@ -5674,7 +6185,7 @@ void UnitTestImpl::set_os_stack_trace_getter(
// Returns the current OS stack trace getter if it is not NULL;
// otherwise, creates an OsStackTraceGetter, makes it the current
// getter, and returns it.
-OsStackTraceGetterInterface *UnitTestImpl::os_stack_trace_getter() {
+OsStackTraceGetterInterface* UnitTestImpl::os_stack_trace_getter() {
if (os_stack_trace_getter_ == nullptr) {
#ifdef GTEST_OS_STACK_TRACE_GETTER_
os_stack_trace_getter_ = new GTEST_OS_STACK_TRACE_GETTER_;
@@ -5687,7 +6198,7 @@ OsStackTraceGetterInterface *UnitTestImpl::os_stack_trace_getter() {
}
// Returns the most specific TestResult currently running.
-TestResult *UnitTestImpl::current_test_result() {
+TestResult* UnitTestImpl::current_test_result() {
if (current_test_info_ != nullptr) {
return &current_test_info_->result_;
}
@@ -5708,7 +6219,7 @@ void UnitTestImpl::ShuffleTests() {
static_cast<int>(test_suites_.size()), &test_suite_indices_);
// Shuffles the tests inside each test suite.
- for (auto &test_suite : test_suites_) {
+ for (auto& test_suite : test_suites_) {
test_suite->ShuffleTests(random());
}
}
@@ -5733,8 +6244,8 @@ void UnitTestImpl::UnshuffleTests() {
// For example, if Foo() calls Bar(), which in turn calls
// GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in
// the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't.
-std::string GetCurrentOsStackTraceExceptTop(UnitTest * /*unit_test*/,
- int skip_count) {
+GTEST_NO_INLINE_ GTEST_NO_TAIL_CALL_ std::string
+GetCurrentOsStackTraceExceptTop(UnitTest* /*unit_test*/, int skip_count) {
// We pass skip_count + 1 to skip this wrapper function in addition
// to what the user really wants to skip.
return GetUnitTestImpl()->CurrentOsStackTraceExceptTop(skip_count + 1);
@@ -5760,7 +6271,7 @@ bool AlwaysTrue() {
// If *pstr starts with the given prefix, modifies *pstr to be right
// past the prefix and returns true; otherwise leaves *pstr unchanged
// and returns false. None of pstr, *pstr, and prefix can be NULL.
-bool SkipPrefix(const char *prefix, const char **pstr) {
+bool SkipPrefix(const char* prefix, const char** pstr) {
const size_t prefix_len = strlen(prefix);
if (strncmp(*pstr, prefix, prefix_len) == 0) {
*pstr += prefix_len;
@@ -5774,18 +6285,19 @@ bool SkipPrefix(const char *prefix, const char **pstr) {
// part can be omitted.
//
// Returns the value of the flag, or NULL if the parsing failed.
-static const char *ParseFlagValue(const char *str, const char *flag,
+static const char* ParseFlagValue(const char* str, const char* flag_name,
bool def_optional) {
// str and flag must not be NULL.
- if (str == nullptr || flag == nullptr) return nullptr;
+ if (str == nullptr || flag_name == nullptr) return nullptr;
// The flag must start with "--" followed by GTEST_FLAG_PREFIX_.
- const std::string flag_str = std::string("--") + GTEST_FLAG_PREFIX_ + flag;
+ const std::string flag_str =
+ std::string("--") + GTEST_FLAG_PREFIX_ + flag_name;
const size_t flag_len = flag_str.length();
if (strncmp(str, flag_str.c_str(), flag_len) != 0) return nullptr;
// Skips the flag name.
- const char *flag_end = str + flag_len;
+ const char* flag_end = str + flag_len;
// When def_optional is true, it's OK to not have a "=value" part.
if (def_optional && (flag_end[0] == '\0')) {
@@ -5811,9 +6323,9 @@ static const char *ParseFlagValue(const char *str, const char *flag,
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
-static bool ParseBoolFlag(const char *str, const char *flag, bool *value) {
+static bool ParseFlag(const char* str, const char* flag_name, bool* value) {
// Gets the value of the flag as a string.
- const char *const value_str = ParseFlagValue(str, flag, true);
+ const char* const value_str = ParseFlagValue(str, flag_name, true);
// Aborts if the parsing failed.
if (value_str == nullptr) return false;
@@ -5827,15 +6339,15 @@ static bool ParseBoolFlag(const char *str, const char *flag, bool *value) {
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
-bool ParseInt32Flag(const char *str, const char *flag, int32_t *value) {
+bool ParseFlag(const char* str, const char* flag_name, int32_t* value) {
// Gets the value of the flag as a string.
- const char *const value_str = ParseFlagValue(str, flag, false);
+ const char* const value_str = ParseFlagValue(str, flag_name, false);
// Aborts if the parsing failed.
if (value_str == nullptr) return false;
// Sets *value to the value of the flag.
- return ParseInt32(Message() << "The value of flag --" << flag, value_str,
+ return ParseInt32(Message() << "The value of flag --" << flag_name, value_str,
value);
}
@@ -5844,9 +6356,9 @@ bool ParseInt32Flag(const char *str, const char *flag, int32_t *value) {
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
template <typename String>
-static bool ParseStringFlag(const char *str, const char *flag, String *value) {
+static bool ParseFlag(const char* str, const char* flag_name, String* value) {
// Gets the value of the flag as a string.
- const char *const value_str = ParseFlagValue(str, flag, false);
+ const char* const value_str = ParseFlagValue(str, flag_name, false);
// Aborts if the parsing failed.
if (value_str == nullptr) return false;
@@ -5862,7 +6374,7 @@ static bool ParseStringFlag(const char *str, const char *flag, String *value) {
// recognized, it will print its help message. Flags starting with
// GTEST_INTERNAL_PREFIX_ followed by "internal_" are considered Google Test
// internal flags and do not trigger the help message.
-static bool HasGoogleTestFlagPrefix(const char *str) {
+static bool HasGoogleTestFlagPrefix(const char* str) {
return (SkipPrefix("--", &str) || SkipPrefix("-", &str) ||
SkipPrefix("/", &str)) &&
!SkipPrefix(GTEST_FLAG_PREFIX_ "internal_", &str) &&
@@ -5879,15 +6391,15 @@ static bool HasGoogleTestFlagPrefix(const char *str) {
// @Y changes the color to yellow.
// @D changes to the default terminal text color.
//
-static void PrintColorEncoded(const char *str) {
- GTestColor color = COLOR_DEFAULT; // The current color.
+static void PrintColorEncoded(const char* str) {
+ GTestColor color = GTestColor::kDefault; // The current color.
// Conceptually, we split the string into segments divided by escape
// sequences. Then we print one segment at a time. At the end of
// each iteration, the str pointer advances to the beginning of the
// next segment.
for (;;) {
- const char *p = strchr(str, '@');
+ const char* p = strchr(str, '@');
if (p == nullptr) {
ColoredPrintf(color, "%s", str);
return;
@@ -5900,13 +6412,13 @@ static void PrintColorEncoded(const char *str) {
if (ch == '@') {
ColoredPrintf(color, "@");
} else if (ch == 'D') {
- color = COLOR_DEFAULT;
+ color = GTestColor::kDefault;
} else if (ch == 'R') {
- color = COLOR_RED;
+ color = GTestColor::kRed;
} else if (ch == 'G') {
- color = COLOR_GREEN;
+ color = GTestColor::kGreen;
} else if (ch == 'Y') {
- color = COLOR_YELLOW;
+ color = GTestColor::kYellow;
} else {
--str;
}
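For reference, the escape scheme handled above applied to a fragment in the style of the help text below — a hypothetical call, since the function is file-static:

    // Prints "--gtest_brief=1" in green, the rest in the default color;
    // "@@" would print a literal '@'.
    PrintColorEncoded("  @G--gtest_brief=1@D\n    Only print test failures.\n");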
@@ -5924,7 +6436,7 @@ static const char kColorEncodedHelpMessage[] =
" List the names of all tests instead of running them. The name of\n"
" TEST(Foo, Bar) is \"Foo.Bar\".\n"
" @G--" GTEST_FLAG_PREFIX_
- "filter=@YPOSTIVE_PATTERNS"
+ "filter=@YPOSITIVE_PATTERNS"
"[@G-@YNEGATIVE_PATTERNS]@D\n"
" Run only the tests whose name matches one of the positive patterns "
"but\n"
@@ -5946,12 +6458,19 @@ static const char kColorEncodedHelpMessage[] =
"random_seed=@Y[NUMBER]@D\n"
" Random number seed to use for shuffling test orders (between 1 and\n"
" 99999, or 0 to use a seed based on the current time).\n"
+ " @G--" GTEST_FLAG_PREFIX_
+ "recreate_environments_when_repeating@D\n"
+ " Sets up and tears down the global test environment on each repeat\n"
+ " of the test.\n"
"\n"
"Test Output:\n"
" @G--" GTEST_FLAG_PREFIX_
"color=@Y(@Gyes@Y|@Gno@Y|@Gauto@Y)@D\n"
" Enable/disable colored output. The default is @Gauto@D.\n"
- " -@G-" GTEST_FLAG_PREFIX_
+ " @G--" GTEST_FLAG_PREFIX_
+ "brief=1@D\n"
+ " Only print test failures.\n"
+ " @G--" GTEST_FLAG_PREFIX_
"print_time=0@D\n"
" Don't print the elapsed time of each test.\n"
" @G--" GTEST_FLAG_PREFIX_
@@ -6002,40 +6521,45 @@ static const char kColorEncodedHelpMessage[] =
"(not one in your own code or tests), please report it to\n"
"@G<" GTEST_DEV_EMAIL_ ">@D.\n";
-static bool ParseGoogleTestFlag(const char *const arg) {
- return ParseBoolFlag(arg, kAlsoRunDisabledTestsFlag,
- &GTEST_FLAG(also_run_disabled_tests)) ||
- ParseBoolFlag(arg, kBreakOnFailureFlag,
- &GTEST_FLAG(break_on_failure)) ||
- ParseBoolFlag(arg, kCatchExceptionsFlag,
- &GTEST_FLAG(catch_exceptions)) ||
- ParseStringFlag(arg, kColorFlag, &GTEST_FLAG(color)) ||
- ParseStringFlag(arg, kDeathTestStyleFlag,
- &GTEST_FLAG(death_test_style)) ||
- ParseBoolFlag(arg, kDeathTestUseFork,
- &GTEST_FLAG(death_test_use_fork)) ||
- ParseStringFlag(arg, kFilterFlag, &GTEST_FLAG(filter)) ||
- ParseStringFlag(arg, kInternalRunDeathTestFlag,
- &GTEST_FLAG(internal_run_death_test)) ||
- ParseBoolFlag(arg, kListTestsFlag, &GTEST_FLAG(list_tests)) ||
- ParseStringFlag(arg, kOutputFlag, &GTEST_FLAG(output)) ||
- ParseBoolFlag(arg, kPrintTimeFlag, &GTEST_FLAG(print_time)) ||
- ParseBoolFlag(arg, kPrintUTF8Flag, &GTEST_FLAG(print_utf8)) ||
- ParseInt32Flag(arg, kRandomSeedFlag, &GTEST_FLAG(random_seed)) ||
- ParseInt32Flag(arg, kRepeatFlag, &GTEST_FLAG(repeat)) ||
- ParseBoolFlag(arg, kShuffleFlag, &GTEST_FLAG(shuffle)) ||
- ParseInt32Flag(arg, kStackTraceDepthFlag,
- &GTEST_FLAG(stack_trace_depth)) ||
- ParseStringFlag(arg, kStreamResultToFlag,
- &GTEST_FLAG(stream_result_to)) ||
- ParseBoolFlag(arg, kThrowOnFailureFlag, &GTEST_FLAG(throw_on_failure));
+static bool ParseGoogleTestFlag(const char* const arg) {
+#define GTEST_INTERNAL_PARSE_FLAG(flag_name) \
+ do { \
+ auto value = GTEST_FLAG_GET(flag_name); \
+ if (ParseFlag(arg, #flag_name, &value)) { \
+ GTEST_FLAG_SET(flag_name, value); \
+ return true; \
+ } \
+ } while (false)
+
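The macro leans on ParseFlag() overload resolution: GTEST_FLAG_GET yields a value of the flag's declared type, which selects the bool, int32_t, or string parser. One expansion, written out roughly for the shuffle flag:

    do {
      auto value = GTEST_FLAG_GET(shuffle);     // bool, so the bool overload
      if (ParseFlag(arg, "shuffle", &value)) {  // "--gtest_shuffle[=value]"
        GTEST_FLAG_SET(shuffle, value);
        return true;
      }
    } while (false);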
+ GTEST_INTERNAL_PARSE_FLAG(also_run_disabled_tests);
+ GTEST_INTERNAL_PARSE_FLAG(break_on_failure);
+ GTEST_INTERNAL_PARSE_FLAG(catch_exceptions);
+ GTEST_INTERNAL_PARSE_FLAG(color);
+ GTEST_INTERNAL_PARSE_FLAG(death_test_style);
+ GTEST_INTERNAL_PARSE_FLAG(death_test_use_fork);
+ GTEST_INTERNAL_PARSE_FLAG(fail_fast);
+ GTEST_INTERNAL_PARSE_FLAG(filter);
+ GTEST_INTERNAL_PARSE_FLAG(internal_run_death_test);
+ GTEST_INTERNAL_PARSE_FLAG(list_tests);
+ GTEST_INTERNAL_PARSE_FLAG(output);
+ GTEST_INTERNAL_PARSE_FLAG(brief);
+ GTEST_INTERNAL_PARSE_FLAG(print_time);
+ GTEST_INTERNAL_PARSE_FLAG(print_utf8);
+ GTEST_INTERNAL_PARSE_FLAG(random_seed);
+ GTEST_INTERNAL_PARSE_FLAG(repeat);
+ GTEST_INTERNAL_PARSE_FLAG(recreate_environments_when_repeating);
+ GTEST_INTERNAL_PARSE_FLAG(shuffle);
+ GTEST_INTERNAL_PARSE_FLAG(stack_trace_depth);
+ GTEST_INTERNAL_PARSE_FLAG(stream_result_to);
+ GTEST_INTERNAL_PARSE_FLAG(throw_on_failure);
+ return false;
}
#if GTEST_USE_OWN_FLAGFILE_FLAG_
-static void LoadFlagsFromFile(const std::string &path) {
- FILE *flagfile = posix::FOpen(path.c_str(), "r");
+static void LoadFlagsFromFile(const std::string& path) {
+ FILE* flagfile = posix::FOpen(path.c_str(), "r");
if (!flagfile) {
- GTEST_LOG_(FATAL) << "Unable to open file \"" << GTEST_FLAG(flagfile)
+ GTEST_LOG_(FATAL) << "Unable to open file \"" << GTEST_FLAG_GET(flagfile)
<< "\"";
}
std::string contents(ReadEntireFile(flagfile));
@@ -6053,26 +6577,24 @@ static void LoadFlagsFromFile(const std::string &path) {
// other parts of Google Test. The type parameter CharType can be
// instantiated to either char or wchar_t.
template <typename CharType>
-void ParseGoogleTestFlagsOnlyImpl(int *argc, CharType **argv) {
+void ParseGoogleTestFlagsOnlyImpl(int* argc, CharType** argv) {
+ std::string flagfile_value;
for (int i = 1; i < *argc; i++) {
const std::string arg_string = StreamableToString(argv[i]);
- const char *const arg = arg_string.c_str();
+ const char* const arg = arg_string.c_str();
- using internal::ParseBoolFlag;
- using internal::ParseInt32Flag;
- using internal::ParseStringFlag;
+ using internal::ParseFlag;
bool remove_flag = false;
if (ParseGoogleTestFlag(arg)) {
remove_flag = true;
#if GTEST_USE_OWN_FLAGFILE_FLAG_
- } else if (ParseStringFlag(arg, kFlagfileFlag, &GTEST_FLAG(flagfile))) {
- LoadFlagsFromFile(GTEST_FLAG(flagfile));
+ } else if (ParseFlag(arg, "flagfile", &flagfile_value)) {
+ GTEST_FLAG_SET(flagfile, flagfile_value);
+ LoadFlagsFromFile(flagfile_value);
remove_flag = true;
#endif // GTEST_USE_OWN_FLAGFILE_FLAG_
- } else if (arg_string == "--help" || arg_string == "-h" ||
- arg_string == "-?" || arg_string == "/?" ||
- HasGoogleTestFlagPrefix(arg)) {
+ } else if (arg_string == "--help" || HasGoogleTestFlagPrefix(arg)) {
// Both help flag and unrecognized Google Test flags (excluding
// internal ones) trigger help display.
g_help_flag = true;
@@ -6106,8 +6628,28 @@ void ParseGoogleTestFlagsOnlyImpl(int *argc, CharType **argv) {
// Parses the command line for Google Test flags, without initializing
// other parts of Google Test.
-void ParseGoogleTestFlagsOnly(int *argc, char **argv) {
+void ParseGoogleTestFlagsOnly(int* argc, char** argv) {
+#if GTEST_HAS_ABSL
+ if (*argc > 0) {
+ // absl::ParseCommandLine() requires *argc > 0.
+ auto positional_args = absl::flags_internal::ParseCommandLineImpl(
+ *argc, argv, absl::flags_internal::ArgvListAction::kRemoveParsedArgs,
+ absl::flags_internal::UsageFlagsAction::kHandleUsage,
+ absl::flags_internal::OnUndefinedFlag::kReportUndefined);
+ // Any command-line positional arguments not part of any command-line flag
+ // (or arguments to a flag) are copied back out to argv, with the program
+ // invocation name at position 0, and argc is resized. This includes
+ // positional arguments after the flag-terminating delimiter '--'.
+ // See https://abseil.io/docs/cpp/guides/flags.
+ std::copy(positional_args.begin(), positional_args.end(), argv);
+ if (static_cast<int>(positional_args.size()) < *argc) {
+ argv[positional_args.size()] = nullptr;
+ *argc = static_cast<int>(positional_args.size());
+ }
+ }
+#else
ParseGoogleTestFlagsOnlyImpl(argc, argv);
+#endif
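The observable effect of the Abseil path, sketched from the comment above (the flag value and file name are illustrative):

    #include <cstdio>
    #include "gtest/gtest.h"

    // Invoked as: ./unit_tests --gtest_brief=1 -- input.y4m
    // After InitGoogleTest(), the flag is consumed and argv is compacted
    // to { "./unit_tests", "input.y4m" } with *argc == 2.
    int main(int argc, char** argv) {
      ::testing::InitGoogleTest(&argc, argv);
      for (int i = 1; i < argc; ++i) std::printf("positional: %s\n", argv[i]);
      return RUN_ALL_TESTS();
    }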
// Fix the value of *_NSGetArgc() on macOS, but if and only if
// *_NSGetArgv() == argv
@@ -6120,7 +6662,7 @@ void ParseGoogleTestFlagsOnly(int *argc, char **argv) {
#endif
#endif
}
-void ParseGoogleTestFlagsOnly(int *argc, wchar_t **argv) {
+void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv) {
ParseGoogleTestFlagsOnlyImpl(argc, argv);
}
@@ -6129,7 +6671,7 @@ void ParseGoogleTestFlagsOnly(int *argc, wchar_t **argv) {
// The type parameter CharType can be instantiated to either char or
// wchar_t.
template <typename CharType>
-void InitGoogleTestImpl(int *argc, CharType **argv) {
+void InitGoogleTestImpl(int* argc, CharType** argv) {
// We don't want to run the initialization code twice.
if (GTestIsInitialized()) return;
@@ -6142,6 +6684,12 @@ void InitGoogleTestImpl(int *argc, CharType **argv) {
#if GTEST_HAS_ABSL
absl::InitializeSymbolizer(g_argvs[0].c_str());
+
+ // When using the Abseil Flags library, set the program usage message to the
+ // help message, but remove the color-encoding from the message first.
+ absl::SetProgramUsageMessage(absl::StrReplaceAll(
+ kColorEncodedHelpMessage,
+ {{"@D", ""}, {"@R", ""}, {"@G", ""}, {"@Y", ""}, {"@@", "@"}}));
#endif // GTEST_HAS_ABSL
ParseGoogleTestFlagsOnly(argc, argv);
@@ -6159,7 +6707,7 @@ void InitGoogleTestImpl(int *argc, CharType **argv) {
// updated.
//
// Calling the function for the second time has no user-visible effect.
-void InitGoogleTest(int *argc, char **argv) {
+void InitGoogleTest(int* argc, char** argv) {
#if defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_(argc, argv);
#else // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
@@ -6169,7 +6717,7 @@ void InitGoogleTest(int *argc, char **argv) {
// This overloaded version can be used in Windows programs compiled in
// UNICODE mode.
-void InitGoogleTest(int *argc, wchar_t **argv) {
+void InitGoogleTest(int* argc, wchar_t** argv) {
#if defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_(argc, argv);
#else // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
@@ -6183,8 +6731,8 @@ void InitGoogleTest() {
// Since Arduino doesn't have a command line, fake out the argc/argv arguments
int argc = 1;
const auto arg0 = "dummy";
- char *argv0 = const_cast<char *>(arg0);
- char **argv = &argv0;
+ char* argv0 = const_cast<char*>(arg0);
+ char** argv = &argv0;
#if defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_(&argc, argv);
@@ -6193,37 +6741,44 @@ void InitGoogleTest() {
#endif // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_)
}
+#if !defined(GTEST_CUSTOM_TEMPDIR_FUNCTION_)
+// Return value of first environment variable that is set and contains
+// a non-empty string. If there are none, return the "fallback" string.
+// Since we like the temporary directory to have a directory separator suffix,
+// add it if not provided in the environment variable value.
+static std::string GetTempDirFromEnv(
+ std::initializer_list<const char*> environment_variables,
+ const char* fallback, char separator) {
+ for (const char* variable_name : environment_variables) {
+ const char* value = internal::posix::GetEnv(variable_name);
+ if (value != nullptr && value[0] != '\0') {
+ if (value[strlen(value) - 1] != separator) {
+ return std::string(value).append(1, separator);
+ }
+ return value;
+ }
+ }
+ return fallback;
+}
+#endif
+
std::string TempDir() {
#if defined(GTEST_CUSTOM_TEMPDIR_FUNCTION_)
return GTEST_CUSTOM_TEMPDIR_FUNCTION_();
-#endif
-
-#if GTEST_OS_WINDOWS_MOBILE
- return "\\temp\\";
-#elif GTEST_OS_WINDOWS
- const char *temp_dir = internal::posix::GetEnv("TEMP");
- if (temp_dir == nullptr || temp_dir[0] == '\0')
- return "\\temp\\";
- else if (temp_dir[strlen(temp_dir) - 1] == '\\')
- return temp_dir;
- else
- return std::string(temp_dir) + "\\";
+#elif GTEST_OS_WINDOWS || GTEST_OS_WINDOWS_MOBILE
+ return GetTempDirFromEnv({"TEST_TMPDIR", "TEMP"}, "\\temp\\", '\\');
#elif GTEST_OS_LINUX_ANDROID
- const char *temp_dir = internal::posix::GetEnv("TEST_TMPDIR");
- if (temp_dir == nullptr || temp_dir[0] == '\0')
- return "/data/local/tmp/";
- else
- return temp_dir;
+ return GetTempDirFromEnv({"TEST_TMPDIR", "TMPDIR"}, "/data/local/tmp/", '/');
#else
- return "/tmp/";
-#endif // GTEST_OS_WINDOWS_MOBILE
+ return GetTempDirFromEnv({"TEST_TMPDIR", "TMPDIR"}, "/tmp/", '/');
+#endif
}
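Callers can now rely on the trailing separator regardless of platform or which environment variable supplied the path, and TEST_TMPDIR takes precedence everywhere. Sketch (file name is illustrative):

    #include <string>
    #include "gtest/gtest.h"

    // TempDir() always ends in '/' or '\\', so names append directly.
    const std::string scratch = ::testing::TempDir() + "aom_scratch.bin";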
// Class ScopedTrace
// Pushes the given source file location and message onto a per-thread
// trace stack maintained by Google Test.
-void ScopedTrace::PushTrace(const char *file, int line, std::string message) {
+void ScopedTrace::PushTrace(const char* file, int line, std::string message) {
internal::TraceInfo trace;
trace.file = file;
trace.line = line;
diff --git a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest_main.cc b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest_main.cc
index 77c90ce61ae..44976375c99 100644
--- a/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest_main.cc
+++ b/chromium/third_party/libaom/source/libaom/third_party/googletest/src/googletest/src/gtest_main.cc
@@ -28,6 +28,7 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cstdio>
+
#include "gtest/gtest.h"
#if GTEST_OS_ESP8266 || GTEST_OS_ESP32
diff --git a/chromium/third_party/libaom/source/libaom/tools/aggregate_entropy_stats.py b/chromium/third_party/libaom/source/libaom/tools/aggregate_entropy_stats.py
index 7cb4d18e184..0311681f2d1 100644
--- a/chromium/third_party/libaom/source/libaom/tools/aggregate_entropy_stats.py
+++ b/chromium/third_party/libaom/source/libaom/tools/aggregate_entropy_stats.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
## Copyright (c) 2017, Alliance for Open Media. All rights reserved
##
## This source code is subject to the terms of the BSD 2 Clause License and
diff --git a/chromium/third_party/libaom/source/libaom/tools/diff.py b/chromium/third_party/libaom/source/libaom/tools/diff.py
index bac6aabdc07..7bb6b7fcb42 100644
--- a/chromium/third_party/libaom/source/libaom/tools/diff.py
+++ b/chromium/third_party/libaom/source/libaom/tools/diff.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
##
## Copyright (c) 2016, Alliance for Open Media. All rights reserved
##
diff --git a/chromium/third_party/libaom/source/libaom/tools/gen_constrained_tokenset.py b/chromium/third_party/libaom/source/libaom/tools/gen_constrained_tokenset.py
index 5d12ee1ef5f..f5b0816dbf4 100755
--- a/chromium/third_party/libaom/source/libaom/tools/gen_constrained_tokenset.py
+++ b/chromium/third_party/libaom/source/libaom/tools/gen_constrained_tokenset.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
##
## Copyright (c) 2016, Alliance for Open Media. All rights reserved
##
@@ -108,7 +108,7 @@ def main(bits=15, first_token=1):
for q in range(1, 256):
parray = get_quantized_spareto(q / 256., beta, bits, first_token)
assert parray.sum() == 2**bits
- print '{', ', '.join('%d' % i for i in parray), '},'
+ print('{', ', '.join('%d' % i for i in parray), '},')
if __name__ == '__main__':
diff --git a/chromium/third_party/libaom/source/libaom/tools/intersect-diffs.py b/chromium/third_party/libaom/source/libaom/tools/intersect-diffs.py
index df13c4ef70e..960183675d1 100755
--- a/chromium/third_party/libaom/source/libaom/tools/intersect-diffs.py
+++ b/chromium/third_party/libaom/source/libaom/tools/intersect-diffs.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
##
## Copyright (c) 2016, Alliance for Open Media. All rights reserved
##
@@ -71,7 +71,7 @@ def main():
break
if out_hunks:
- print FormatDiffHunks(out_hunks)
+ print(FormatDiffHunks(out_hunks))
sys.exit(1)
if __name__ == "__main__":
diff --git a/chromium/third_party/libaom/source/libaom/tools/lint-hunks.py b/chromium/third_party/libaom/source/libaom/tools/lint-hunks.py
index d02bee16ceb..8b3af972fc5 100755
--- a/chromium/third_party/libaom/source/libaom/tools/lint-hunks.py
+++ b/chromium/third_party/libaom/source/libaom/tools/lint-hunks.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
##
## Copyright (c) 2016, Alliance for Open Media. All rights reserved
##
@@ -12,7 +12,7 @@
"""Performs style checking on each diff hunk."""
import getopt
import os
-import StringIO
+import io
import subprocess
import sys
@@ -65,21 +65,21 @@ def main(argv=None):
try:
try:
opts, args = getopt.getopt(argv[1:], SHORT_OPTIONS, LONG_OPTIONS)
- except getopt.error, msg:
+ except getopt.error as msg:
raise Usage(msg)
# process options
for o, _ in opts:
if o in ("-h", "--help"):
- print __doc__
+ print(__doc__)
sys.exit(0)
if args and len(args) > 1:
- print __doc__
+ print(__doc__)
sys.exit(0)
# Find the fully qualified path to the root of the tree
- tl = Subprocess(TOPLEVEL_CMD, stdout=subprocess.PIPE)
+ tl = Subprocess(TOPLEVEL_CMD, stdout=subprocess.PIPE, text=True)
tl = tl.communicate()[0].strip()
# See if we're working on the index or not.
@@ -95,9 +95,9 @@ def main(argv=None):
# Get a list of all affected lines
file_affected_line_map = {}
- p = Subprocess(diff_cmd, stdout=subprocess.PIPE)
+ p = Subprocess(diff_cmd, stdout=subprocess.PIPE, text=True)
stdout = p.communicate()[0]
- for hunk in diff.ParseDiffHunks(StringIO.StringIO(stdout)):
+ for hunk in diff.ParseDiffHunks(io.StringIO(stdout)):
filename = hunk.right.filename[2:]
if filename not in file_affected_line_map:
file_affected_line_map[filename] = set()
@@ -105,21 +105,25 @@ def main(argv=None):
# Run each affected file through cpplint
lint_failed = False
- for filename, affected_lines in file_affected_line_map.iteritems():
+ for filename, affected_lines in file_affected_line_map.items():
if filename.split(".")[-1] not in ("c", "h", "cc"):
continue
+ if filename.startswith("third_party"):
+ continue
if args:
# File contents come from git
show_cmd = SHOW_CMD + [args[0] + ":" + filename]
- show = Subprocess(show_cmd, stdout=subprocess.PIPE)
+ show = Subprocess(show_cmd, stdout=subprocess.PIPE, text=True)
lint = Subprocess(cpplint_cmd, expected_returncode=(0, 1),
- stdin=show.stdout, stderr=subprocess.PIPE)
+ stdin=show.stdout, stderr=subprocess.PIPE,
+ text=True)
lint_out = lint.communicate()[1]
else:
# File contents come from the working tree
lint = Subprocess(cpplint_cmd, expected_returncode=(0, 1),
- stdin=subprocess.PIPE, stderr=subprocess.PIPE)
+ stdin=subprocess.PIPE, stderr=subprocess.PIPE,
+ text=True)
stdin = open(os.path.join(tl, filename)).read()
lint_out = lint.communicate(stdin)[1]
@@ -129,17 +133,17 @@ def main(argv=None):
continue
warning_line_num = int(fields[1])
if warning_line_num in affected_lines:
- print "%s:%d:%s"%(filename, warning_line_num,
- ":".join(fields[2:]))
+ print("%s:%d:%s"%(filename, warning_line_num,
+ ":".join(fields[2:])))
lint_failed = True
# Set exit code if any relevant lint errors seen
if lint_failed:
return 1
- except Usage, err:
- print >>sys.stderr, err
- print >>sys.stderr, "for help use --help"
+ except Usage as err:
+ print(err, file=sys.stderr)
+ print("for help use --help", file=sys.stderr)
return 2
if __name__ == "__main__":
diff --git a/chromium/third_party/libaom/source/libaom/tools/wrap-commit-msg.py b/chromium/third_party/libaom/source/libaom/tools/wrap-commit-msg.py
index 1c78824439a..c51ed093d32 100755
--- a/chromium/third_party/libaom/source/libaom/tools/wrap-commit-msg.py
+++ b/chromium/third_party/libaom/source/libaom/tools/wrap-commit-msg.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
##
## Copyright (c) 2016, Alliance for Open Media. All rights reserved
##